Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp

llvmorg-10.0.0-129-gd24d5c8e308. Getting closer to 10.0.1-rc2.

MFC after:	3 weeks
dim 2020-06-25 08:15:10 +00:00
commit d3a8f8c8d5
53 changed files with 2197 additions and 410 deletions


@@ -2267,6 +2267,14 @@ def mspeculative_load_hardening : Flag<["-"], "mspeculative-load-hardening">,
Group<m_Group>, Flags<[CoreOption,CC1Option]>;
def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">,
Group<m_Group>, Flags<[CoreOption]>;
def mlvi_hardening : Flag<["-"], "mlvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
HelpText<"Enable all mitigations for Load Value Injection (LVI)">;
def mno_lvi_hardening : Flag<["-"], "mno-lvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
HelpText<"Disable mitigations for Load Value Injection (LVI)">;
def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">;
def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">;
def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>,
HelpText<"Enable linker relaxation">;


@@ -276,11 +276,12 @@ public:
break;
case 'Q': // Memory operand that is an offset from a register (it is
// usually better to use `m' or `es' in asm statements)
Info.setAllowsRegister();
LLVM_FALLTHROUGH;
case 'Z': // Memory operand that is an indexed or indirect from a
// register (it is usually better to use `m' or `es' in
// asm statements)
Info.setAllowsMemory();
Info.setAllowsRegister();
break;
case 'R': // AIX TOC entry
case 'a': // Address operand that is an indexed or indirect from a


@@ -454,8 +454,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
<< lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto";
}
if ((Kinds & SanitizerKind::ShadowCallStack) &&
TC.getTriple().getArch() == llvm::Triple::aarch64 &&
if ((Kinds & SanitizerKind::ShadowCallStack) && TC.getTriple().isAArch64() &&
!llvm::AArch64::isX18ReservedByDefault(TC.getTriple()) &&
!Args.hasArg(options::OPT_ffixed_x18)) {
D.Diag(diag::err_drv_argument_only_allowed_with)


@@ -954,15 +954,12 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
if (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64 ||
getTriple().getArch() == llvm::Triple::arm ||
getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::wasm32 ||
getTriple().getArch() == llvm::Triple::wasm64)
getTriple().getArch() == llvm::Triple::wasm64 || getTriple().isAArch64())
Res |= SanitizerKind::CFIICall;
if (getTriple().getArch() == llvm::Triple::x86_64 ||
getTriple().getArch() == llvm::Triple::aarch64)
if (getTriple().getArch() == llvm::Triple::x86_64 || getTriple().isAArch64())
Res |= SanitizerKind::ShadowCallStack;
if (getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::aarch64_be)
if (getTriple().isAArch64())
Res |= SanitizerKind::MemTag;
return Res;
}


@@ -147,6 +147,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
// flags). This is a bit hacky but keeps existing usages working. We should
// consider deprecating this and instead warn if the user requests external
// retpoline thunks and *doesn't* request some form of retpolines.
auto SpectreOpt = clang::driver::options::ID::OPT_INVALID;
if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline,
options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening)) {
@@ -154,12 +155,14 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
false)) {
Features.push_back("+retpoline-indirect-calls");
Features.push_back("+retpoline-indirect-branches");
SpectreOpt = options::OPT_mretpoline;
} else if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening,
false)) {
// On x86, speculative load hardening relies on at least using retpolines
// for indirect calls.
Features.push_back("+retpoline-indirect-calls");
SpectreOpt = options::OPT_mspeculative_load_hardening;
}
} else if (Args.hasFlag(options::OPT_mretpoline_external_thunk,
options::OPT_mno_retpoline_external_thunk, false)) {
@@ -167,6 +170,26 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
// eventually switch to an error here.
Features.push_back("+retpoline-indirect-calls");
Features.push_back("+retpoline-indirect-branches");
SpectreOpt = options::OPT_mretpoline_external_thunk;
}
auto LVIOpt = clang::driver::options::ID::OPT_INVALID;
if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening,
false)) {
Features.push_back("+lvi-load-hardening");
Features.push_back("+lvi-cfi"); // load hardening implies CFI protection
LVIOpt = options::OPT_mlvi_hardening;
} else if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi,
false)) {
Features.push_back("+lvi-cfi");
LVIOpt = options::OPT_mlvi_cfi;
}
if (SpectreOpt != clang::driver::options::ID::OPT_INVALID &&
LVIOpt != clang::driver::options::ID::OPT_INVALID) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< D.getOpts().getOptionName(SpectreOpt)
<< D.getOpts().getOptionName(LVIOpt);
}
// Now add any that the user explicitly requested on the command line,


@@ -488,6 +488,9 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumudm : GCCBuiltin<"__builtin_altivec_vmsumudm">,
Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v1i128_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;


@@ -40,8 +40,8 @@ template <typename T, size_t N> struct object_deleter<T[N]> {
// constexpr, a dynamic initializer may be emitted depending on optimization
// settings. For the affected versions of MSVC, use the old linker
// initialization pattern of not providing a constructor and leaving the fields
// uninitialized.
#if !defined(_MSC_VER) || defined(__clang__)
// uninitialized. See http://llvm.org/PR41367 for details.
#if !defined(_MSC_VER) || (_MSC_VER >= 1925) || defined(__clang__)
#define LLVM_USE_CONSTEXPR_CTOR
#endif
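
For context on the guard above, the two initialization styles it selects between look roughly like the sketch below. The class and member names are illustrative only and are not taken from this commit: with LLVM_USE_CONSTEXPR_CTOR defined, a constexpr constructor plus default member initializers give guaranteed constant initialization; without it, no constructor is declared and a static instance is simply zero-initialized, so the affected MSVC versions cannot emit a dynamic initializer for it.

// Sketch only; ExamplePtrHolder and its member are hypothetical.
class ExamplePtrHolder {
  void *Ptr
#ifdef LLVM_USE_CONSTEXPR_CTOR
      = nullptr
#endif
      ;

public:
#ifdef LLVM_USE_CONSTEXPR_CTOR
  constexpr ExamplePtrHolder() = default;
#endif
  void *get() const { return Ptr; }
};

// Either way a namespace-scope instance needs no dynamic initializer: it is
// constant-initialized in the first case and zero-initialized in the second.
static ExamplePtrHolder GlobalHolder;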


@@ -959,6 +959,10 @@ def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = 1;
let MemoryVT = i32;
}
def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = 1;
let MemoryVT = f16;
}
def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = 1;
let MemoryVT = f32;
@@ -1094,6 +1098,11 @@ def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
let IsStore = 1;
let MemoryVT = i32;
}
def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = 1;
let MemoryVT = f16;
}
def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = 1;


@@ -8,8 +8,6 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
#include "RDFGraph.h"
#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -20,6 +18,8 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -753,8 +753,10 @@ RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
if (RegisterId R = TLI.getExceptionPointerRegister(PF))
LR.insert(RegisterRef(R));
if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
LR.insert(RegisterRef(R));
if (!isFuncletEHPersonality(classifyEHPersonality(PF))) {
if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
LR.insert(RegisterRef(R));
}
return LR;
}


@@ -22,9 +22,6 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
#include "RDFLiveness.h"
#include "RDFGraph.h"
#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -33,6 +30,9 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"


@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"


@@ -147,8 +147,17 @@ void llvm::computeLTOCacheKey(
// Include the hash for the current module
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
std::vector<uint64_t> ExportsGUID;
ExportsGUID.reserve(ExportList.size());
for (const auto &VI : ExportList) {
auto GUID = VI.getGUID();
ExportsGUID.push_back(GUID);
}
// Sort the export list elements GUIDs.
llvm::sort(ExportsGUID);
for (uint64_t GUID : ExportsGUID) {
// The export list can impact the internalization, be conservative here
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
}
@@ -156,12 +165,23 @@
// Include the hash for every module we import functions from. The set of
// imported symbols for each module may affect code generation and is
// sensitive to link order, so include that as well.
for (auto &Entry : ImportList) {
auto ModHash = Index.getModuleHash(Entry.first());
using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator;
std::vector<ImportMapIteratorTy> ImportModulesVector;
ImportModulesVector.reserve(ImportList.size());
for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
++It) {
ImportModulesVector.push_back(It);
}
llvm::sort(ImportModulesVector,
[](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs)
-> bool { return Lhs->getKey() < Rhs->getKey(); });
for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) {
auto ModHash = Index.getModuleHash(EntryIt->first());
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
AddUint64(Entry.second.size());
for (auto &Fn : Entry.second)
AddUint64(EntryIt->second.size());
for (auto &Fn : EntryIt->second)
AddUint64(Fn);
}
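
The extra bookkeeping above exists because the export and import maps do not iterate in a stable order (the import list is a StringMap-style container keyed by module path), so hashing entries in raw iteration order would make the cache key nondeterministic across runs. A standalone sketch of the same trick, not part of the commit, using a plain llvm::StringMap<int> rather than the real import map type:

// Collect StringMap iterators, then sort them by key so the traversal order
// no longer depends on hashing or insertion order.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include <vector>

static std::vector<llvm::StringMap<int>::const_iterator>
sortedByKey(const llvm::StringMap<int> &M) {
  std::vector<llvm::StringMap<int>::const_iterator> Order;
  Order.reserve(M.size());
  for (auto It = M.begin(); It != M.end(); ++It)
    Order.push_back(It);
  llvm::sort(Order, [](const llvm::StringMap<int>::const_iterator &L,
                       const llvm::StringMap<int>::const_iterator &R) {
    return L->getKey() < R->getKey();
  });
  return Order; // visit entries through this vector to get a stable order
}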


@@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
<< val << '\n');
SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));
// After replacement, the current node is dead, we need to
// go backward one step to make iterator still work


@@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
bool CheckPointer, bool SeenPointer) {
if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
TypeId = DIToIdMap[Ty];
// To handle the case like the following:
// struct t;
// typedef struct t _t;
// struct s1 { _t *c; };
// int test1(struct s1 *arg) { ... }
//
// struct t { int a; int b; };
// struct s2 { _t c; }
// int test2(struct s2 *arg) { ... }
//
// During traversing test1() argument, "_t" is recorded
// in DIToIdMap and a forward declaration fixup is created
// for "struct t" to avoid pointee type traversal.
//
// During traversing test2() argument, even if we see "_t" is
// already defined, we should keep moving to eventually
// bring in types for "struct t". Otherwise, the "struct s2"
// definition won't be correct.
if (Ty && (!CheckPointer || !SeenPointer)) {
if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
unsigned Tag = DTy->getTag();
if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
uint32_t TmpTypeId;
visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer,
SeenPointer);
}
}
}
return;
}


@@ -12,9 +12,6 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
@@ -27,6 +24,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"


@@ -11,9 +11,6 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFCopy.h"
#include "RDFDeadCode.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -24,6 +21,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"


@@ -11,13 +11,13 @@
//===----------------------------------------------------------------------===//
#include "RDFCopy.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"


@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
#include <vector>


@@ -9,13 +9,13 @@
// RDF-based generic dead code elimination.
#include "RDFDeadCode.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/Support/Debug.h"
#include <queue>


@@ -23,8 +23,8 @@
#ifndef RDF_DEADCODE_H
#define RDF_DEADCODE_H
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
namespace llvm {


@@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
VMSUMSHS,
VMSUMUBM,
VMSUMUHM,
VMSUMUDM,
VMSUMUHS,
VMULESB,
VMULESH,


@@ -166,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
def FeaturePPCPreRASched:
SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
"Use PowerPC pre-RA scheduling strategy">;
@@ -252,7 +255,8 @@ def ProcessorFeatures {
FeatureExtDiv,
FeatureMFTB,
DeprecatedDST,
FeatureTwoConstNR];
FeatureTwoConstNR,
FeatureUnalignedFloats];
list<SubtargetFeature> P7SpecificFeatures = [];
list<SubtargetFeature> P7Features =
!listconcat(P7InheritableFeatures, P7SpecificFeatures);


@@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
if (Subtarget.isISA3_0()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
setTruncStoreAction(MVT::f64, MVT::f16, Legal);
setTruncStoreAction(MVT::f32, MVT::f16, Legal);
} else {
// No extending loads from f16 or HW conversions back and forth.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc load and store's.
@@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
if (!Subtarget.hasP8Vector()) {
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
@@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::FP_EXTEND &&
"Should only be called for ISD::FP_EXTEND");
// FIXME: handle extends from half precision float vectors on P9.
// We only want to custom lower an extend from v2f32 to v2f64.
if (Op.getValueType() != MVT::v2f64 ||
Op.getOperand(0).getValueType() != MVT::v2f32)
@@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
// Don't handle bitcast here.
return;
case ISD::FP_EXTEND:
SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
if (Lowered)
Results.push_back(Lowered);
return;
}
}
@@ -15255,6 +15279,10 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!Subtarget.allowsUnalignedFPAccess())
return false;
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64 &&


@@ -637,7 +637,7 @@ namespace llvm {
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
if (VT.getScalarSizeInBits() % 8 == 0)
if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}


@@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
// Vector Multiply-Sum
def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
v1i128, v2i64, v1i128>;
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;


@@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
return false;
// The operand may not necessarily be an immediate - it could be a relocation.
if (!ADDIMI.getOperand(2).isImm())
return false;
Imm = ADDIMI.getOperand(2).getImm();
return true;


@@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
// Load/convert and convert/store patterns for f16.
def : Pat<(f64 (extloadf16 xoaddr:$src)),
(f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
(STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
def : Pat<(f32 (extloadf16 xoaddr:$src)),
(f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
(STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
def : Pat<(f64 (f16_to_fp i32:$A)),
(f64 (XSCVHPDP (MTVSRWZ $A)))>;
def : Pat<(f32 (f16_to_fp i32:$A)),
(f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
def : Pat<(i32 (fp_to_f16 f32:$A)),
(i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
let Predicates = [IsBigEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),


@@ -124,6 +124,7 @@ protected:
bool IsPPC4xx;
bool IsPPC6xx;
bool FeatureMFTB;
bool AllowsUnalignedFPAccess;
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsLittleEndian;
@@ -274,6 +275,7 @@ public:
bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
bool isE500() const { return IsE500; }
bool isFeatureMFTB() const { return FeatureMFTB; }
bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; }
bool isDeprecatedDST() const { return DeprecatedDST; }
bool hasICBT() const { return HasICBT; }
bool hasInvariantFunctionDescriptors() const {


@@ -0,0 +1,446 @@
//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: ImmutableGraph is a fast DAG implementation that cannot be
/// modified, except by creating a new ImmutableGraph. ImmutableGraph is
/// implemented as two arrays: one containing nodes, and one containing edges.
/// The advantages to this implementation are two-fold:
/// 1. Iteration and traversal operations benefit from cache locality.
/// 2. Operations on sets of nodes/edges are efficient, and representations of
/// those sets in memory are compact. For instance, a set of edges is
/// implemented as a bit vector, wherein each bit corresponds to one edge in
/// the edge array. This implies a lower bound of 64x spatial improvement
/// over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that
/// insert/erase/contains operations complete in negligible constant time:
/// insert and erase require one load and one store, and contains requires
/// just one load.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>
namespace llvm {
template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph {
using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>;
template <typename> friend class ImmutableGraphBuilder;
public:
using node_value_type = NodeValueT;
using edge_value_type = EdgeValueT;
using size_type = int;
class Node;
class Edge {
friend class ImmutableGraph;
template <typename> friend class ImmutableGraphBuilder;
const Node *Dest;
edge_value_type Value;
public:
const Node *getDest() const { return Dest; };
const edge_value_type &getValue() const { return Value; }
};
class Node {
friend class ImmutableGraph;
template <typename> friend class ImmutableGraphBuilder;
const Edge *Edges;
node_value_type Value;
public:
const node_value_type &getValue() const { return Value; }
const Edge *edges_begin() const { return Edges; }
// Nodes are allocated sequentially. Edges for a node are stored together.
// The end of this Node's edges is the beginning of the next node's edges.
// An extra node was allocated to hold the end pointer for the last real
// node.
const Edge *edges_end() const { return (this + 1)->Edges; }
ArrayRef<Edge> edges() const {
return makeArrayRef(edges_begin(), edges_end());
}
};
protected:
ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
size_type NodesSize, size_type EdgesSize)
: Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize),
EdgesSize(EdgesSize) {}
ImmutableGraph(const ImmutableGraph &) = delete;
ImmutableGraph(ImmutableGraph &&) = delete;
ImmutableGraph &operator=(const ImmutableGraph &) = delete;
ImmutableGraph &operator=(ImmutableGraph &&) = delete;
public:
ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); }
const Node *nodes_begin() const { return nodes().begin(); }
const Node *nodes_end() const { return nodes().end(); }
ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); }
const Edge *edges_begin() const { return edges().begin(); }
const Edge *edges_end() const { return edges().end(); }
size_type nodes_size() const { return NodesSize; }
size_type edges_size() const { return EdgesSize; }
// Node N must belong to this ImmutableGraph.
size_type getNodeIndex(const Node &N) const {
return std::distance(nodes_begin(), &N);
}
// Edge E must belong to this ImmutableGraph.
size_type getEdgeIndex(const Edge &E) const {
return std::distance(edges_begin(), &E);
}
// FIXME: Could NodeSet and EdgeSet be templated to share code?
class NodeSet {
const ImmutableGraph &G;
BitVector V;
public:
NodeSet(const ImmutableGraph &G, bool ContainsAll = false)
: G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {}
bool insert(const Node &N) {
size_type Idx = G.getNodeIndex(N);
bool AlreadyExists = V.test(Idx);
V.set(Idx);
return !AlreadyExists;
}
void erase(const Node &N) {
size_type Idx = G.getNodeIndex(N);
V.reset(Idx);
}
bool contains(const Node &N) const {
size_type Idx = G.getNodeIndex(N);
return V.test(Idx);
}
void clear() { V.reset(); }
size_type empty() const { return V.none(); }
/// Return the number of elements in the set
size_type count() const { return V.count(); }
/// Return the size of the set's domain
size_type size() const { return V.size(); }
/// Set union
NodeSet &operator|=(const NodeSet &RHS) {
assert(&this->G == &RHS.G);
V |= RHS.V;
return *this;
}
/// Set intersection
NodeSet &operator&=(const NodeSet &RHS) {
assert(&this->G == &RHS.G);
V &= RHS.V;
return *this;
}
/// Set disjoint union
NodeSet &operator^=(const NodeSet &RHS) {
assert(&this->G == &RHS.G);
V ^= RHS.V;
return *this;
}
using index_iterator = typename BitVector::const_set_bits_iterator;
index_iterator index_begin() const { return V.set_bits_begin(); }
index_iterator index_end() const { return V.set_bits_end(); }
void set(size_type Idx) { V.set(Idx); }
void reset(size_type Idx) { V.reset(Idx); }
class iterator {
const NodeSet &Set;
size_type Current;
void advance() {
assert(Current != -1);
Current = Set.V.find_next(Current);
}
public:
iterator(const NodeSet &Set, size_type Begin)
: Set{Set}, Current{Begin} {}
iterator operator++(int) {
iterator Tmp = *this;
advance();
return Tmp;
}
iterator &operator++() {
advance();
return *this;
}
Node *operator*() const {
assert(Current != -1);
return Set.G.nodes_begin() + Current;
}
bool operator==(const iterator &other) const {
assert(&this->Set == &other.Set);
return this->Current == other.Current;
}
bool operator!=(const iterator &other) const { return !(*this == other); }
};
iterator begin() const { return iterator{*this, V.find_first()}; }
iterator end() const { return iterator{*this, -1}; }
};
class EdgeSet {
const ImmutableGraph &G;
BitVector V;
public:
EdgeSet(const ImmutableGraph &G, bool ContainsAll = false)
: G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {}
bool insert(const Edge &E) {
size_type Idx = G.getEdgeIndex(E);
bool AlreadyExists = V.test(Idx);
V.set(Idx);
return !AlreadyExists;
}
void erase(const Edge &E) {
size_type Idx = G.getEdgeIndex(E);
V.reset(Idx);
}
bool contains(const Edge &E) const {
size_type Idx = G.getEdgeIndex(E);
return V.test(Idx);
}
void clear() { V.reset(); }
bool empty() const { return V.none(); }
/// Return the number of elements in the set
size_type count() const { return V.count(); }
/// Return the size of the set's domain
size_type size() const { return V.size(); }
/// Set union
EdgeSet &operator|=(const EdgeSet &RHS) {
assert(&this->G == &RHS.G);
V |= RHS.V;
return *this;
}
/// Set intersection
EdgeSet &operator&=(const EdgeSet &RHS) {
assert(&this->G == &RHS.G);
V &= RHS.V;
return *this;
}
/// Set disjoint union
EdgeSet &operator^=(const EdgeSet &RHS) {
assert(&this->G == &RHS.G);
V ^= RHS.V;
return *this;
}
using index_iterator = typename BitVector::const_set_bits_iterator;
index_iterator index_begin() const { return V.set_bits_begin(); }
index_iterator index_end() const { return V.set_bits_end(); }
void set(size_type Idx) { V.set(Idx); }
void reset(size_type Idx) { V.reset(Idx); }
class iterator {
const EdgeSet &Set;
size_type Current;
void advance() {
assert(Current != -1);
Current = Set.V.find_next(Current);
}
public:
iterator(const EdgeSet &Set, size_type Begin)
: Set{Set}, Current{Begin} {}
iterator operator++(int) {
iterator Tmp = *this;
advance();
return Tmp;
}
iterator &operator++() {
advance();
return *this;
}
Edge *operator*() const {
assert(Current != -1);
return Set.G.edges_begin() + Current;
}
bool operator==(const iterator &other) const {
assert(&this->Set == &other.Set);
return this->Current == other.Current;
}
bool operator!=(const iterator &other) const { return !(*this == other); }
};
iterator begin() const { return iterator{*this, V.find_first()}; }
iterator end() const { return iterator{*this, -1}; }
};
private:
std::unique_ptr<Node[]> Nodes;
std::unique_ptr<Edge[]> Edges;
size_type NodesSize;
size_type EdgesSize;
};
template <typename GraphT> class ImmutableGraphBuilder {
using node_value_type = typename GraphT::node_value_type;
using edge_value_type = typename GraphT::edge_value_type;
static_assert(
std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>,
GraphT>::value,
"Template argument to ImmutableGraphBuilder must derive from "
"ImmutableGraph<>");
using size_type = typename GraphT::size_type;
using NodeSet = typename GraphT::NodeSet;
using Node = typename GraphT::Node;
using EdgeSet = typename GraphT::EdgeSet;
using Edge = typename GraphT::Edge;
using BuilderEdge = std::pair<edge_value_type, size_type>;
using EdgeList = std::vector<BuilderEdge>;
using BuilderVertex = std::pair<node_value_type, EdgeList>;
using VertexVec = std::vector<BuilderVertex>;
public:
using BuilderNodeRef = size_type;
BuilderNodeRef addVertex(const node_value_type &V) {
auto I = AdjList.emplace(AdjList.end(), V, EdgeList{});
return std::distance(AdjList.begin(), I);
}
void addEdge(const edge_value_type &E, BuilderNodeRef From,
BuilderNodeRef To) {
AdjList[From].second.emplace_back(E, To);
}
bool empty() const { return AdjList.empty(); }
template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) {
size_type VertexSize = AdjList.size(), EdgeSize = 0;
for (const auto &V : AdjList) {
EdgeSize += V.second.size();
}
auto VertexArray =
std::make_unique<Node[]>(VertexSize + 1 /* terminator node */);
auto EdgeArray = std::make_unique<Edge[]>(EdgeSize);
size_type VI = 0, EI = 0;
for (; VI < VertexSize; ++VI) {
VertexArray[VI].Value = std::move(AdjList[VI].first);
VertexArray[VI].Edges = &EdgeArray[EI];
auto NumEdges = static_cast<size_type>(AdjList[VI].second.size());
for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) {
auto &E = AdjList[VI].second[VEI];
EdgeArray[EI].Value = std::move(E.first);
EdgeArray[EI].Dest = &VertexArray[E.second];
}
}
assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed");
VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node
return std::make_unique<GraphT>(std::move(VertexArray),
std::move(EdgeArray), VertexSize, EdgeSize,
std::forward<ArgT>(Args)...);
}
template <typename... ArgT>
static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes,
const EdgeSet &TrimEdges,
ArgT &&... Args) {
size_type NewVertexSize = G.nodes_size() - TrimNodes.count();
size_type NewEdgeSize = G.edges_size() - TrimEdges.count();
auto NewVertexArray =
std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */);
auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize);
// Walk the nodes and determine the new index for each node.
size_type NewNodeIndex = 0;
std::vector<size_type> RemappedNodeIndex(G.nodes_size());
for (const Node &N : G.nodes()) {
if (TrimNodes.contains(N))
continue;
RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++;
}
assert(NewNodeIndex == NewVertexSize &&
"Should have assigned NewVertexSize indices");
size_type VertexI = 0, EdgeI = 0;
for (const Node &N : G.nodes()) {
if (TrimNodes.contains(N))
continue;
NewVertexArray[VertexI].Value = N.getValue();
NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI];
for (const Edge &E : N.edges()) {
if (TrimEdges.contains(E))
continue;
NewEdgeArray[EdgeI].Value = E.getValue();
size_type DestIdx = G.getNodeIndex(*E.getDest());
size_type NewIdx = RemappedNodeIndex[DestIdx];
assert(NewIdx < NewVertexSize);
NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx];
++EdgeI;
}
++VertexI;
}
assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize &&
"Gadget graph malformed");
NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator
return std::make_unique<GraphT>(std::move(NewVertexArray),
std::move(NewEdgeArray), NewVertexSize,
NewEdgeSize, std::forward<ArgT>(Args)...);
}
private:
VertexVec AdjList;
};
template <typename NodeValueT, typename EdgeValueT>
struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> {
using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>;
using NodeRef = typename GraphT::Node const *;
using EdgeRef = typename GraphT::Edge const &;
static NodeRef edge_dest(EdgeRef E) { return E.getDest(); }
using ChildIteratorType =
mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>;
static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); }
static ChildIteratorType child_begin(NodeRef N) {
return {N->edges_begin(), &edge_dest};
}
static ChildIteratorType child_end(NodeRef N) {
return {N->edges_end(), &edge_dest};
}
static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; }
using nodes_iterator =
mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>;
static nodes_iterator nodes_begin(GraphT *G) {
return {G->nodes_begin(), &getNode};
}
static nodes_iterator nodes_end(GraphT *G) {
return {G->nodes_end(), &getNode};
}
using ChildEdgeIteratorType = typename GraphT::Edge const *;
static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
return N->edges_begin();
}
static ChildEdgeIteratorType child_edge_end(NodeRef N) {
return N->edges_end();
}
static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); }
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
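
As a usage note for the interface above: a graph is assembled through ImmutableGraphBuilder and then frozen by get(). The concrete graph type must derive from ImmutableGraph and forward to the protected base constructor so the builder can instantiate it. The sketch below is illustrative only; the IntGraph type is hypothetical and not part of this commit.

#include "ImmutableGraph.h"
#include <memory>

namespace {
// Hypothetical graph with int-valued nodes and edges.
struct IntGraph : llvm::ImmutableGraph<int, int> {
  // Re-expose the protected base constructor so the builder's
  // std::make_unique<IntGraph>(...) call can construct the final graph.
  IntGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
           size_type NodesSize, size_type EdgesSize)
      : ImmutableGraph(std::move(Nodes), std::move(Edges), NodesSize,
                       EdgesSize) {}
};
} // namespace

static int buildAndCount() {
  llvm::ImmutableGraphBuilder<IntGraph> Builder;
  auto A = Builder.addVertex(1);
  auto B = Builder.addVertex(2);
  Builder.addEdge(/*edge value*/ 7, A, B);    // one edge A -> B
  std::unique_ptr<IntGraph> G = Builder.get();

  // Node sets are bit vectors indexed by a node's position in the node array.
  IntGraph::NodeSet Visited(*G);
  Visited.insert(*G->nodes_begin());
  return Visited.count() + G->edges_size();   // 1 + 1
}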


@@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass();
FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86RetpolineThunksPass();
FunctionPass *createX86IndirectThunksPass();
/// This pass ensures instructions featuring a memory operand
/// have distinctive <LineNumber, Discriminator> (with respect to eachother)
@@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();
void initializeEvexToVexInstPassPass(PassRegistry &);
@@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);


@@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk
"ourselves. Only has effect when combined with some other retpoline "
"feature", [FeatureRetpolineIndirectCalls]>;
// Mitigate LVI attacks against indirect calls/branches and call returns
def FeatureLVIControlFlowIntegrity
: SubtargetFeature<
"lvi-cfi", "UseLVIControlFlowIntegrity", "true",
"Prevent indirect calls/branches from using a memory operand, and "
"precede all indirect calls/branches from a register with an "
"LFENCE instruction to serialize control flow. Also decompose RET "
"instructions into a POP+LFENCE+JMP sequence.">;
// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening
: SubtargetFeature<
"lvi-load-hardening", "UseLVILoadHardening", "true",
"Insert LFENCE instructions to prevent data speculatively injected "
"into loads from being used maliciously.">;
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;


@@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
return false;
// Functions using retpoline for indirect calls need to use SDISel.
if (Subtarget->useRetpolineIndirectCalls())
// Functions using thunks for indirect calls need to use SDISel.
if (Subtarget->useIndirectThunkCalls())
return false;
// Handle only C, fastcc, and webkit_js calling conventions for now.


@@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
bool InProlog) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
// FIXME: Add retpoline support and remove this.
if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
// FIXME: Add indirect thunk support and remove this.
if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
"code model and indirect thunks not yet implemented.");
unsigned CallOp;
if (Is64Bit)
@@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
// is laid out within 2^31 bytes of each function body, but this seems
// to be sufficient for JIT.
// FIXME: Add retpoline support and remove the error here..
if (STI.useRetpolineIndirectCalls())
if (STI.useIndirectThunkCalls())
report_fatal_error("Emitting morestack calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
"code model and thunks not yet implemented.");
BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
.addReg(X86::RIP)
.addImm(0)


@@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
!Subtarget->useRetpolineIndirectCalls() &&
!Subtarget->useIndirectThunkCalls() &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||


@@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
}
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
// If the subtarget is using retpolines, we need to not generate jump tables.
if (Subtarget.useRetpolineIndirectBranches())
// If the subtarget is using thunks, we need to not generate jump tables.
if (Subtarget.useIndirectThunkBranches())
return false;
// Otherwise, fallback on the generic logic.
@@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
return BB;
}
static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) {
switch (RPOpc) {
case X86::RETPOLINE_CALL32:
case X86::INDIRECT_THUNK_CALL32:
return X86::CALLpcrel32;
case X86::RETPOLINE_CALL64:
case X86::INDIRECT_THUNK_CALL64:
return X86::CALL64pcrel32;
case X86::RETPOLINE_TCRETURN32:
case X86::INDIRECT_THUNK_TCRETURN32:
return X86::TCRETURNdi;
case X86::RETPOLINE_TCRETURN64:
case X86::INDIRECT_THUNK_TCRETURN64:
return X86::TCRETURNdi64;
}
llvm_unreachable("not retpoline opcode");
llvm_unreachable("not indirect thunk opcode");
}
static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
unsigned Reg) {
static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget,
unsigned Reg) {
if (Subtarget.useRetpolineExternalThunk()) {
// When using an external thunk for retpolines, we pick names that match the
// names GCC happens to use as well. This helps simplify the implementation
@@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
}
llvm_unreachable("unexpected reg for external indirect thunk");
}
if (Subtarget.useRetpolineIndirectCalls() ||
Subtarget.useRetpolineIndirectBranches()) {
// When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_eax";
case X86::ECX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_ecx";
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edx";
case X86::EDI:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_retpoline_r11";
}
llvm_unreachable("unexpected reg for retpoline");
}
// When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_eax";
case X86::ECX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_ecx";
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edx";
case X86::EDI:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edi";
case X86::R11:
if (Subtarget.useLVIControlFlowIntegrity()) {
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_retpoline_r11";
return "__llvm_lvi_thunk_r11";
}
llvm_unreachable("unexpected reg for retpoline");
llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
MachineBasicBlock *BB) const {
X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Copy the virtual register into the R11 physical register and
// call the retpoline thunk.
DebugLoc DL = MI.getDebugLoc();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
// Find an available scratch register to hold the callee. On 64-bit, we can
// just use R11, but we scan for uses anyway to ensure we don't generate
@@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
report_fatal_error("calling convention incompatible with retpoline, no "
"available registers");
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
@@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return EmitLoweredTLSAddr(MI, BB);
case X86::RETPOLINE_CALL32:
case X86::RETPOLINE_CALL64:
case X86::RETPOLINE_TCRETURN32:
case X86::RETPOLINE_TCRETURN64:
return EmitLoweredRetpoline(MI, BB);
case X86::INDIRECT_THUNK_CALL32:
case X86::INDIRECT_THUNK_CALL64:
case X86::INDIRECT_THUNK_TCRETURN32:
case X86::INDIRECT_THUNK_TCRETURN64:
return EmitLoweredIndirectThunk(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case X86::CATCHPAD:


@@ -1482,8 +1482,8 @@ namespace llvm {
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;


@@ -0,0 +1,364 @@
//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
///
/// Currently supported thunks include:
/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
/// before making an indirect call/jump
///
/// Note that the reason that this is implemented as a MachineFunctionPass and
/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
/// serialize all transformations, which can consume lots of memory.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "x86-retpoline-thunks"
static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
static const char R11RetpolineName[] = "__llvm_retpoline_r11";
static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";
static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
namespace {
template <typename Derived> class ThunkInserter {
Derived &getDerived() { return *static_cast<Derived *>(this); }
protected:
bool InsertedThunks;
void doInitialization(Module &M) {}
void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);
public:
void init(Module &M) {
InsertedThunks = false;
getDerived().doInitialization(M);
}
// return `true` if `MMI` or `MF` was modified
bool run(MachineModuleInfo &MMI, MachineFunction &MF);
};
struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
const char *getThunkPrefix() { return RetpolineNamePrefix; }
bool mayUseThunk(const MachineFunction &MF) {
const auto &STI = MF.getSubtarget<X86Subtarget>();
return (STI.useRetpolineIndirectCalls() ||
STI.useRetpolineIndirectBranches()) &&
!STI.useRetpolineExternalThunk();
}
void insertThunks(MachineModuleInfo &MMI);
void populateThunk(MachineFunction &MF);
};
struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
const char *getThunkPrefix() { return LVIThunkNamePrefix; }
bool mayUseThunk(const MachineFunction &MF) {
return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
}
void insertThunks(MachineModuleInfo &MMI) {
createThunkFunction(MMI, R11LVIThunkName);
}
void populateThunk(MachineFunction &MF) {
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
// generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
while (MF.size() > 1)
MF.erase(std::next(MF.begin()));
// This code mitigates LVI by replacing each indirect call/jump with a
// direct call/jump to a thunk that looks like:
// ```
// lfence
// jmpq *%r11
// ```
// This ensures that if the value in register %r11 was loaded from memory,
// then the value in %r11 is (architecturally) correct prior to the jump.
const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
MF.front().addLiveIn(X86::R11);
return;
}
};
class X86IndirectThunks : public MachineFunctionPass {
public:
static char ID;
X86IndirectThunks() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "X86 Indirect Thunks"; }
bool doInitialization(Module &M) override;
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
}
private:
std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;
// FIXME: When LLVM moves to C++17, these can become folds
template <typename... ThunkInserterT>
static void initTIs(Module &M,
std::tuple<ThunkInserterT...> &ThunkInserters) {
(void)std::initializer_list<int>{
(std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
}
template <typename... ThunkInserterT>
static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
std::tuple<ThunkInserterT...> &ThunkInserters) {
bool Modified = false;
(void)std::initializer_list<int>{
Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
return Modified;
}
};
} // end anonymous namespace
void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
createThunkFunction(MMI, R11RetpolineName);
else
for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
EDIRetpolineName})
createThunkFunction(MMI, Name);
}
void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
Register ThunkReg;
if (Is64Bit) {
assert(MF.getName() == "__llvm_retpoline_r11" &&
"Should only have an r11 thunk on 64-bit targets");
// __llvm_retpoline_r11:
// callq .Lr11_call_target
// .Lr11_capture_spec:
// pause
// lfence
// jmp .Lr11_capture_spec
// .align 16
// .Lr11_call_target:
// movq %r11, (%rsp)
// retq
ThunkReg = X86::R11;
} else {
// For 32-bit targets we need to emit a collection of thunks for various
// possible scratch registers as well as a fallback that uses EDI, which is
// normally callee saved.
// __llvm_retpoline_eax:
// calll .Leax_call_target
// .Leax_capture_spec:
// pause
// jmp .Leax_capture_spec
// .align 16
// .Leax_call_target:
// movl %eax, (%esp) # Clobber return addr
// retl
//
// __llvm_retpoline_ecx:
// ... # Same setup
// movl %ecx, (%esp)
// retl
//
// __llvm_retpoline_edx:
// ... # Same setup
// movl %edx, (%esp)
// retl
//
// __llvm_retpoline_edi:
// ... # Same setup
// movl %edi, (%esp)
// retl
if (MF.getName() == EAXRetpolineName)
ThunkReg = X86::EAX;
else if (MF.getName() == ECXRetpolineName)
ThunkReg = X86::ECX;
else if (MF.getName() == EDXRetpolineName)
ThunkReg = X86::EDX;
else if (MF.getName() == EDIRetpolineName)
ThunkReg = X86::EDI;
else
llvm_unreachable("Invalid thunk name on x86-32!");
}
const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
// generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
while (MF.size() > 1)
MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec =
MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MachineBasicBlock *CallTarget =
MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MCSymbol *TargetSym = MF.getContext().createTempSymbol();
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
Entry->addLiveIn(ThunkReg);
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
// The MIR verifier thinks that the CALL in the entry block will fall through
// to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is
// the successor, but the MIR verifier doesn't know how to cope with that.
Entry->addSuccessor(CaptureSpec);
// In the capture loop for speculation, we want to stop the processor from
// speculating as fast as possible. On Intel processors, the PAUSE instruction
// will block speculation without consuming any execution resources. On AMD
// processors, the PAUSE instruction is (essentially) a nop, so we also use an
// LFENCE instruction which they have advised will stop speculation as well
// with minimal resource utilization. We still end the capture with a jump to
// form an infinite loop to fully guarantee that no matter what implementation
// of the x86 ISA, speculating this code path never escapes.
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
CaptureSpec->setHasAddressTaken();
CaptureSpec->addSuccessor(CaptureSpec);
CallTarget->addLiveIn(ThunkReg);
CallTarget->setHasAddressTaken();
CallTarget->setAlignment(Align(16));
// Insert return address clobber
const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
0)
.addReg(ThunkReg);
CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
template <typename Derived>
void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
StringRef Name) {
assert(Name.startswith(getDerived().getThunkPrefix()) &&
"Created a thunk with an unexpected prefix!");
Module &M = const_cast<Module &>(*MMI.getModule());
LLVMContext &Ctx = M.getContext();
auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
Function *F =
Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
F->setVisibility(GlobalValue::HiddenVisibility);
F->setComdat(M.getOrInsertComdat(Name));
// Add Attributes so that we don't create a frame, unwind information, or
// inline.
AttrBuilder B;
B.addAttribute(llvm::Attribute::NoUnwind);
B.addAttribute(llvm::Attribute::Naked);
F->addAttributes(llvm::AttributeList::FunctionIndex, B);
// Populate our function a bit so that we can verify.
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
IRBuilder<> Builder(Entry);
Builder.CreateRetVoid();
// MachineFunctions/MachineBasicBlocks aren't created automatically for the
// IR-level constructs we already made. Create them and insert them into the
// module.
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
// Insert EntryMBB into MF. It's not in the module until we do this.
MF.insert(MF.end(), EntryMBB);
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
}
template <typename Derived>
bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
// If MF is not a thunk, check to see if we need to insert a thunk.
if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
// If we've already inserted a thunk, nothing else to do.
if (InsertedThunks)
return false;
// Only add a thunk if one of the functions has the corresponding feature
// enabled in its subtarget, and doesn't enable external thunks.
// FIXME: Conditionalize on indirect calls so we don't emit a thunk when
// nothing will end up calling it.
// FIXME: It's a little silly to look at every function just to enumerate
// the subtargets, but eventually we'll want to look at them for indirect
// calls, so maybe this is OK.
if (!getDerived().mayUseThunk(MF))
return false;
getDerived().insertThunks(MMI);
InsertedThunks = true;
return true;
}
// If this *is* a thunk function, we need to populate it with the correct MI.
getDerived().populateThunk(MF);
return true;
}
FunctionPass *llvm::createX86IndirectThunksPass() {
return new X86IndirectThunks();
}
char X86IndirectThunks::ID = 0;
bool X86IndirectThunks::doInitialization(Module &M) {
initTIs(M, TIs);
return false;
}
bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << getPassName() << '\n');
auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
return runTIs(MMI, MF, TIs);
}

View File

@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
(INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
(INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[Not64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,

View File

@ -237,13 +237,13 @@ let isCall = 1 in
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
Requires<[Not64BitMode,NotUseIndirectThunkCalls]>,
Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,
NotUseRetpolineIndirectCalls]>,
NotUseIndirectThunkCalls]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
Requires<[In64BitMode,NotUseIndirectThunkCalls]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
NotUseRetpolineIndirectCalls]>;
NotUseIndirectThunkCalls]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
def INDIRECT_THUNK_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
Requires<[Not64BitMode,UseIndirectThunkCalls]>;
def RETPOLINE_CALL64 :
def INDIRECT_THUNK_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
Requires<[In64BitMode,UseIndirectThunkCalls]>;
// Retpoline variant of indirect tail calls.
// Indirect thunk variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def RETPOLINE_TCRETURN64 :
def INDIRECT_THUNK_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
def RETPOLINE_TCRETURN32 :
def INDIRECT_THUNK_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}

View File

@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.

View File

@ -0,0 +1,900 @@
//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
/// of a load from memory (i.e., SOURCE), and any operation that may transmit
/// the value loaded from memory over a covert channel, or use the value loaded
/// from memory to determine a branch/call target (i.e., SINK). After finding
/// all such gadgets in a given function, the pass minimally inserts LFENCE
/// instructions in such a manner that the following property is satisfied: for
/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
/// least one LFENCE instruction. The algorithm that implements this minimal
/// insertion is influenced by an academic paper that minimally inserts memory
/// fences for high-performance concurrent programs:
/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
/// The algorithm implemented in this pass is as follows:
/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
/// following components:
/// - SOURCE instructions (also includes function arguments)
/// - SINK instructions
/// - Basic block entry points
/// - Basic block terminators
/// - LFENCE instructions
/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been
/// mitigated, go to step 6.
/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
/// 5. Go to step 2.
/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction()
/// to tell LLVM that the function was modified.
///
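/// As an illustrative sketch (not drawn from any particular test case), a
/// minimal SOURCE+SINK gadget and its mitigation might look like:
///
///   movq (%rdi), %rax     # SOURCE: the loaded value may be attacker-injected
///   movq (%rax), %rcx     # SINK:   the loaded value determines an address
///
/// which, after hardening, becomes:
///
///   movq (%rdi), %rax
///   lfence                # cuts every CFG path from this SOURCE to its SINK
///   movq (%rax), %rcx
///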
//===----------------------------------------------------------------------===//
#include "ImmutableGraph.h"
#include "X86.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define PASS_KEY "x86-lvi-load"
#define DEBUG_TYPE PASS_KEY
STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
"were deployed");
STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
static cl::opt<std::string> OptimizePluginPath(
PASS_KEY "-opt-plugin",
cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
static cl::opt<bool> NoConditionalBranches(
PASS_KEY "-no-cbranch",
cl::desc("Don't treat conditional branches as disclosure gadgets. This "
"may improve performance, at the cost of security."),
cl::init(false), cl::Hidden);
static cl::opt<bool> EmitDot(
PASS_KEY "-dot",
cl::desc(
"For each function, emit a dot graph depicting potential LVI gadgets"),
cl::init(false), cl::Hidden);
static cl::opt<bool> EmitDotOnly(
PASS_KEY "-dot-only",
cl::desc("For each function, emit a dot graph depicting potential LVI "
"gadgets, and do not insert any fences"),
cl::init(false), cl::Hidden);
static cl::opt<bool> EmitDotVerify(
PASS_KEY "-dot-verify",
cl::desc("For each function, emit a dot graph to stdout depicting "
"potential LVI gadgets, used for testing purposes only"),
cl::init(false), cl::Hidden);
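// The options above are prefixed with PASS_KEY ("x86-lvi-load"), so (for
// example, and assuming an llc invocation on a target that enables this pass)
// the gadget graph can be dumped without inserting any fences via
//   llc -x86-lvi-load-dot-only ...
// and an external cut optimizer can be loaded via
//   llc -x86-lvi-load-opt-plugin=<plugin>.so ...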
static llvm::sys::DynamicLibrary OptimizeDL;
typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
unsigned int *edges, int *edge_values,
int *cut_edges /* out */, unsigned int edges_size);
static OptimizeCutT OptimizeCut = nullptr;
namespace {
struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
static constexpr int GadgetEdgeSentinel = -1;
static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
using GraphT = ImmutableGraph<MachineInstr *, int>;
using Node = typename GraphT::Node;
using Edge = typename GraphT::Edge;
using size_type = typename GraphT::size_type;
MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
std::unique_ptr<Edge[]> Edges, size_type NodesSize,
size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
: GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize),
NumFences(NumFences), NumGadgets(NumGadgets) {}
static inline bool isCFGEdge(const Edge &E) {
return E.getValue() != GadgetEdgeSentinel;
}
static inline bool isGadgetEdge(const Edge &E) {
return E.getValue() == GadgetEdgeSentinel;
}
int NumFences;
int NumGadgets;
};
class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass {
public:
X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "X86 Load Value Injection (LVI) Load Hardening";
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
private:
using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>;
using EdgeSet = MachineGadgetGraph::EdgeSet;
using NodeSet = MachineGadgetGraph::NodeSet;
using Gadget = std::pair<MachineInstr *, MachineInstr *>;
const X86Subtarget *STI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
std::unique_ptr<MachineGadgetGraph>
getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
const MachineDominanceFrontier &MDF) const;
int hardenLoadsWithPlugin(MachineFunction &MF,
std::unique_ptr<MachineGadgetGraph> Graph) const;
int hardenLoadsWithGreedyHeuristic(
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const;
int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G,
EdgeSet &ElimEdges /* in, out */,
NodeSet &ElimNodes /* in, out */) const;
std::unique_ptr<MachineGadgetGraph>
trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
void findAndCutEdges(MachineGadgetGraph &G,
EdgeSet &CutEdges /* out */) const;
int insertFences(MachineFunction &MF, MachineGadgetGraph &G,
EdgeSet &CutEdges /* in, out */) const;
bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const;
inline bool isFence(const MachineInstr *MI) const {
return MI && (MI->getOpcode() == X86::LFENCE ||
(STI->useLVIControlFlowIntegrity() && MI->isCall()));
}
};
} // end anonymous namespace
namespace llvm {
template <>
struct GraphTraits<MachineGadgetGraph *>
: GraphTraits<ImmutableGraph<MachineInstr *, int> *> {};
template <>
struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
using GraphType = MachineGadgetGraph;
using Traits = llvm::GraphTraits<GraphType *>;
using NodeRef = typename Traits::NodeRef;
using EdgeRef = typename Traits::EdgeRef;
using ChildIteratorType = typename Traits::ChildIteratorType;
using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
std::string getNodeLabel(NodeRef Node, GraphType *) {
if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel)
return "ARGS";
std::string Str;
raw_string_ostream OS(Str);
OS << *Node->getValue();
return OS.str();
}
static std::string getNodeAttributes(NodeRef Node, GraphType *) {
MachineInstr *MI = Node->getValue();
if (MI == MachineGadgetGraph::ArgNodeSentinel)
return "color = blue";
if (MI->getOpcode() == X86::LFENCE)
return "color = green";
return "";
}
static std::string getEdgeAttributes(NodeRef, ChildIteratorType E,
GraphType *) {
int EdgeVal = (*E.getCurrent()).getValue();
return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal)
: "color = red, style = \"dashed\"";
}
};
} // end namespace llvm
constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
char X86LoadValueInjectionLoadHardeningPass::ID = 0;
void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineDominanceFrontier>();
AU.setPreservesCFG();
}
static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF,
MachineGadgetGraph *G) {
WriteGraph(OS, G, /*ShortNames*/ false,
"Speculative gadgets for \"" + MF.getName() + "\" function");
}
bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
<< " *****\n");
STI = &MF.getSubtarget<X86Subtarget>();
if (!STI->useLVILoadHardening())
return false;
// FIXME: support 32-bit
if (!STI->is64Bit())
report_fatal_error("LVI load hardening is only supported on 64-bit", false);
// Don't skip functions with the "optnone" attribute, but still participate in
// opt-bisect.
const Function &F = MF.getFunction();
if (!F.hasOptNone() && skipFunction(F))
return false;
++NumFunctionsConsidered;
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
LLVM_DEBUG(dbgs() << "Building gadget graph...\n");
const auto &MLI = getAnalysis<MachineLoopInfo>();
const auto &MDT = getAnalysis<MachineDominatorTree>();
const auto &MDF = getAnalysis<MachineDominanceFrontier>();
std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF);
LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n");
if (Graph == nullptr)
return false; // didn't find any gadgets
if (EmitDotVerify) {
WriteGadgetGraph(outs(), MF, Graph.get());
return false;
}
if (EmitDot || EmitDotOnly) {
LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n");
std::error_code FileError;
std::string FileName = "lvi.";
FileName += MF.getName();
FileName += ".dot";
raw_fd_ostream FileOut(FileName, FileError);
if (FileError)
errs() << FileError.message();
WriteGadgetGraph(FileOut, MF, Graph.get());
FileOut.close();
LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n");
if (EmitDotOnly)
return false;
}
int FencesInserted;
if (!OptimizePluginPath.empty()) {
if (!OptimizeDL.isValid()) {
std::string ErrorMsg;
OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
OptimizePluginPath.c_str(), &ErrorMsg);
if (!ErrorMsg.empty())
report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
if (!OptimizeCut)
report_fatal_error("Invalid optimization plugin");
}
FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph));
} else { // Use the default greedy heuristic
FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph));
}
if (FencesInserted > 0)
++NumFunctionsMitigated;
NumFences += FencesInserted;
return (FencesInserted > 0);
}
std::unique_ptr<MachineGadgetGraph>
X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
MachineFunction &MF, const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
const MachineDominanceFrontier &MDF) const {
using namespace rdf;
// Build the Register Dataflow Graph using the RDF framework
TargetOperandInfo TOI{*TII};
DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI};
DFG.build();
Liveness L{MF.getRegInfo(), DFG};
L.computePhiInfo();
GraphBuilder Builder;
using GraphIter = typename GraphBuilder::BuilderNodeRef;
DenseMap<MachineInstr *, GraphIter> NodeMap;
int FenceCount = 0, GadgetCount = 0;
auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
auto Ref = NodeMap.find(MI);
if (Ref == NodeMap.end()) {
auto I = Builder.addVertex(MI);
NodeMap[MI] = I;
return std::pair<GraphIter, bool>{I, true};
}
return std::pair<GraphIter, bool>{Ref->getSecond(), false};
};
// The `Transmitters` map memoizes transmitters found for each def. If a def
// has not yet been analyzed, then it will not appear in the map. If a def
// has been analyzed and was determined not to have any transmitters, then
// its list of transmitters will be empty.
DenseMap<NodeId, std::vector<NodeId>> Transmitters;
// Analyze all machine instructions to find gadgets and LFENCEs, adding
// each interesting value to `Nodes`
auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) {
SmallSet<NodeId, 8> UsesVisited, DefsVisited;
std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
[&](NodeAddr<DefNode *> Def) {
if (Transmitters.find(Def.Id) != Transmitters.end())
return; // Already analyzed `Def`
// Use RDF to find all the uses of `Def`
rdf::NodeSet Uses;
RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
auto Use = DFG.addr<UseNode *>(UseID);
if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG);
for (auto I : L.getRealUses(Phi.Id)) {
if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) {
for (auto UA : I.second)
Uses.emplace(UA.first);
}
}
} else { // not a phi node
Uses.emplace(UseID);
}
}
// For each use of `Def`, we want to know whether:
// (1) The use can leak the Def'ed value,
// (2) The use can further propagate the Def'ed value to more defs
for (auto UseID : Uses) {
if (!UsesVisited.insert(UseID).second)
continue; // Already visited this use of `Def`
auto Use = DFG.addr<UseNode *>(UseID);
assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef));
MachineOperand &UseMO = Use.Addr->getOp();
MachineInstr &UseMI = *UseMO.getParent();
assert(UseMO.isReg());
// We naively assume that an instruction propagates any loaded
// uses to all defs unless the instruction is a call, in which
// case all arguments will be treated as gadget sources during
// analysis of the callee function.
if (UseMI.isCall())
continue;
// Check whether this use can transmit (leak) its value.
if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) ||
(!NoConditionalBranches &&
instrUsesRegToBranch(UseMI, UseMO.getReg()))) {
Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id);
if (UseMI.mayLoad())
continue; // Found a transmitting load -- no need to continue
// traversing its defs (i.e., this load will become
// a new gadget source anyways).
}
// Check whether the use propagates to more defs.
NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)};
rdf::NodeList AnalyzedChildDefs;
for (auto &ChildDef :
Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) {
if (!DefsVisited.insert(ChildDef.Id).second)
continue; // Already visited this def
if (Def.Addr->getAttrs() & NodeAttrs::Dead)
continue;
if (Def.Id == ChildDef.Id)
continue; // `Def` uses itself (e.g., increment loop counter)
AnalyzeDefUseChain(ChildDef);
// `Def` inherits all of its child defs' transmitters.
for (auto TransmitterId : Transmitters[ChildDef.Id])
Transmitters[Def.Id].push_back(TransmitterId);
}
}
// Note that this statement adds `Def.Id` to the map if no
// transmitters were found for `Def`.
auto &DefTransmitters = Transmitters[Def.Id];
// Remove duplicate transmitters
llvm::sort(DefTransmitters);
DefTransmitters.erase(
std::unique(DefTransmitters.begin(), DefTransmitters.end()),
DefTransmitters.end());
};
// Find all of the transmitters
AnalyzeDefUseChain(SourceDef);
auto &SourceDefTransmitters = Transmitters[SourceDef.Id];
if (SourceDefTransmitters.empty())
return; // No transmitters for `SourceDef`
MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef
? MachineGadgetGraph::ArgNodeSentinel
: SourceDef.Addr->getOp().getParent();
auto GadgetSource = MaybeAddNode(Source);
// Each transmitter is a sink for `SourceDef`.
for (auto TransmitterId : SourceDefTransmitters) {
MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode();
auto GadgetSink = MaybeAddNode(Sink);
// Add the gadget edge to the graph.
Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel,
GadgetSource.first, GadgetSink.first);
++GadgetCount;
}
};
LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n");
// Analyze function arguments
NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG);
for (NodeAddr<PhiNode *> ArgPhi :
EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) {
NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG);
llvm::for_each(Defs, AnalyzeDef);
}
// Analyze every instruction in MF
for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
for (NodeAddr<StmtNode *> SA :
BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) {
MachineInstr *MI = SA.Addr->getCode();
if (isFence(MI)) {
MaybeAddNode(MI);
++FenceCount;
} else if (MI->mayLoad()) {
NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG);
llvm::for_each(Defs, AnalyzeDef);
}
}
}
LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n");
LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n");
if (GadgetCount == 0)
return nullptr;
NumGadgets += GadgetCount;
// Traverse CFG to build the rest of the graph
SmallSet<MachineBasicBlock *, 8> BlocksVisited;
std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG =
[&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) {
unsigned LoopDepth = MLI.getLoopDepth(MBB);
if (!MBB->empty()) {
// Always add the first instruction in each block
auto NI = MBB->begin();
auto BeginBB = MaybeAddNode(&*NI);
Builder.addEdge(ParentDepth, GI, BeginBB.first);
if (!BlocksVisited.insert(MBB).second)
return;
// Add any instructions within the block that are gadget components
GI = BeginBB.first;
while (++NI != MBB->end()) {
auto Ref = NodeMap.find(&*NI);
if (Ref != NodeMap.end()) {
Builder.addEdge(LoopDepth, GI, Ref->getSecond());
GI = Ref->getSecond();
}
}
// Always add the terminator instruction, if one exists
auto T = MBB->getFirstTerminator();
if (T != MBB->end()) {
auto EndBB = MaybeAddNode(&*T);
if (EndBB.second)
Builder.addEdge(LoopDepth, GI, EndBB.first);
GI = EndBB.first;
}
}
for (MachineBasicBlock *Succ : MBB->successors())
TraverseCFG(Succ, GI, LoopDepth);
};
// ArgNodeSentinel is a pseudo-instruction that represents MF args in the
// GadgetGraph
GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first;
TraverseCFG(&MF.front(), ArgNode, 0);
std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)};
LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n");
return G;
}
// Returns the number of remaining gadget edges that could not be eliminated
int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */,
MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const {
if (G.NumFences > 0) {
// Eliminate fences and CFG edges that ingress and egress the fence, as
// they are trivially mitigated.
for (const auto &E : G.edges()) {
const MachineGadgetGraph::Node *Dest = E.getDest();
if (isFence(Dest->getValue())) {
ElimNodes.insert(*Dest);
ElimEdges.insert(E);
for (const auto &DE : Dest->edges())
ElimEdges.insert(DE);
}
}
}
// Find and eliminate gadget edges that have been mitigated.
int MitigatedGadgets = 0, RemainingGadgets = 0;
MachineGadgetGraph::NodeSet ReachableNodes{G};
for (const auto &RootN : G.nodes()) {
if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge))
continue; // skip this node if it isn't a gadget source
// Find all of the nodes that are CFG-reachable from RootN using DFS
ReachableNodes.clear();
std::function<void(const MachineGadgetGraph::Node *, bool)>
FindReachableNodes =
[&](const MachineGadgetGraph::Node *N, bool FirstNode) {
if (!FirstNode)
ReachableNodes.insert(*N);
for (const auto &E : N->edges()) {
const MachineGadgetGraph::Node *Dest = E.getDest();
if (MachineGadgetGraph::isCFGEdge(E) &&
!ElimEdges.contains(E) && !ReachableNodes.contains(*Dest))
FindReachableNodes(Dest, false);
}
};
FindReachableNodes(&RootN, true);
// Any gadget whose sink is unreachable has been mitigated
for (const auto &E : RootN.edges()) {
if (MachineGadgetGraph::isGadgetEdge(E)) {
if (ReachableNodes.contains(*E.getDest())) {
// This gadget's sink is reachable
++RemainingGadgets;
} else { // This gadget's sink is unreachable, and therefore mitigated
++MitigatedGadgets;
ElimEdges.insert(E);
}
}
}
}
return RemainingGadgets;
}
std::unique_ptr<MachineGadgetGraph>
X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges(
std::unique_ptr<MachineGadgetGraph> Graph) const {
MachineGadgetGraph::NodeSet ElimNodes{*Graph};
MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
int RemainingGadgets =
elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes);
if (ElimEdges.empty() && ElimNodes.empty()) {
Graph->NumFences = 0;
Graph->NumGadgets = RemainingGadgets;
} else {
Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */,
RemainingGadgets);
}
return Graph;
}
int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin(
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
int FencesInserted = 0;
do {
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
Graph = trimMitigatedEdges(std::move(Graph));
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
if (Graph->NumGadgets == 0)
break;
LLVM_DEBUG(dbgs() << "Cutting edges...\n");
EdgeSet CutEdges{*Graph};
auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() +
1 /* terminator node */);
auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size());
auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size());
auto EdgeValues = std::make_unique<int[]>(Graph->edges_size());
for (const auto &N : Graph->nodes()) {
Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin());
}
Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node
for (const auto &E : Graph->edges()) {
Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest());
EdgeValues[Graph->getEdgeIndex(E)] = E.getValue();
}
OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(),
EdgeCuts.get(), Graph->edges_size());
for (int I = 0; I < Graph->edges_size(); ++I)
if (EdgeCuts[I])
CutEdges.set(I);
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
FencesInserted += insertFences(MF, *Graph, CutEdges);
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph},
CutEdges);
} while (true);
return FencesInserted;
}
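// For illustration only, a deliberately naive "optimize_cut" plugin for the
// interface consumed above (it assumes the CSR-style arrays populated in
// hardenLoadsWithPlugin: nodes[i] holds the index of node i's first edge,
// nodes[nodes_size] == edges_size, and edge_values[j] < 0 marks a gadget
// edge) could simply cut every CFG edge:
//
//   extern "C" int optimize_cut(unsigned *nodes, unsigned nodes_size,
//                               unsigned *edges, int *edge_values,
//                               int *cut_edges /* out */,
//                               unsigned edges_size) {
//     for (unsigned J = 0; J < edges_size; ++J)
//       cut_edges[J] = (edge_values[J] >= 0); // fence along every CFG edge
//     return 0;
//   }
//
// A real plugin would instead compute a (near-)minimal cut over the same
// arrays.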
int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic(
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
Graph = trimMitigatedEdges(std::move(Graph));
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
if (Graph->NumGadgets == 0)
return 0;
LLVM_DEBUG(dbgs() << "Cutting edges...\n");
MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph};
MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph};
auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) {
return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
MachineGadgetGraph::isCFGEdge(E);
};
auto IsGadgetEdge = [&ElimEdges,
&CutEdges](const MachineGadgetGraph::Edge &E) {
return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
MachineGadgetGraph::isGadgetEdge(E);
};
// FIXME: this is O(E^2), we could probably do better.
do {
// Find the cheapest CFG edge that will eliminate a gadget (by being
// egress from a SOURCE node or ingress to a SINK node), and cut it.
const MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
// First, collect all gadget source and sink nodes.
MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph};
for (const auto &N : Graph->nodes()) {
if (ElimNodes.contains(N))
continue;
for (const auto &E : N.edges()) {
if (IsGadgetEdge(E)) {
GadgetSources.insert(N);
GadgetSinks.insert(*E.getDest());
}
}
}
// Next, look for the cheapest CFG edge which, when cut, is guaranteed to
// mitigate at least one gadget by either:
// (a) being egress from a gadget source, or
// (b) being ingress to a gadget sink.
for (const auto &N : Graph->nodes()) {
if (ElimNodes.contains(N))
continue;
for (const auto &E : N.edges()) {
if (IsCFGEdge(E)) {
if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) {
if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue())
CheapestSoFar = &E;
}
}
}
}
assert(CheapestSoFar && "Failed to cut an edge");
CutEdges.insert(*CheapestSoFar);
ElimEdges.insert(*CheapestSoFar);
} while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes));
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
int FencesInserted = insertFences(MF, *Graph, CutEdges);
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
return FencesInserted;
}
int X86LoadValueInjectionLoadHardeningPass::insertFences(
MachineFunction &MF, MachineGadgetGraph &G,
EdgeSet &CutEdges /* in, out */) const {
int FencesInserted = 0;
for (const auto &N : G.nodes()) {
for (const auto &E : N.edges()) {
if (CutEdges.contains(E)) {
MachineInstr *MI = N.getValue(), *Prev;
MachineBasicBlock *MBB; // Insert an LFENCE in this MBB
MachineBasicBlock::iterator InsertionPt; // ...at this point
if (MI == MachineGadgetGraph::ArgNodeSentinel) {
// insert LFENCE at beginning of entry block
MBB = &MF.front();
InsertionPt = MBB->begin();
Prev = nullptr;
} else if (MI->isBranch()) { // insert the LFENCE before the branch
MBB = MI->getParent();
InsertionPt = MI;
Prev = MI->getPrevNode();
// Remove all egress CFG edges from this branch because the inserted
// LFENCE prevents gadgets from crossing the branch.
for (const auto &E : N.edges()) {
if (MachineGadgetGraph::isCFGEdge(E))
CutEdges.insert(E);
}
} else { // insert the LFENCE after the instruction
MBB = MI->getParent();
InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
Prev = InsertionPt == MBB->end()
? (MBB->empty() ? nullptr : &MBB->back())
: InsertionPt->getPrevNode();
}
// Ensure this insertion is not redundant (two LFENCEs in sequence).
if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
(!Prev || !isFence(Prev))) {
BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
++FencesInserted;
}
}
}
}
return FencesInserted;
}
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
const MachineInstr &MI, unsigned Reg) const {
if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE)
return false;
// FIXME: This does not handle pseudo loading instruction like TCRETURN*
const MCInstrDesc &Desc = MI.getDesc();
int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
if (MemRefBeginIdx < 0) {
LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading "
"instruction:\n";
MI.print(dbgs()); dbgs() << '\n';);
return false;
}
MemRefBeginIdx += X86II::getOperandBias(Desc);
const MachineOperand &BaseMO =
MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
const MachineOperand &IndexMO =
MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister &&
TRI->regsOverlap(BaseMO.getReg(), Reg)) ||
(IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister &&
TRI->regsOverlap(IndexMO.getReg(), Reg));
}
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch(
const MachineInstr &MI, unsigned Reg) const {
if (!MI.isConditionalBranch())
return false;
for (const MachineOperand &Use : MI.uses())
if (Use.isReg() && Use.getReg() == Reg)
return true;
return false;
}
INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
"X86 LVI load hardening", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
"X86 LVI load hardening", false, false)
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
return new X86LoadValueInjectionLoadHardeningPass();
}
namespace {
/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
/// analysis passes that add complexity to the pipeline. This complexity
/// can cause noticeable overhead when no optimizations are enabled, i.e., -O0.
/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
/// provide the same security as the optimized pass, but without adding
/// unnecessary complexity to the LLVM pipeline.
///
/// The behavior of this pass is simply to insert an LFENCE after every load
/// instruction.
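///
/// For example (a sketch), a lone load such as
///   movq (%rdi), %rax
/// simply becomes
///   movq (%rdi), %rax
///   lfence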
class X86LoadValueInjectionLoadHardeningUnoptimizedPass
: public MachineFunctionPass {
public:
X86LoadValueInjectionLoadHardeningUnoptimizedPass()
: MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
}
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
};
} // end anonymous namespace
char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
<< " *****\n");
const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
if (!STI->useLVILoadHardening())
return false;
// FIXME: support 32-bit
if (!STI->is64Bit())
report_fatal_error("LVI load hardening is only supported on 64-bit", false);
// Don't skip functions with the "optnone" attribute, but still participate in
// opt-bisect.
const Function &F = MF.getFunction();
if (!F.hasOptNone() && skipFunction(F))
return false;
bool Modified = false;
++NumFunctionsConsidered;
const TargetInstrInfo *TII = STI->getInstrInfo();
for (auto &MBB : MF) {
for (auto &MI : MBB) {
if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
MI.getOpcode() == X86::MFENCE)
continue;
MachineBasicBlock::iterator InsertionPt =
MI.getNextNode() ? MI.getNextNode() : MBB.end();
BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
++NumFences;
Modified = true;
}
}
if (Modified)
++NumFunctionsMitigated;
return Modified;
}
INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
"X86 LVI load hardening", false, false)
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
}

View File

@ -0,0 +1,143 @@
//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: Replaces every `ret` instruction with the sequence:
/// ```
/// pop <scratch-reg>
/// lfence
/// jmp *<scratch-reg>
/// ```
/// where `<scratch-reg>` is some available scratch register, according to the
/// calling convention of the function being mitigated.
///
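/// For example (a sketch; the register actually chosen depends on the calling
/// convention, see runOnMachineFunction below), a bare `retq` becomes:
/// ```
/// popq %rcx
/// lfence
/// jmpq *%rcx
/// ```
///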
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include <bitset>
using namespace llvm;
#define PASS_KEY "x86-lvi-ret"
#define DEBUG_TYPE PASS_KEY
STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
"were deployed");
namespace {
class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass {
public:
X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "X86 Load Value Injection (LVI) Ret-Hardening";
}
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
};
} // end anonymous namespace
char X86LoadValueInjectionRetHardeningPass::ID = 0;
bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
<< " *****\n");
const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit())
return false; // FIXME: support 32-bit
// Don't skip functions with the "optnone" attribute, but still participate in
// opt-bisect.
const Function &F = MF.getFunction();
if (!F.hasOptNone() && skipFunction(F))
return false;
++NumFunctionsConsidered;
const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
const X86InstrInfo *TII = Subtarget->getInstrInfo();
unsigned ClobberReg = X86::NoRegister;
std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
UnclobberableGR64s.set(X86::RAX); // used for function return
UnclobberableGR64s.set(X86::RDX); // used for function return
// We can clobber any register allowed by the function's calling convention.
for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
UnclobberableGR64s.set(Reg);
for (auto &Reg : X86::GR64RegClass) {
if (!UnclobberableGR64s.test(Reg)) {
ClobberReg = Reg;
break;
}
}
if (ClobberReg != X86::NoRegister) {
LLVM_DEBUG(dbgs() << "Selected register "
<< Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
<< " to clobber\n");
} else {
LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
}
bool Modified = false;
for (auto &MBB : MF) {
if (MBB.empty())
continue;
MachineInstr &MI = MBB.back();
if (MI.getOpcode() != X86::RETQ)
continue;
if (ClobberReg != X86::NoRegister) {
MBB.erase_instr(&MI);
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
.addReg(ClobberReg, RegState::Define)
.setMIFlag(MachineInstr::FrameDestroy);
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
.addReg(ClobberReg);
} else {
// In case there is no available scratch register, we can still read from
// RSP to assert that RSP points to a valid page. The write to RSP is
// also helpful because it verifies that the stack's write permissions
// are intact.
MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
X86::RSP, false, 0)
.addImm(0)
->addRegisterDead(X86::EFLAGS, TRI);
}
++NumFences;
Modified = true;
}
if (Modified)
++NumFunctionsMitigated;
return Modified;
}
INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY,
"X86 LVI ret hardener", false, false)
FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() {
return new X86LoadValueInjectionRetHardeningPass();
}

View File

@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
break;
case MachineOperand::MO_Register:
// FIXME: Add retpoline support and remove this.
if (Subtarget->useRetpolineIndirectCalls())
report_fatal_error("Lowering register statepoints with retpoline not "
if (Subtarget->useIndirectThunkCalls())
report_fatal_error("Lowering register statepoints with thunks not "
"yet implemented.");
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
CallOpcode = X86::CALL64r;
@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
// FIXME: Add retpoline support and remove this.
if (Subtarget->useRetpolineIndirectCalls())
if (Subtarget->useIndirectThunkCalls())
report_fatal_error(
"Lowering patchpoint with retpoline not yet implemented.");
"Lowering patchpoint with thunks not yet implemented.");
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
}

View File

@ -1,286 +0,0 @@
//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk implementing a "retpoline". This is
/// a RET-implemented trampoline that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "x86-retpoline-thunks"
static const char ThunkNamePrefix[] = "__llvm_retpoline_";
static const char R11ThunkName[] = "__llvm_retpoline_r11";
static const char EAXThunkName[] = "__llvm_retpoline_eax";
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
static const char EDXThunkName[] = "__llvm_retpoline_edx";
static const char EDIThunkName[] = "__llvm_retpoline_edi";
namespace {
class X86RetpolineThunks : public MachineFunctionPass {
public:
static char ID;
X86RetpolineThunks() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
bool doInitialization(Module &M) override;
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
}
private:
MachineModuleInfo *MMI = nullptr;
const TargetMachine *TM = nullptr;
bool Is64Bit = false;
const X86Subtarget *STI = nullptr;
const X86InstrInfo *TII = nullptr;
bool InsertedThunks = false;
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
void populateThunk(MachineFunction &MF, unsigned Reg);
};
} // end anonymous namespace
FunctionPass *llvm::createX86RetpolineThunksPass() {
return new X86RetpolineThunks();
}
char X86RetpolineThunks::ID = 0;
bool X86RetpolineThunks::doInitialization(Module &M) {
InsertedThunks = false;
return false;
}
bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << getPassName() << '\n');
TM = &MF.getTarget();
STI = &MF.getSubtarget<X86Subtarget>();
TII = STI->getInstrInfo();
Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
Module &M = const_cast<Module &>(*MMI->getModule());
// If this function is not a thunk, check to see if we need to insert
// a thunk.
if (!MF.getName().startswith(ThunkNamePrefix)) {
// If we've already inserted a thunk, nothing else to do.
if (InsertedThunks)
return false;
// Only add a thunk if one of the functions has the retpoline feature
// enabled in its subtarget, and doesn't enable external thunks.
// FIXME: Conditionalize on indirect calls so we don't emit a thunk when
// nothing will end up calling it.
// FIXME: It's a little silly to look at every function just to enumerate
// the subtargets, but eventually we'll want to look at them for indirect
// calls, so maybe this is OK.
if ((!STI->useRetpolineIndirectCalls() &&
!STI->useRetpolineIndirectBranches()) ||
STI->useRetpolineExternalThunk())
return false;
// Otherwise, we need to insert the thunk.
// WARNING: This is not really a well behaving thing to do in a function
// pass. We extract the module and insert a new function (and machine
// function) directly into the module.
if (Is64Bit)
createThunkFunction(M, R11ThunkName);
else
for (StringRef Name :
{EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
createThunkFunction(M, Name);
InsertedThunks = true;
return true;
}
// If this *is* a thunk function, we need to populate it with the correct MI.
if (Is64Bit) {
assert(MF.getName() == "__llvm_retpoline_r11" &&
"Should only have an r11 thunk on 64-bit targets");
// __llvm_retpoline_r11:
// callq .Lr11_call_target
// .Lr11_capture_spec:
// pause
// lfence
// jmp .Lr11_capture_spec
// .align 16
// .Lr11_call_target:
// movq %r11, (%rsp)
// retq
populateThunk(MF, X86::R11);
} else {
// For 32-bit targets we need to emit a collection of thunks for various
// possible scratch registers as well as a fallback that uses EDI, which is
// normally callee saved.
// __llvm_retpoline_eax:
// calll .Leax_call_target
// .Leax_capture_spec:
// pause
// jmp .Leax_capture_spec
// .align 16
// .Leax_call_target:
// movl %eax, (%esp) # Clobber return addr
// retl
//
// __llvm_retpoline_ecx:
// ... # Same setup
// movl %ecx, (%esp)
// retl
//
// __llvm_retpoline_edx:
// ... # Same setup
// movl %edx, (%esp)
// retl
//
// __llvm_retpoline_edi:
// ... # Same setup
// movl %edi, (%esp)
// retl
if (MF.getName() == EAXThunkName)
populateThunk(MF, X86::EAX);
else if (MF.getName() == ECXThunkName)
populateThunk(MF, X86::ECX);
else if (MF.getName() == EDXThunkName)
populateThunk(MF, X86::EDX);
else if (MF.getName() == EDIThunkName)
populateThunk(MF, X86::EDI);
else
llvm_unreachable("Invalid thunk name on x86-32!");
}
return true;
}
void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
assert(Name.startswith(ThunkNamePrefix) &&
"Created a thunk with an unexpected prefix!");
LLVMContext &Ctx = M.getContext();
auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
Function *F =
Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
F->setVisibility(GlobalValue::HiddenVisibility);
F->setComdat(M.getOrInsertComdat(Name));
// Add Attributes so that we don't create a frame, unwind information, or
// inline.
AttrBuilder B;
B.addAttribute(llvm::Attribute::NoUnwind);
B.addAttribute(llvm::Attribute::Naked);
F->addAttributes(llvm::AttributeList::FunctionIndex, B);
// Populate our function a bit so that we can verify.
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
IRBuilder<> Builder(Entry);
Builder.CreateRetVoid();
// MachineFunctions/MachineBasicBlocks aren't created automatically for the
// IR-level constructs we already made. Create them and insert them into the
// module.
MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
// Insert EntryMBB into MF. It's not in the module until we do this.
MF.insert(MF.end(), EntryMBB);
}
void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
unsigned Reg) {
const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
.addReg(Reg);
}
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
unsigned Reg) {
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
// generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
while (MF.size() > 1)
MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MCSymbol *TargetSym = MF.getContext().createTempSymbol();
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
Entry->addLiveIn(Reg);
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
// The MIR verifier thinks that the CALL in the entry block will fall through
// to CaptureSpec, so mark it as the successor. Technically, CallTarget is
// the successor, but the MIR verifier doesn't know how to cope with that.
Entry->addSuccessor(CaptureSpec);
// In the capture loop for speculation, we want to stop the processor from
// speculating as fast as possible. On Intel processors, the PAUSE instruction
// will block speculation without consuming any execution resources. On AMD
// processors, the PAUSE instruction is (essentially) a nop, so we also use an
// LFENCE instruction which they have advised will stop speculation as well
// with minimal resource utilization. We still end the capture with a jump to
// form an infinite loop to fully guarantee that no matter what implementation
// of the x86 ISA, speculating this code path never escapes.
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
CaptureSpec->setHasAddressTaken();
CaptureSpec->addSuccessor(CaptureSpec);
CallTarget->addLiveIn(Reg);
CallTarget->setHasAddressTaken();
CallTarget->setAlignment(Align(16));
insertRegReturnAddrClobber(*CallTarget, Reg);
CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}

View File

@ -421,6 +421,16 @@ protected:
/// than emitting one inside the compiler.
bool UseRetpolineExternalThunk = false;
/// Prevent generation of indirect call/branch instructions from memory,
/// and force all indirect call/branch instructions from a register to be
/// preceded by an LFENCE. Also decompose RET instructions into a
/// POP+LFENCE+JMP sequence.
bool UseLVIControlFlowIntegrity = false;
/// Insert LFENCE instructions to prevent data speculatively injected into
/// loads from being used maliciously.
bool UseLVILoadHardening = false;
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@ -707,8 +717,21 @@ public:
return UseRetpolineIndirectBranches;
}
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
// These are generic getters that OR together all of the thunk types
// supported by the subtarget. Therefore useIndirectThunk*() will return true
// if any respective thunk feature is enabled.
bool useIndirectThunkCalls() const {
return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
}
bool useIndirectThunkBranches() const {
return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
}
bool preferMaskRegisters() const { return PreferMaskRegisters; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
bool useLVILoadHardening() const { return UseLVILoadHardening; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@ -853,10 +876,10 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
/// If we are using retpolines, we need to expand indirectbr to avoid it
/// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
return useRetpolineIndirectBranches();
return useIndirectThunkBranches();
}
/// Enable the MachineScheduler pass for all X86 subtargets.

View File

@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
initializeX86CondBrFoldingPassPass(PR);
initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
}
@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {
void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
if (getOptLevel() != CodeGenOpt::None)
addPass(createX86LoadValueInjectionLoadHardeningPass());
else
addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
}
void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();
addPass(createX86RetpolineThunksPass());
addPass(createX86IndirectThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {
// Identify valid longjmp targets for Windows Control Flow Guard.
if (TT.isOSWindows())
addPass(createCFGuardLongjmpPass());
addPass(createX86LoadValueInjectionRetHardeningPass());
}
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {

View File

@ -1768,7 +1768,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Constant *C2;
// C-(C2-X) --> X+(C-C2)
if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2))
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
// C-(X+C2) --> (C-C2)-X

View File

@ -1,14 +1,14 @@
// $FreeBSD$
#define LLVM_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLVM_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
#define CLANG_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define CLANG_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git"
// <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
#define LLD_REVISION "llvmorg-10.0.0-97-g6f71678ecd2-1300007"
#define LLD_REVISION "llvmorg-10.0.0-129-gd24d5c8e308-1300007"
#define LLD_REPOSITORY "FreeBSD"
#define LLDB_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLDB_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git"

View File

@ -1,3 +1,3 @@
/* $FreeBSD$ */
#define LLVM_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLVM_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"

View File

@ -318,6 +318,9 @@ SRCS_MIN+= CodeGen/ProcessImplicitDefs.cpp
SRCS_MIN+= CodeGen/PrologEpilogInserter.cpp
SRCS_MIN+= CodeGen/PseudoSourceValue.cpp
SRCS_MIN+= CodeGen/ReachingDefAnalysis.cpp
SRCS_MIN+= CodeGen/RDFGraph.cpp
SRCS_MIN+= CodeGen/RDFLiveness.cpp
SRCS_MIN+= CodeGen/RDFRegisters.cpp
SRCS_MIN+= CodeGen/RegAllocBase.cpp
SRCS_MIN+= CodeGen/RegAllocBasic.cpp
SRCS_MIN+= CodeGen/RegAllocFast.cpp
@ -1284,6 +1287,7 @@ SRCS_MIN+= Target/X86/X86FrameLowering.cpp
SRCS_MIN+= Target/X86/X86ISelDAGToDAG.cpp
SRCS_MIN+= Target/X86/X86ISelLowering.cpp
SRCS_MIN+= Target/X86/X86IndirectBranchTracking.cpp
SRCS_MIN+= Target/X86/X86IndirectThunks.cpp
SRCS_MIN+= Target/X86/X86InsertPrefetch.cpp
SRCS_MIN+= Target/X86/X86InstrFMA3Info.cpp
SRCS_MIN+= Target/X86/X86InstrFoldTables.cpp
@ -1291,6 +1295,8 @@ SRCS_MIN+= Target/X86/X86InstrInfo.cpp
SRCS_MIN+= Target/X86/X86InstructionSelector.cpp
SRCS_MIN+= Target/X86/X86InterleavedAccess.cpp
SRCS_MIN+= Target/X86/X86LegalizerInfo.cpp
SRCS_MIN+= Target/X86/X86LoadValueInjectionLoadHardening.cpp
SRCS_MIN+= Target/X86/X86LoadValueInjectionRetHardening.cpp
SRCS_MIN+= Target/X86/X86MCInstLower.cpp
SRCS_MIN+= Target/X86/X86MachineFunctionInfo.cpp
SRCS_MIN+= Target/X86/X86MacroFusion.cpp
@ -1298,7 +1304,6 @@ SRCS_MIN+= Target/X86/X86OptimizeLEAs.cpp
SRCS_MIN+= Target/X86/X86PadShortFunction.cpp
SRCS_MIN+= Target/X86/X86RegisterBankInfo.cpp
SRCS_MIN+= Target/X86/X86RegisterInfo.cpp
SRCS_MIN+= Target/X86/X86RetpolineThunks.cpp
SRCS_MIN+= Target/X86/X86SelectionDAGInfo.cpp
SRCS_MIN+= Target/X86/X86ShuffleDecodeConstantPool.cpp
SRCS_MIN+= Target/X86/X86SpeculativeLoadHardening.cpp