commit d3a8f8c8d5

Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp
llvmorg-10.0.0-129-gd24d5c8e308. Getting closer to 10.0.1-rc2.

MFC after: 3 weeks
@@ -2267,6 +2267,14 @@ def mspeculative_load_hardening : Flag<["-"], "mspeculative-load-hardening">,
  Group<m_Group>, Flags<[CoreOption,CC1Option]>;
def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">,
  Group<m_Group>, Flags<[CoreOption]>;
def mlvi_hardening : Flag<["-"], "mlvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
  HelpText<"Enable all mitigations for Load Value Injection (LVI)">;
def mno_lvi_hardening : Flag<["-"], "mno-lvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
  HelpText<"Disable mitigations for Load Value Injection (LVI)">;
def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
  HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">;
def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
  HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">;

def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>,
  HelpText<"Enable linker relaxation">;
@@ -276,11 +276,12 @@ public:
    break;
  case 'Q': // Memory operand that is an offset from a register (it is
            // usually better to use `m' or `es' in asm statements)
    Info.setAllowsRegister();
    LLVM_FALLTHROUGH;
  case 'Z': // Memory operand that is an indexed or indirect from a
            // register (it is usually better to use `m' or `es' in
            // asm statements)
    Info.setAllowsMemory();
    Info.setAllowsRegister();
    break;
  case 'R': // AIX TOC entry
  case 'a': // Address operand that is an indexed or indirect from a
@@ -454,8 +454,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
        << lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto";
  }

  if ((Kinds & SanitizerKind::ShadowCallStack) &&
      TC.getTriple().getArch() == llvm::Triple::aarch64 &&
  if ((Kinds & SanitizerKind::ShadowCallStack) && TC.getTriple().isAArch64() &&
      !llvm::AArch64::isX18ReservedByDefault(TC.getTriple()) &&
      !Args.hasArg(options::OPT_ffixed_x18)) {
    D.Diag(diag::err_drv_argument_only_allowed_with)
@@ -954,15 +954,12 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
  if (getTriple().getArch() == llvm::Triple::x86 ||
      getTriple().getArch() == llvm::Triple::x86_64 ||
      getTriple().getArch() == llvm::Triple::arm ||
      getTriple().getArch() == llvm::Triple::aarch64 ||
      getTriple().getArch() == llvm::Triple::wasm32 ||
      getTriple().getArch() == llvm::Triple::wasm64)
      getTriple().getArch() == llvm::Triple::wasm64 || getTriple().isAArch64())
    Res |= SanitizerKind::CFIICall;
  if (getTriple().getArch() == llvm::Triple::x86_64 ||
      getTriple().getArch() == llvm::Triple::aarch64)
  if (getTriple().getArch() == llvm::Triple::x86_64 || getTriple().isAArch64())
    Res |= SanitizerKind::ShadowCallStack;
  if (getTriple().getArch() == llvm::Triple::aarch64 ||
      getTriple().getArch() == llvm::Triple::aarch64_be)
  if (getTriple().isAArch64())
    Res |= SanitizerKind::MemTag;
  return Res;
}
@@ -147,6 +147,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
  // flags). This is a bit hacky but keeps existing usages working. We should
  // consider deprecating this and instead warn if the user requests external
  // retpoline thunks and *doesn't* request some form of retpolines.
  auto SpectreOpt = clang::driver::options::ID::OPT_INVALID;
  if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline,
                         options::OPT_mspeculative_load_hardening,
                         options::OPT_mno_speculative_load_hardening)) {

@@ -154,12 +155,14 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
                     false)) {
      Features.push_back("+retpoline-indirect-calls");
      Features.push_back("+retpoline-indirect-branches");
      SpectreOpt = options::OPT_mretpoline;
    } else if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
                            options::OPT_mno_speculative_load_hardening,
                            false)) {
      // On x86, speculative load hardening relies on at least using retpolines
      // for indirect calls.
      Features.push_back("+retpoline-indirect-calls");
      SpectreOpt = options::OPT_mspeculative_load_hardening;
    }
  } else if (Args.hasFlag(options::OPT_mretpoline_external_thunk,
                          options::OPT_mno_retpoline_external_thunk, false)) {

@@ -167,6 +170,26 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
    // eventually switch to an error here.
    Features.push_back("+retpoline-indirect-calls");
    Features.push_back("+retpoline-indirect-branches");
    SpectreOpt = options::OPT_mretpoline_external_thunk;
  }

  auto LVIOpt = clang::driver::options::ID::OPT_INVALID;
  if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening,
                   false)) {
    Features.push_back("+lvi-load-hardening");
    Features.push_back("+lvi-cfi"); // load hardening implies CFI protection
    LVIOpt = options::OPT_mlvi_hardening;
  } else if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi,
                          false)) {
    Features.push_back("+lvi-cfi");
    LVIOpt = options::OPT_mlvi_cfi;
  }

  if (SpectreOpt != clang::driver::options::ID::OPT_INVALID &&
      LVIOpt != clang::driver::options::ID::OPT_INVALID) {
    D.Diag(diag::err_drv_argument_not_allowed_with)
        << D.getOpts().getOptionName(SpectreOpt)
        << D.getOpts().getOptionName(LVIOpt);
  }

  // Now add any that the user explicitly requested on the command line,
@@ -488,6 +488,9 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
  def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                       llvm_v4i32_ty], [IntrNoMem]>;
  def int_ppc_altivec_vmsumudm : GCCBuiltin<"__builtin_altivec_vmsumudm">,
            Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
                       llvm_v1i128_ty], [IntrNoMem]>;
  def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
                       llvm_v4i32_ty], [IntrNoMem]>;
@@ -40,8 +40,8 @@ template <typename T, size_t N> struct object_deleter<T[N]> {
// constexpr, a dynamic initializer may be emitted depending on optimization
// settings. For the affected versions of MSVC, use the old linker
// initialization pattern of not providing a constructor and leaving the fields
// uninitialized.
#if !defined(_MSC_VER) || defined(__clang__)
// uninitialized. See http://llvm.org/PR41367 for details.
#if !defined(_MSC_VER) || (_MSC_VER >= 1925) || defined(__clang__)
#define LLVM_USE_CONSTEXPR_CTOR
#endif
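The hunk above only adjusts the guard that decides whether LLVM's ManagedStatic may use a constexpr constructor (adding MSVC 19.25+, where the PR41367 miscompile is fixed). A minimal sketch of the underlying pattern, using a made-up type name rather than the actual ManagedStaticBase, might look like this:

    // Illustrative sketch only; "LazySlot" stands in for ManagedStaticBase.
    #include <atomic>

    class LazySlot {
      std::atomic<void *> Ptr;

    public:
    #ifdef LLVM_USE_CONSTEXPR_CTOR
      // Compilers without the MSVC bug get a constexpr constructor, so the
      // object is constant-initialized and no dynamic initializer is emitted.
      constexpr LazySlot() : Ptr(nullptr) {}
    #else
      // Affected MSVC versions: do not initialize the field in a constructor;
      // objects with static storage duration are zero-initialized instead.
      LazySlot() = default;
    #endif
    };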
@@ -959,6 +959,10 @@ def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i32;
}
def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = f16;
}
def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = f32;

@@ -1094,6 +1098,11 @@ def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
  let IsStore = 1;
  let MemoryVT = i32;
}
def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
                            (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = f16;
}
def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
                            (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
@@ -8,8 +8,6 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
#include "RDFGraph.h"
#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"

@@ -20,6 +18,8 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

@@ -753,8 +753,10 @@ RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
  const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
  if (RegisterId R = TLI.getExceptionPointerRegister(PF))
    LR.insert(RegisterRef(R));
  if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
    LR.insert(RegisterRef(R));
  if (!isFuncletEHPersonality(classifyEHPersonality(PF))) {
    if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
      LR.insert(RegisterRef(R));
  }
  return LR;
}
@@ -22,9 +22,6 @@
//   and Embedded Architectures and Compilers", 8 (4),
//   <10.1145/2086696.2086706>. <hal-00647369>
//
#include "RDFLiveness.h"
#include "RDFGraph.h"
#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"

@@ -33,6 +30,9 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//

#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -147,8 +147,17 @@ void llvm::computeLTOCacheKey(
  // Include the hash for the current module
  auto ModHash = Index.getModuleHash(ModuleID);
  Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));

  std::vector<uint64_t> ExportsGUID;
  ExportsGUID.reserve(ExportList.size());
  for (const auto &VI : ExportList) {
    auto GUID = VI.getGUID();
    ExportsGUID.push_back(GUID);
  }

  // Sort the export list elements GUIDs.
  llvm::sort(ExportsGUID);
  for (uint64_t GUID : ExportsGUID) {
    // The export list can impact the internalization, be conservative here
    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
  }

@@ -156,12 +165,23 @@ void llvm::computeLTOCacheKey(
  // Include the hash for every module we import functions from. The set of
  // imported symbols for each module may affect code generation and is
  // sensitive to link order, so include that as well.
  for (auto &Entry : ImportList) {
    auto ModHash = Index.getModuleHash(Entry.first());
  using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator;
  std::vector<ImportMapIteratorTy> ImportModulesVector;
  ImportModulesVector.reserve(ImportList.size());

  for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
       ++It) {
    ImportModulesVector.push_back(It);
  }
  llvm::sort(ImportModulesVector,
             [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs)
                 -> bool { return Lhs->getKey() < Rhs->getKey(); });
  for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) {
    auto ModHash = Index.getModuleHash(EntryIt->first());
    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));

    AddUint64(Entry.second.size());
    for (auto &Fn : Entry.second)
    AddUint64(EntryIt->second.size());
    for (auto &Fn : EntryIt->second)
      AddUint64(Fn);
  }
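The hunk above makes the ThinLTO cache key independent of hash-table iteration order: export GUIDs are sorted, and the StringMap-keyed import list is walked through a vector of iterators sorted by module name before being hashed. A stand-alone sketch of the same idea, with hypothetical types rather than the actual LLVM code:

    // Sketch: hash an unordered import map deterministically by sorting keys.
    #include <algorithm>
    #include <cstdint>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using ImportMap = std::unordered_map<std::string, std::vector<uint64_t>>;

    template <typename Hasher>
    void hashImports(const ImportMap &Imports, Hasher &H) {
      std::vector<ImportMap::const_iterator> Its;
      Its.reserve(Imports.size());
      for (auto It = Imports.begin(); It != Imports.end(); ++It)
        Its.push_back(It);
      // Sort by module name so the result does not depend on bucket order.
      std::sort(Its.begin(), Its.end(),
                [](auto L, auto R) { return L->first < R->first; });
      for (auto It : Its) {
        H.update(It->first);         // module identifier
        H.update(It->second.size()); // number of imported symbols
        for (uint64_t GUID : It->second)
          H.update(GUID);
      }
    }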
@@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,

  LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
                    << val << '\n');
  SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
  SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));

  // After replacement, the current node is dead, we need to
  // go backward one step to make iterator still work
@@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
                              bool CheckPointer, bool SeenPointer) {
  if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
    TypeId = DIToIdMap[Ty];

    // To handle the case like the following:
    //    struct t;
    //    typedef struct t _t;
    //    struct s1 { _t *c; };
    //    int test1(struct s1 *arg) { ... }
    //
    //    struct t { int a; int b; };
    //    struct s2 { _t c; }
    //    int test2(struct s2 *arg) { ... }
    //
    // During traversing test1() argument, "_t" is recorded
    // in DIToIdMap and a forward declaration fixup is created
    // for "struct t" to avoid pointee type traversal.
    //
    // During traversing test2() argument, even if we see "_t" is
    // already defined, we should keep moving to eventually
    // bring in types for "struct t". Otherwise, the "struct s2"
    // definition won't be correct.
    if (Ty && (!CheckPointer || !SeenPointer)) {
      if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
        unsigned Tag = DTy->getTag();
        if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
            Tag == dwarf::DW_TAG_volatile_type ||
            Tag == dwarf::DW_TAG_restrict_type) {
          uint32_t TmpTypeId;
          visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer,
                         SeenPointer);
        }
      }
    }

    return;
  }
@@ -12,9 +12,6 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"

@@ -27,6 +24,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -11,9 +11,6 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFCopy.h"
#include "RDFDeadCode.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"

@@ -24,6 +21,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -11,13 +11,13 @@
//===----------------------------------------------------------------------===//

#include "RDFCopy.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H

#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
#include <vector>
@@ -9,13 +9,13 @@
// RDF-based generic dead code elimination.

#include "RDFDeadCode.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"

#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/Support/Debug.h"

#include <queue>
@@ -23,8 +23,8 @@
#ifndef RDF_DEADCODE_H
#define RDF_DEADCODE_H

#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/SetVector.h"

namespace llvm {
@@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUDM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
@@ -166,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
                                  "Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
                                   "Implement mftb using the mfspr instruction">;
def FeatureUnalignedFloats :
  SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
                   "true", "CPU does not trap on unaligned FP access">;
def FeaturePPCPreRASched:
  SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
                   "Use PowerPC pre-RA scheduling strategy">;

@@ -252,7 +255,8 @@ def ProcessorFeatures {
     FeatureExtDiv,
     FeatureMFTB,
     DeprecatedDST,
     FeatureTwoConstNR];
     FeatureTwoConstNR,
     FeatureUnalignedFloats];
  list<SubtargetFeature> P7SpecificFeatures = [];
  list<SubtargetFeature> P7Features =
    !listconcat(P7InheritableFeatures, P7SpecificFeatures);
@@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  if (Subtarget.isISA3_0()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f16, Legal);
    setTruncStoreAction(MVT::f32, MVT::f16, Legal);
  } else {
    // No extending loads from f16 or HW conversions back and forth.
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc load and store's.

@@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);

@@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");

  // FIXME: handle extends from half precision float vectors on P9.
  // We only want to custom lower an extend from v2f32 to v2f64.
  if (Op.getValueType() != MVT::v2f64 ||
      Op.getOperand(0).getValueType() != MVT::v2f32)

@@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  case ISD::FP_EXTEND:
    SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
    if (Lowered)
      Results.push_back(Lowered);
    return;
  }
}

@@ -15255,6 +15279,10 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
  if (!VT.isSimple())
    return false;

  if (VT.isFloatingPoint() && !VT.isVector() &&
      !Subtarget.allowsUnalignedFPAccess())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
@@ -637,7 +637,7 @@ namespace llvm {
    /// then the VPERM for the shuffle. All in all a very slow sequence.
    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
        const override {
      if (VT.getScalarSizeInBits() % 8 == 0)
      if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)
        return TypeWidenVector;
      return TargetLoweringBase::getPreferredVectorAction(VT);
    }
@@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {

// Vector Multiply-Sum
def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
                            v1i128, v2i64, v1i128>;

// i8 element comparisons.
def VCMPNEB   : VCMP   <  7, "vcmpneb $vD, $vA, $vB"  , v16i8>;
def VCMPNEB_rec : VCMPo <  7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
@@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
  if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
    return false;

  // The operand may not necessarily be an immediate - it could be a relocation.
  if (!ADDIMI.getOperand(2).isImm())
    return false;

  Imm = ADDIMI.getOperand(2).getImm();

  return true;
@@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
  def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
            (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;

  // Load/convert and convert/store patterns for f16.
  def : Pat<(f64 (extloadf16 xoaddr:$src)),
            (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
  def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
            (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
  def : Pat<(f32 (extloadf16 xoaddr:$src)),
            (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
  def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
            (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
  def : Pat<(f64 (f16_to_fp i32:$A)),
            (f64 (XSCVHPDP (MTVSRWZ $A)))>;
  def : Pat<(f32 (f16_to_fp i32:$A)),
            (f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
  def : Pat<(i32 (fp_to_f16 f32:$A)),
            (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
  def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;

  let Predicates = [IsBigEndian, HasP9Vector] in {
  // Scalar stores of i8
  def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
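These VSX patterns, together with the extloadf16/truncstoref16 PatFrags and the ISA 3.0 legality settings earlier in the diff, are what let half-precision conversions lower to XSCVHPDP/XSCVDPHP on POWER9. For illustration only, source along these lines would exercise them, assuming the compiler accepts __fp16 as a storage-only type on this target:

    // Sketch; __fp16 availability on PowerPC is an assumption here.
    void half_demo(const __fp16 *in, double *dst, __fp16 *out) {
      *dst = in[0];             // f16 -> f64 extending load: extloadf16 / XSCVHPDP
      out[0] = (__fp16)dst[0];  // f64 -> f16 truncating store: truncstoref16 / XSCVDPHP + STXSIHX
    }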
@@ -124,6 +124,7 @@ protected:
  bool IsPPC4xx;
  bool IsPPC6xx;
  bool FeatureMFTB;
  bool AllowsUnalignedFPAccess;
  bool DeprecatedDST;
  bool HasLazyResolverStubs;
  bool IsLittleEndian;

@@ -274,6 +275,7 @@ public:
  bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
  bool isE500() const { return IsE500; }
  bool isFeatureMFTB() const { return FeatureMFTB; }
  bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; }
  bool isDeprecatedDST() const { return DeprecatedDST; }
  bool hasICBT() const { return HasICBT; }
  bool hasInvariantFunctionDescriptors() const {
contrib/llvm-project/llvm/lib/Target/X86/ImmutableGraph.h (new file, 446 lines)
@@ -0,0 +1,446 @@
//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: ImmutableGraph is a fast DAG implementation that cannot be
/// modified, except by creating a new ImmutableGraph. ImmutableGraph is
/// implemented as two arrays: one containing nodes, and one containing edges.
/// The advantages to this implementation are two-fold:
/// 1. Iteration and traversal operations benefit from cache locality.
/// 2. Operations on sets of nodes/edges are efficient, and representations of
///    those sets in memory are compact. For instance, a set of edges is
///    implemented as a bit vector, wherein each bit corresponds to one edge in
///    the edge array. This implies a lower bound of 64x spatial improvement
///    over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that
///    insert/erase/contains operations complete in negligible constant time:
///    insert and erase require one load and one store, and contains requires
///    just one load.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>

namespace llvm {

template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph {
  using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>;
  template <typename> friend class ImmutableGraphBuilder;

public:
  using node_value_type = NodeValueT;
  using edge_value_type = EdgeValueT;
  using size_type = int;
  class Node;
  class Edge {
    friend class ImmutableGraph;
    template <typename> friend class ImmutableGraphBuilder;

    const Node *Dest;
    edge_value_type Value;

  public:
    const Node *getDest() const { return Dest; };
    const edge_value_type &getValue() const { return Value; }
  };
  class Node {
    friend class ImmutableGraph;
    template <typename> friend class ImmutableGraphBuilder;

    const Edge *Edges;
    node_value_type Value;

  public:
    const node_value_type &getValue() const { return Value; }

    const Edge *edges_begin() const { return Edges; }
    // Nodes are allocated sequentially. Edges for a node are stored together.
    // The end of this Node's edges is the beginning of the next node's edges.
    // An extra node was allocated to hold the end pointer for the last real
    // node.
    const Edge *edges_end() const { return (this + 1)->Edges; }
    ArrayRef<Edge> edges() const {
      return makeArrayRef(edges_begin(), edges_end());
    }
  };

protected:
  ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
                 size_type NodesSize, size_type EdgesSize)
      : Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize),
        EdgesSize(EdgesSize) {}
  ImmutableGraph(const ImmutableGraph &) = delete;
  ImmutableGraph(ImmutableGraph &&) = delete;
  ImmutableGraph &operator=(const ImmutableGraph &) = delete;
  ImmutableGraph &operator=(ImmutableGraph &&) = delete;

public:
  ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); }
  const Node *nodes_begin() const { return nodes().begin(); }
  const Node *nodes_end() const { return nodes().end(); }

  ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); }
  const Edge *edges_begin() const { return edges().begin(); }
  const Edge *edges_end() const { return edges().end(); }

  size_type nodes_size() const { return NodesSize; }
  size_type edges_size() const { return EdgesSize; }

  // Node N must belong to this ImmutableGraph.
  size_type getNodeIndex(const Node &N) const {
    return std::distance(nodes_begin(), &N);
  }
  // Edge E must belong to this ImmutableGraph.
  size_type getEdgeIndex(const Edge &E) const {
    return std::distance(edges_begin(), &E);
  }

  // FIXME: Could NodeSet and EdgeSet be templated to share code?
  class NodeSet {
    const ImmutableGraph &G;
    BitVector V;

  public:
    NodeSet(const ImmutableGraph &G, bool ContainsAll = false)
        : G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {}
    bool insert(const Node &N) {
      size_type Idx = G.getNodeIndex(N);
      bool AlreadyExists = V.test(Idx);
      V.set(Idx);
      return !AlreadyExists;
    }
    void erase(const Node &N) {
      size_type Idx = G.getNodeIndex(N);
      V.reset(Idx);
    }
    bool contains(const Node &N) const {
      size_type Idx = G.getNodeIndex(N);
      return V.test(Idx);
    }
    void clear() { V.reset(); }
    size_type empty() const { return V.none(); }
    /// Return the number of elements in the set
    size_type count() const { return V.count(); }
    /// Return the size of the set's domain
    size_type size() const { return V.size(); }
    /// Set union
    NodeSet &operator|=(const NodeSet &RHS) {
      assert(&this->G == &RHS.G);
      V |= RHS.V;
      return *this;
    }
    /// Set intersection
    NodeSet &operator&=(const NodeSet &RHS) {
      assert(&this->G == &RHS.G);
      V &= RHS.V;
      return *this;
    }
    /// Set disjoint union
    NodeSet &operator^=(const NodeSet &RHS) {
      assert(&this->G == &RHS.G);
      V ^= RHS.V;
      return *this;
    }

    using index_iterator = typename BitVector::const_set_bits_iterator;
    index_iterator index_begin() const { return V.set_bits_begin(); }
    index_iterator index_end() const { return V.set_bits_end(); }
    void set(size_type Idx) { V.set(Idx); }
    void reset(size_type Idx) { V.reset(Idx); }

    class iterator {
      const NodeSet &Set;
      size_type Current;

      void advance() {
        assert(Current != -1);
        Current = Set.V.find_next(Current);
      }

    public:
      iterator(const NodeSet &Set, size_type Begin)
          : Set{Set}, Current{Begin} {}
      iterator operator++(int) {
        iterator Tmp = *this;
        advance();
        return Tmp;
      }
      iterator &operator++() {
        advance();
        return *this;
      }
      Node *operator*() const {
        assert(Current != -1);
        return Set.G.nodes_begin() + Current;
      }
      bool operator==(const iterator &other) const {
        assert(&this->Set == &other.Set);
        return this->Current == other.Current;
      }
      bool operator!=(const iterator &other) const { return !(*this == other); }
    };

    iterator begin() const { return iterator{*this, V.find_first()}; }
    iterator end() const { return iterator{*this, -1}; }
  };

  class EdgeSet {
    const ImmutableGraph &G;
    BitVector V;

  public:
    EdgeSet(const ImmutableGraph &G, bool ContainsAll = false)
        : G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {}
    bool insert(const Edge &E) {
      size_type Idx = G.getEdgeIndex(E);
      bool AlreadyExists = V.test(Idx);
      V.set(Idx);
      return !AlreadyExists;
    }
    void erase(const Edge &E) {
      size_type Idx = G.getEdgeIndex(E);
      V.reset(Idx);
    }
    bool contains(const Edge &E) const {
      size_type Idx = G.getEdgeIndex(E);
      return V.test(Idx);
    }
    void clear() { V.reset(); }
    bool empty() const { return V.none(); }
    /// Return the number of elements in the set
    size_type count() const { return V.count(); }
    /// Return the size of the set's domain
    size_type size() const { return V.size(); }
    /// Set union
    EdgeSet &operator|=(const EdgeSet &RHS) {
      assert(&this->G == &RHS.G);
      V |= RHS.V;
      return *this;
    }
    /// Set intersection
    EdgeSet &operator&=(const EdgeSet &RHS) {
      assert(&this->G == &RHS.G);
      V &= RHS.V;
      return *this;
    }
    /// Set disjoint union
    EdgeSet &operator^=(const EdgeSet &RHS) {
      assert(&this->G == &RHS.G);
      V ^= RHS.V;
      return *this;
    }

    using index_iterator = typename BitVector::const_set_bits_iterator;
    index_iterator index_begin() const { return V.set_bits_begin(); }
    index_iterator index_end() const { return V.set_bits_end(); }
    void set(size_type Idx) { V.set(Idx); }
    void reset(size_type Idx) { V.reset(Idx); }

    class iterator {
      const EdgeSet &Set;
      size_type Current;

      void advance() {
        assert(Current != -1);
        Current = Set.V.find_next(Current);
      }

    public:
      iterator(const EdgeSet &Set, size_type Begin)
          : Set{Set}, Current{Begin} {}
      iterator operator++(int) {
        iterator Tmp = *this;
        advance();
        return Tmp;
      }
      iterator &operator++() {
        advance();
        return *this;
      }
      Edge *operator*() const {
        assert(Current != -1);
        return Set.G.edges_begin() + Current;
      }
      bool operator==(const iterator &other) const {
        assert(&this->Set == &other.Set);
        return this->Current == other.Current;
      }
      bool operator!=(const iterator &other) const { return !(*this == other); }
    };

    iterator begin() const { return iterator{*this, V.find_first()}; }
    iterator end() const { return iterator{*this, -1}; }
  };

private:
  std::unique_ptr<Node[]> Nodes;
  std::unique_ptr<Edge[]> Edges;
  size_type NodesSize;
  size_type EdgesSize;
};

template <typename GraphT> class ImmutableGraphBuilder {
  using node_value_type = typename GraphT::node_value_type;
  using edge_value_type = typename GraphT::edge_value_type;
  static_assert(
      std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>,
                      GraphT>::value,
      "Template argument to ImmutableGraphBuilder must derive from "
      "ImmutableGraph<>");
  using size_type = typename GraphT::size_type;
  using NodeSet = typename GraphT::NodeSet;
  using Node = typename GraphT::Node;
  using EdgeSet = typename GraphT::EdgeSet;
  using Edge = typename GraphT::Edge;
  using BuilderEdge = std::pair<edge_value_type, size_type>;
  using EdgeList = std::vector<BuilderEdge>;
  using BuilderVertex = std::pair<node_value_type, EdgeList>;
  using VertexVec = std::vector<BuilderVertex>;

public:
  using BuilderNodeRef = size_type;

  BuilderNodeRef addVertex(const node_value_type &V) {
    auto I = AdjList.emplace(AdjList.end(), V, EdgeList{});
    return std::distance(AdjList.begin(), I);
  }

  void addEdge(const edge_value_type &E, BuilderNodeRef From,
               BuilderNodeRef To) {
    AdjList[From].second.emplace_back(E, To);
  }

  bool empty() const { return AdjList.empty(); }

  template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) {
    size_type VertexSize = AdjList.size(), EdgeSize = 0;
    for (const auto &V : AdjList) {
      EdgeSize += V.second.size();
    }
    auto VertexArray =
        std::make_unique<Node[]>(VertexSize + 1 /* terminator node */);
    auto EdgeArray = std::make_unique<Edge[]>(EdgeSize);
    size_type VI = 0, EI = 0;
    for (; VI < VertexSize; ++VI) {
      VertexArray[VI].Value = std::move(AdjList[VI].first);
      VertexArray[VI].Edges = &EdgeArray[EI];
      auto NumEdges = static_cast<size_type>(AdjList[VI].second.size());
      for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) {
        auto &E = AdjList[VI].second[VEI];
        EdgeArray[EI].Value = std::move(E.first);
        EdgeArray[EI].Dest = &VertexArray[E.second];
      }
    }
    assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed");
    VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node
    return std::make_unique<GraphT>(std::move(VertexArray),
                                    std::move(EdgeArray), VertexSize, EdgeSize,
                                    std::forward<ArgT>(Args)...);
  }

  template <typename... ArgT>
  static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes,
                                      const EdgeSet &TrimEdges,
                                      ArgT &&... Args) {
    size_type NewVertexSize = G.nodes_size() - TrimNodes.count();
    size_type NewEdgeSize = G.edges_size() - TrimEdges.count();
    auto NewVertexArray =
        std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */);
    auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize);

    // Walk the nodes and determine the new index for each node.
    size_type NewNodeIndex = 0;
    std::vector<size_type> RemappedNodeIndex(G.nodes_size());
    for (const Node &N : G.nodes()) {
      if (TrimNodes.contains(N))
        continue;
      RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++;
    }
    assert(NewNodeIndex == NewVertexSize &&
           "Should have assigned NewVertexSize indices");

    size_type VertexI = 0, EdgeI = 0;
    for (const Node &N : G.nodes()) {
      if (TrimNodes.contains(N))
        continue;
      NewVertexArray[VertexI].Value = N.getValue();
      NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI];
      for (const Edge &E : N.edges()) {
        if (TrimEdges.contains(E))
          continue;
        NewEdgeArray[EdgeI].Value = E.getValue();
        size_type DestIdx = G.getNodeIndex(*E.getDest());
        size_type NewIdx = RemappedNodeIndex[DestIdx];
        assert(NewIdx < NewVertexSize);
        NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx];
        ++EdgeI;
      }
      ++VertexI;
    }
    assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize &&
           "Gadget graph malformed");
    NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator
    return std::make_unique<GraphT>(std::move(NewVertexArray),
                                    std::move(NewEdgeArray), NewVertexSize,
                                    NewEdgeSize, std::forward<ArgT>(Args)...);
  }

private:
  VertexVec AdjList;
};

template <typename NodeValueT, typename EdgeValueT>
struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> {
  using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>;
  using NodeRef = typename GraphT::Node const *;
  using EdgeRef = typename GraphT::Edge const &;

  static NodeRef edge_dest(EdgeRef E) { return E.getDest(); }
  using ChildIteratorType =
      mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>;

  static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); }
  static ChildIteratorType child_begin(NodeRef N) {
    return {N->edges_begin(), &edge_dest};
  }
  static ChildIteratorType child_end(NodeRef N) {
    return {N->edges_end(), &edge_dest};
  }

  static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; }
  using nodes_iterator =
      mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>;
  static nodes_iterator nodes_begin(GraphT *G) {
    return {G->nodes_begin(), &getNode};
  }
  static nodes_iterator nodes_end(GraphT *G) {
    return {G->nodes_end(), &getNode};
  }

  using ChildEdgeIteratorType = typename GraphT::Edge const *;

  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
    return N->edges_begin();
  }
  static ChildEdgeIteratorType child_edge_end(NodeRef N) {
    return N->edges_end();
  }
  static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
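A minimal usage sketch for the builder/graph pair defined above; the payload types and the MyGraph wrapper are hypothetical and not part of the commit (a derived class with a public forwarding constructor is needed because ImmutableGraph's own constructor is protected and get()/trim() construct the concrete GraphT):

    // Sketch: build a two-node graph and query a NodeSet.
    #include "ImmutableGraph.h"
    #include <memory>

    struct MyGraph : llvm::ImmutableGraph<int, float> {
      MyGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
              size_type NodesSize, size_type EdgesSize)
          : ImmutableGraph(std::move(Nodes), std::move(Edges), NodesSize,
                           EdgesSize) {}
    };

    std::unique_ptr<MyGraph> buildDemo() {
      llvm::ImmutableGraphBuilder<MyGraph> B;
      auto N0 = B.addVertex(1);
      auto N1 = B.addVertex(2);
      B.addEdge(0.5f, N0, N1);             // one edge N0 -> N1 with payload 0.5
      std::unique_ptr<MyGraph> G = B.get();

      MyGraph::NodeSet Visited(*G);        // bit vector over the node array
      Visited.insert(*G->nodes_begin());   // constant-time insert
      (void)Visited.contains(*G->nodes_begin());
      return G;
    }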
@@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass();
FunctionPass *createX86EvexToVexInsts();

/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86RetpolineThunksPass();
FunctionPass *createX86IndirectThunksPass();

/// This pass ensures instructions featuring a memory operand
/// have distinctive <LineNumber, Discriminator> (with respect to eachother)

@@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
                                                  X86Subtarget &,
                                                  X86RegisterBankInfo &);

FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();

void initializeEvexToVexInstPassPass(PassRegistry &);

@@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
@@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk
          "ourselves. Only has effect when combined with some other retpoline "
          "feature", [FeatureRetpolineIndirectCalls]>;

// Mitigate LVI attacks against indirect calls/branches and call returns
def FeatureLVIControlFlowIntegrity
    : SubtargetFeature<
          "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
          "Prevent indirect calls/branches from using a memory operand, and "
          "precede all indirect calls/branches from a register with an "
          "LFENCE instruction to serialize control flow. Also decompose RET "
          "instructions into a POP+LFENCE+JMP sequence.">;

// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening
    : SubtargetFeature<
          "lvi-load-hardening", "UseLVILoadHardening", "true",
          "Insert LFENCE instructions to prevent data speculatively injected "
          "into loads from being used maliciously.">;

// Direct Move instructions.
def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                                       "Support movdiri instruction">;
@@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
      (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
    return false;

  // Functions using retpoline for indirect calls need to use SDISel.
  if (Subtarget->useRetpolineIndirectCalls())
  // Functions using thunks for indirect calls need to use SDISel.
  if (Subtarget->useIndirectThunkCalls())
    return false;

  // Handle only C, fastcc, and webkit_js calling conventions for now.
@@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                          bool InProlog) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add retpoline support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and retpoline not yet implemented.");
                       "code model and indirect thunks not yet implemented.");

  unsigned CallOp;
  if (Is64Bit)

@@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
    // is laid out within 2^31 bytes of each function body, but this seems
    // to be sufficient for JIT.
    // FIXME: Add retpoline support and remove the error here..
    if (STI.useRetpolineIndirectCalls())
    if (STI.useIndirectThunkCalls())
      report_fatal_error("Emitting morestack calls on 64-bit with the large "
                         "code model and retpoline not yet implemented.");
                         "code model and thunks not yet implemented.");
    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
        .addReg(X86::RIP)
        .addImm(0)
@@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target can fold the load into the call or
        // jmp.
        !Subtarget->useRetpolineIndirectCalls() &&
        !Subtarget->useIndirectThunkCalls() &&
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          (Subtarget->is64Bit() ||
@@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
}

bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
  // If the subtarget is using retpolines, we need to not generate jump tables.
  if (Subtarget.useRetpolineIndirectBranches())
  // If the subtarget is using thunks, we need to not generate jump tables.
  if (Subtarget.useIndirectThunkBranches())
    return false;

  // Otherwise, fallback on the generic logic.

@@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
  return BB;
}

static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) {
  switch (RPOpc) {
  case X86::RETPOLINE_CALL32:
  case X86::INDIRECT_THUNK_CALL32:
    return X86::CALLpcrel32;
  case X86::RETPOLINE_CALL64:
  case X86::INDIRECT_THUNK_CALL64:
    return X86::CALL64pcrel32;
  case X86::RETPOLINE_TCRETURN32:
  case X86::INDIRECT_THUNK_TCRETURN32:
    return X86::TCRETURNdi;
  case X86::RETPOLINE_TCRETURN64:
  case X86::INDIRECT_THUNK_TCRETURN64:
    return X86::TCRETURNdi64;
  }
  llvm_unreachable("not retpoline opcode");
  llvm_unreachable("not indirect thunk opcode");
}

static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
                                      unsigned Reg) {
static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget,
                                          unsigned Reg) {
  if (Subtarget.useRetpolineExternalThunk()) {
    // When using an external thunk for retpolines, we pick names that match the
    // names GCC happens to use as well. This helps simplify the implementation

@@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__x86_indirect_thunk_r11";
    }
    llvm_unreachable("unexpected reg for external indirect thunk");
  }

  if (Subtarget.useRetpolineIndirectCalls() ||
      Subtarget.useRetpolineIndirectBranches()) {
    // When targeting an internal COMDAT thunk use an LLVM-specific name.
    switch (Reg) {
    case X86::EAX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_eax";
    case X86::ECX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_ecx";
    case X86::EDX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_edx";
    case X86::EDI:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_edi";
    case X86::R11:
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__llvm_retpoline_r11";
    }
    llvm_unreachable("unexpected reg for retpoline");
  }

  // When targeting an internal COMDAT thunk use an LLVM-specific name.
  switch (Reg) {
  case X86::EAX:
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_eax";
  case X86::ECX:
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_ecx";
  case X86::EDX:
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_edx";
  case X86::EDI:
    assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
    return "__llvm_retpoline_edi";
  case X86::R11:
  if (Subtarget.useLVIControlFlowIntegrity()) {
    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
    return "__llvm_retpoline_r11";
    return "__llvm_lvi_thunk_r11";
  }
  llvm_unreachable("unexpected reg for retpoline");
  llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
                                            MachineBasicBlock *BB) const {
  // Copy the virtual register into the R11 physical register and
  // call the retpoline thunk.
  DebugLoc DL = MI.getDebugLoc();
  const X86InstrInfo *TII = Subtarget.getInstrInfo();
  Register CalleeVReg = MI.getOperand(0).getReg();
  unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
  unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());

  // Find an available scratch register to hold the callee. On 64-bit, we can
  // just use R11, but we scan for uses anyway to ensure we don't generate

@@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
    report_fatal_error("calling convention incompatible with retpoline, no "
                       "available registers");

  const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
  const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);

  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
      .addReg(CalleeVReg);

@@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
    return EmitLoweredTLSAddr(MI, BB);
  case X86::RETPOLINE_CALL32:
  case X86::RETPOLINE_CALL64:
  case X86::RETPOLINE_TCRETURN32:
  case X86::RETPOLINE_TCRETURN64:
    return EmitLoweredRetpoline(MI, BB);
  case X86::INDIRECT_THUNK_CALL32:
  case X86::INDIRECT_THUNK_CALL64:
  case X86::INDIRECT_THUNK_TCRETURN32:
  case X86::INDIRECT_THUNK_TCRETURN64:
    return EmitLoweredIndirectThunk(MI, BB);
  case X86::CATCHRET:
    return EmitLoweredCatchRet(MI, BB);
  case X86::CATCHPAD:
@@ -1482,8 +1482,8 @@ namespace llvm {
    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;
    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;
contrib/llvm-project/llvm/lib/Target/X86/X86IndirectThunks.cpp (new file, 364 lines)
@@ -0,0 +1,364 @@
//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \file
|
||||
///
|
||||
/// Pass that injects an MI thunk that is used to lower indirect calls in a way
|
||||
/// that prevents speculation on some x86 processors and can be used to mitigate
|
||||
/// security vulnerabilities due to targeted speculative execution and side
|
||||
/// channels such as CVE-2017-5715.
|
||||
///
|
||||
/// Currently supported thunks include:
|
||||
/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
|
||||
/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
|
||||
/// before making an indirect call/jump
|
||||
///
|
||||
/// Note that the reason that this is implemented as a MachineFunctionPass and
|
||||
/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
|
||||
/// serialize all transformations, which can consume lots of memory.
|
||||
///
|
||||
/// TODO(chandlerc): All of this code could use better comments and
|
||||
/// documentation.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "X86.h"
|
||||
#include "X86InstrBuilder.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "x86-retpoline-thunks"
|
||||
|
||||
static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
|
||||
static const char R11RetpolineName[] = "__llvm_retpoline_r11";
|
||||
static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
|
||||
static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
|
||||
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
|
||||
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";
|
||||
|
||||
static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
|
||||
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
|
||||
|
||||
namespace {
|
||||
template <typename Derived> class ThunkInserter {
|
||||
Derived &getDerived() { return *static_cast<Derived *>(this); }
|
||||
|
||||
protected:
|
||||
bool InsertedThunks;
|
||||
void doInitialization(Module &M) {}
|
||||
void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);
|
||||
|
||||
public:
|
||||
void init(Module &M) {
|
||||
InsertedThunks = false;
|
||||
getDerived().doInitialization(M);
|
||||
}
|
||||
// Returns `true` if `MMI` or `MF` was modified.
|
||||
bool run(MachineModuleInfo &MMI, MachineFunction &MF);
|
||||
};
|
||||
|
||||
struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
|
||||
const char *getThunkPrefix() { return RetpolineNamePrefix; }
|
||||
bool mayUseThunk(const MachineFunction &MF) {
|
||||
const auto &STI = MF.getSubtarget<X86Subtarget>();
|
||||
return (STI.useRetpolineIndirectCalls() ||
|
||||
STI.useRetpolineIndirectBranches()) &&
|
||||
!STI.useRetpolineExternalThunk();
|
||||
}
|
||||
void insertThunks(MachineModuleInfo &MMI);
|
||||
void populateThunk(MachineFunction &MF);
|
||||
};
|
||||
|
||||
struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
|
||||
const char *getThunkPrefix() { return LVIThunkNamePrefix; }
|
||||
bool mayUseThunk(const MachineFunction &MF) {
|
||||
return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
|
||||
}
|
||||
void insertThunks(MachineModuleInfo &MMI) {
|
||||
createThunkFunction(MMI, R11LVIThunkName);
|
||||
}
|
||||
void populateThunk(MachineFunction &MF) {
|
||||
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
|
||||
// generate two bbs for the entry block.
|
||||
MachineBasicBlock *Entry = &MF.front();
|
||||
Entry->clear();
|
||||
while (MF.size() > 1)
|
||||
MF.erase(std::next(MF.begin()));
|
||||
|
||||
// This code mitigates LVI by replacing each indirect call/jump with a
// direct call/jump to a thunk that looks like:
// ```
// lfence
// jmpq *%r11
// ```
// This ensures that if the value in register %r11 was loaded from memory,
// then the value in %r11 is (architecturally) correct prior to the jump.
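// For illustration (simplified sketch): a call site hardened by LVI-CFI pins
// the call target in %r11 and is lowered to a direct call of this thunk,
//   callq __llvm_lvi_thunk_r11
// so every indirect transfer passes through the LFENCE above before the
// loaded target value is used.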
|
||||
const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
|
||||
BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
|
||||
BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
|
||||
MF.front().addLiveIn(X86::R11);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
class X86IndirectThunks : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
X86IndirectThunks() : MachineFunctionPass(ID) {}
|
||||
|
||||
StringRef getPassName() const override { return "X86 Indirect Thunks"; }
|
||||
|
||||
bool doInitialization(Module &M) override;
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
AU.addRequired<MachineModuleInfoWrapperPass>();
|
||||
AU.addPreserved<MachineModuleInfoWrapperPass>();
|
||||
}
|
||||
|
||||
private:
|
||||
std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;
|
||||
|
||||
// FIXME: When LLVM moves to C++17, these can become folds
template <typename... ThunkInserterT>
static void initTIs(Module &M,
std::tuple<ThunkInserterT...> &ThunkInserters) {
(void)std::initializer_list<int>{
(std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
}
template <typename... ThunkInserterT>
static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
std::tuple<ThunkInserterT...> &ThunkInserters) {
bool Modified = false;
(void)std::initializer_list<int>{
Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
return Modified;
}
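// A C++17 fold-expression version of the two helpers above could look like
// this (sketch only, assuming std::apply over the ThunkInserters tuple):
//   template <typename... ThunkInserterT>
//   static void initTIs(Module &M, std::tuple<ThunkInserterT...> &TIs) {
//     std::apply([&](auto &...TI) { (TI.init(M), ...); }, TIs);
//   }
//   template <typename... ThunkInserterT>
//   static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
//                      std::tuple<ThunkInserterT...> &TIs) {
//     return std::apply(
//         [&](auto &...TI) { return (TI.run(MMI, MF) | ...); }, TIs);
//   }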
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
|
||||
if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
|
||||
createThunkFunction(MMI, R11RetpolineName);
|
||||
else
|
||||
for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
|
||||
EDIRetpolineName})
|
||||
createThunkFunction(MMI, Name);
|
||||
}
|
||||
|
||||
void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
|
||||
bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
|
||||
Register ThunkReg;
|
||||
if (Is64Bit) {
|
||||
assert(MF.getName() == "__llvm_retpoline_r11" &&
|
||||
"Should only have an r11 thunk on 64-bit targets");
|
||||
|
||||
// __llvm_retpoline_r11:
|
||||
// callq .Lr11_call_target
|
||||
// .Lr11_capture_spec:
|
||||
// pause
|
||||
// lfence
|
||||
// jmp .Lr11_capture_spec
|
||||
// .align 16
|
||||
// .Lr11_call_target:
|
||||
// movq %r11, (%rsp)
|
||||
// retq
|
||||
ThunkReg = X86::R11;
|
||||
} else {
|
||||
// For 32-bit targets we need to emit a collection of thunks for various
|
||||
// possible scratch registers as well as a fallback that uses EDI, which is
|
||||
// normally callee saved.
|
||||
// __llvm_retpoline_eax:
|
||||
// calll .Leax_call_target
|
||||
// .Leax_capture_spec:
|
||||
// pause
|
||||
// jmp .Leax_capture_spec
|
||||
// .align 16
|
||||
// .Leax_call_target:
|
||||
// movl %eax, (%esp) # Clobber return addr
|
||||
// retl
|
||||
//
|
||||
// __llvm_retpoline_ecx:
|
||||
// ... # Same setup
|
||||
// movl %ecx, (%esp)
|
||||
// retl
|
||||
//
|
||||
// __llvm_retpoline_edx:
|
||||
// ... # Same setup
|
||||
// movl %edx, (%esp)
|
||||
// retl
|
||||
//
|
||||
// __llvm_retpoline_edi:
|
||||
// ... # Same setup
|
||||
// movl %edi, (%esp)
|
||||
// retl
|
||||
if (MF.getName() == EAXRetpolineName)
|
||||
ThunkReg = X86::EAX;
|
||||
else if (MF.getName() == ECXRetpolineName)
|
||||
ThunkReg = X86::ECX;
|
||||
else if (MF.getName() == EDXRetpolineName)
|
||||
ThunkReg = X86::EDX;
|
||||
else if (MF.getName() == EDIRetpolineName)
|
||||
ThunkReg = X86::EDI;
|
||||
else
|
||||
llvm_unreachable("Invalid thunk name on x86-32!");
|
||||
}
|
||||
|
||||
const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
|
||||
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
|
||||
// generate two bbs for the entry block.
|
||||
MachineBasicBlock *Entry = &MF.front();
|
||||
Entry->clear();
|
||||
while (MF.size() > 1)
|
||||
MF.erase(std::next(MF.begin()));
|
||||
|
||||
MachineBasicBlock *CaptureSpec =
|
||||
MF.CreateMachineBasicBlock(Entry->getBasicBlock());
|
||||
MachineBasicBlock *CallTarget =
|
||||
MF.CreateMachineBasicBlock(Entry->getBasicBlock());
|
||||
MCSymbol *TargetSym = MF.getContext().createTempSymbol();
|
||||
MF.push_back(CaptureSpec);
|
||||
MF.push_back(CallTarget);
|
||||
|
||||
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
|
||||
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
|
||||
|
||||
Entry->addLiveIn(ThunkReg);
|
||||
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
|
||||
|
||||
// The MIR verifier thinks that the CALL in the entry block will fall through
|
||||
// to CaptureSpec, so mark it as the successor. Technically, CallTarget is
|
||||
// the successor, but the MIR verifier doesn't know how to cope with that.
|
||||
Entry->addSuccessor(CaptureSpec);
|
||||
|
||||
// In the capture loop for speculation, we want to stop the processor from
|
||||
// speculating as fast as possible. On Intel processors, the PAUSE instruction
|
||||
// will block speculation without consuming any execution resources. On AMD
|
||||
// processors, the PAUSE instruction is (essentially) a nop, so we also use an
|
||||
// LFENCE instruction which they have advised will stop speculation as well
|
||||
// with minimal resource utilization. We still end the capture with a jump to
|
||||
// form an infinite loop to fully guarantee that no matter what implementation
|
||||
// of the x86 ISA, speculating this code path never escapes.
|
||||
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
|
||||
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
|
||||
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
|
||||
CaptureSpec->setHasAddressTaken();
|
||||
CaptureSpec->addSuccessor(CaptureSpec);
|
||||
|
||||
CallTarget->addLiveIn(ThunkReg);
|
||||
CallTarget->setHasAddressTaken();
|
||||
CallTarget->setAlignment(Align(16));
|
||||
|
||||
// Insert return address clobber
|
||||
const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
|
||||
const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
|
||||
addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
|
||||
0)
|
||||
.addReg(ThunkReg);
|
||||
|
||||
CallTarget->back().setPreInstrSymbol(MF, TargetSym);
|
||||
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
|
||||
}
|
||||
|
||||
template <typename Derived>
|
||||
void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
|
||||
StringRef Name) {
|
||||
assert(Name.startswith(getDerived().getThunkPrefix()) &&
|
||||
"Created a thunk with an unexpected prefix!");
|
||||
|
||||
Module &M = const_cast<Module &>(*MMI.getModule());
|
||||
LLVMContext &Ctx = M.getContext();
|
||||
auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
|
||||
Function *F =
|
||||
Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
|
||||
F->setVisibility(GlobalValue::HiddenVisibility);
|
||||
F->setComdat(M.getOrInsertComdat(Name));
|
||||
|
||||
// Add Attributes so that we don't create a frame, unwind information, or
|
||||
// inline.
|
||||
AttrBuilder B;
|
||||
B.addAttribute(llvm::Attribute::NoUnwind);
|
||||
B.addAttribute(llvm::Attribute::Naked);
|
||||
F->addAttributes(llvm::AttributeList::FunctionIndex, B);
|
||||
|
||||
// Populate our function a bit so that we can verify.
|
||||
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
|
||||
IRBuilder<> Builder(Entry);
|
||||
|
||||
Builder.CreateRetVoid();
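// At this point the IR for the thunk is roughly (illustrative sketch for the
// r11 retpoline thunk; attributes omitted):
//   define linkonce_odr hidden void @__llvm_retpoline_r11() comdat {
//   entry:
//     ret void
//   }
// The real thunk body is emitted later at the MI level by populateThunk().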
|
||||
|
||||
// MachineFunctions/MachineBasicBlocks aren't created automatically for the
|
||||
// IR-level constructs we already made. Create them and insert them into the
|
||||
// module.
|
||||
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
|
||||
MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
|
||||
|
||||
// Insert EntryMBB into MF. It's not in the module until we do this.
|
||||
MF.insert(MF.end(), EntryMBB);
|
||||
// Set MF properties. We never use vregs...
|
||||
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
|
||||
}
|
||||
|
||||
template <typename Derived>
|
||||
bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
|
||||
// If MF is not a thunk, check to see if we need to insert a thunk.
|
||||
if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
|
||||
// If we've already inserted a thunk, nothing else to do.
|
||||
if (InsertedThunks)
|
||||
return false;
|
||||
|
||||
// Only add a thunk if one of the functions has the corresponding feature
|
||||
// enabled in its subtarget, and doesn't enable external thunks.
|
||||
// FIXME: Conditionalize on indirect calls so we don't emit a thunk when
|
||||
// nothing will end up calling it.
|
||||
// FIXME: It's a little silly to look at every function just to enumerate
|
||||
// the subtargets, but eventually we'll want to look at them for indirect
|
||||
// calls, so maybe this is OK.
|
||||
if (!getDerived().mayUseThunk(MF))
|
||||
return false;
|
||||
|
||||
getDerived().insertThunks(MMI);
|
||||
InsertedThunks = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// If this *is* a thunk function, we need to populate it with the correct MI.
|
||||
getDerived().populateThunk(MF);
|
||||
return true;
|
||||
}
|
||||
|
||||
FunctionPass *llvm::createX86IndirectThunksPass() {
|
||||
return new X86IndirectThunks();
|
||||
}
|
||||
|
||||
char X86IndirectThunks::ID = 0;
|
||||
|
||||
bool X86IndirectThunks::doInitialization(Module &M) {
|
||||
initTIs(M, TIs);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
|
||||
LLVM_DEBUG(dbgs() << getPassName() << '\n');
|
||||
auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
|
||||
return runTIs(MMI, MF, TIs);
|
||||
}
|
@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
|
||||
|
||||
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
|
||||
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
|
||||
Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
|
||||
Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
|
||||
|
||||
// FIXME: This is disabled for 32-bit PIC mode because the global base
|
||||
// register which is part of the address mode may be assigned a
|
||||
// callee-saved register.
|
||||
def : Pat<(X86tcret (load addr:$dst), imm:$off),
|
||||
(TCRETURNmi addr:$dst, imm:$off)>,
|
||||
Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
|
||||
Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
|
||||
|
||||
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
|
||||
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
|
||||
@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
|
||||
|
||||
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
|
||||
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
|
||||
Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
|
||||
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
|
||||
|
||||
// Don't fold loads into X86tcret requiring more than 6 regs.
|
||||
// There wouldn't be enough scratch registers for base+index.
|
||||
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
|
||||
(TCRETURNmi64 addr:$dst, imm:$off)>,
|
||||
Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
|
||||
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
|
||||
|
||||
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
|
||||
(RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
|
||||
Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
|
||||
(INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
|
||||
Requires<[In64BitMode, UseIndirectThunkCalls]>;
|
||||
|
||||
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
|
||||
(RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
|
||||
Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
|
||||
(INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
|
||||
Requires<[Not64BitMode, UseIndirectThunkCalls]>;
|
||||
|
||||
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
|
||||
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
|
||||
|
@ -237,13 +237,13 @@ let isCall = 1 in
|
||||
Sched<[WriteJumpLd]>;
|
||||
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
|
||||
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
|
||||
Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
|
||||
Requires<[Not64BitMode,NotUseIndirectThunkCalls]>,
|
||||
Sched<[WriteJump]>;
|
||||
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
|
||||
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
|
||||
OpSize32,
|
||||
Requires<[Not64BitMode,FavorMemIndirectCall,
|
||||
NotUseRetpolineIndirectCalls]>,
|
||||
NotUseIndirectThunkCalls]>,
|
||||
Sched<[WriteJumpLd]>;
|
||||
|
||||
// Non-tracking calls for IBT, use with caution.
|
||||
@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
|
||||
Requires<[In64BitMode]>;
|
||||
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
|
||||
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
|
||||
Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
|
||||
Requires<[In64BitMode,NotUseIndirectThunkCalls]>;
|
||||
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
|
||||
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
|
||||
Requires<[In64BitMode,FavorMemIndirectCall,
|
||||
NotUseRetpolineIndirectCalls]>;
|
||||
NotUseIndirectThunkCalls]>;
|
||||
|
||||
// Non-tracking calls for IBT, use with caution.
|
||||
let isCodeGenOnly = 1 in {
|
||||
@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
|
||||
Uses = [RSP, SSP],
|
||||
usesCustomInserter = 1,
|
||||
SchedRW = [WriteJump] in {
|
||||
def RETPOLINE_CALL32 :
|
||||
def INDIRECT_THUNK_CALL32 :
|
||||
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
|
||||
Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
|
||||
Requires<[Not64BitMode,UseIndirectThunkCalls]>;
|
||||
|
||||
def RETPOLINE_CALL64 :
|
||||
def INDIRECT_THUNK_CALL64 :
|
||||
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
|
||||
Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
|
||||
Requires<[In64BitMode,UseIndirectThunkCalls]>;
|
||||
|
||||
// Retpoline variant of indirect tail calls.
|
||||
// Indirect thunk variant of indirect tail calls.
|
||||
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
|
||||
def RETPOLINE_TCRETURN64 :
|
||||
def INDIRECT_THUNK_TCRETURN64 :
|
||||
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
|
||||
def RETPOLINE_TCRETURN32 :
|
||||
def INDIRECT_THUNK_TCRETURN32 :
|
||||
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
|
||||
}
|
||||
}
|
||||
|
@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;

//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
|
@ -0,0 +1,900 @@
//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
/// of a load from memory (i.e., SOURCE), and any operation that may transmit
/// the value loaded from memory over a covert channel, or use the value loaded
/// from memory to determine a branch/call target (i.e., SINK). After finding
/// all such gadgets in a given function, the pass minimally inserts LFENCE
/// instructions in such a manner that the following property is satisfied: for
/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
/// least one LFENCE instruction. The algorithm that implements this minimal
/// insertion is influenced by an academic paper that minimally inserts memory
/// fences for high-performance concurrent programs:
/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
/// The algorithm implemented in this pass is as follows:
/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
/// following components:
///    - SOURCE instructions (also includes function arguments)
///    - SINK instructions
///    - Basic block entry points
///    - Basic block terminators
///    - LFENCE instructions
/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been
/// mitigated, go to step 6.
/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
/// 5. Go to step 2.
/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction()
/// to tell LLVM that the function was modified.
///
//===----------------------------------------------------------------------===//
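//
// Illustrative example (simplified): in the sequence
//   movq (%rdi), %rax    # SOURCE: value loaded from memory
//   cmpq $0, %rax
//   jne  .Ltaken         # SINK: branch determined by the loaded value
// the load and the branch form a gadget, and the pass breaks it by ensuring
// an LFENCE sits on every CFG path between them, e.g. right after the load.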
|
||||
|
||||
#include "ImmutableGraph.h"
|
||||
#include "X86.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "X86TargetMachine.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineDominanceFrontier.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/RDFGraph.h"
|
||||
#include "llvm/CodeGen/RDFLiveness.h"
|
||||
#include "llvm/InitializePasses.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/DOTGraphTraits.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/DynamicLibrary.h"
|
||||
#include "llvm/Support/GraphWriter.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define PASS_KEY "x86-lvi-load"
|
||||
#define DEBUG_TYPE PASS_KEY
|
||||
|
||||
STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
|
||||
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
|
||||
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
|
||||
"were deployed");
|
||||
STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
|
||||
|
||||
static cl::opt<std::string> OptimizePluginPath(
|
||||
PASS_KEY "-opt-plugin",
|
||||
cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
|
||||
|
||||
static cl::opt<bool> NoConditionalBranches(
|
||||
PASS_KEY "-no-cbranch",
|
||||
cl::desc("Don't treat conditional branches as disclosure gadgets. This "
|
||||
"may improve performance, at the cost of security."),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
static cl::opt<bool> EmitDot(
|
||||
PASS_KEY "-dot",
|
||||
cl::desc(
|
||||
"For each function, emit a dot graph depicting potential LVI gadgets"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
static cl::opt<bool> EmitDotOnly(
|
||||
PASS_KEY "-dot-only",
|
||||
cl::desc("For each function, emit a dot graph depicting potential LVI "
|
||||
"gadgets, and do not insert any fences"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
static cl::opt<bool> EmitDotVerify(
|
||||
PASS_KEY "-dot-verify",
|
||||
cl::desc("For each function, emit a dot graph to stdout depicting "
|
||||
"potential LVI gadgets, used for testing purposes only"),
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
static llvm::sys::DynamicLibrary OptimizeDL;
|
||||
typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
|
||||
unsigned int *edges, int *edge_values,
|
||||
int *cut_edges /* out */, unsigned int edges_size);
|
||||
static OptimizeCutT OptimizeCut = nullptr;
|
||||
|
||||
namespace {
|
||||
|
||||
struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
|
||||
static constexpr int GadgetEdgeSentinel = -1;
|
||||
static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
|
||||
|
||||
using GraphT = ImmutableGraph<MachineInstr *, int>;
|
||||
using Node = typename GraphT::Node;
|
||||
using Edge = typename GraphT::Edge;
|
||||
using size_type = typename GraphT::size_type;
|
||||
MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
|
||||
std::unique_ptr<Edge[]> Edges, size_type NodesSize,
|
||||
size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
|
||||
: GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize),
|
||||
NumFences(NumFences), NumGadgets(NumGadgets) {}
|
||||
static inline bool isCFGEdge(const Edge &E) {
|
||||
return E.getValue() != GadgetEdgeSentinel;
|
||||
}
|
||||
static inline bool isGadgetEdge(const Edge &E) {
|
||||
return E.getValue() == GadgetEdgeSentinel;
|
||||
}
|
||||
int NumFences;
|
||||
int NumGadgets;
|
||||
};
|
||||
|
||||
class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass {
|
||||
public:
|
||||
X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {}
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return "X86 Load Value Injection (LVI) Load Hardening";
|
||||
}
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
static char ID;
|
||||
|
||||
private:
|
||||
using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>;
|
||||
using EdgeSet = MachineGadgetGraph::EdgeSet;
|
||||
using NodeSet = MachineGadgetGraph::NodeSet;
|
||||
using Gadget = std::pair<MachineInstr *, MachineInstr *>;
|
||||
|
||||
const X86Subtarget *STI;
|
||||
const TargetInstrInfo *TII;
|
||||
const TargetRegisterInfo *TRI;
|
||||
|
||||
std::unique_ptr<MachineGadgetGraph>
|
||||
getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
|
||||
const MachineDominatorTree &MDT,
|
||||
const MachineDominanceFrontier &MDF) const;
|
||||
int hardenLoadsWithPlugin(MachineFunction &MF,
|
||||
std::unique_ptr<MachineGadgetGraph> Graph) const;
|
||||
int hardenLoadsWithGreedyHeuristic(
|
||||
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const;
|
||||
int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G,
|
||||
EdgeSet &ElimEdges /* in, out */,
|
||||
NodeSet &ElimNodes /* in, out */) const;
|
||||
std::unique_ptr<MachineGadgetGraph>
|
||||
trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
|
||||
void findAndCutEdges(MachineGadgetGraph &G,
|
||||
EdgeSet &CutEdges /* out */) const;
|
||||
int insertFences(MachineFunction &MF, MachineGadgetGraph &G,
|
||||
EdgeSet &CutEdges /* in, out */) const;
|
||||
bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
|
||||
bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const;
|
||||
inline bool isFence(const MachineInstr *MI) const {
|
||||
return MI && (MI->getOpcode() == X86::LFENCE ||
|
||||
(STI->useLVIControlFlowIntegrity() && MI->isCall()));
|
||||
}
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
namespace llvm {
|
||||
|
||||
template <>
|
||||
struct GraphTraits<MachineGadgetGraph *>
|
||||
: GraphTraits<ImmutableGraph<MachineInstr *, int> *> {};
|
||||
|
||||
template <>
|
||||
struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
|
||||
using GraphType = MachineGadgetGraph;
|
||||
using Traits = llvm::GraphTraits<GraphType *>;
|
||||
using NodeRef = typename Traits::NodeRef;
|
||||
using EdgeRef = typename Traits::EdgeRef;
|
||||
using ChildIteratorType = typename Traits::ChildIteratorType;
|
||||
using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
|
||||
|
||||
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
|
||||
|
||||
std::string getNodeLabel(NodeRef Node, GraphType *) {
|
||||
if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel)
|
||||
return "ARGS";
|
||||
|
||||
std::string Str;
|
||||
raw_string_ostream OS(Str);
|
||||
OS << *Node->getValue();
|
||||
return OS.str();
|
||||
}
|
||||
|
||||
static std::string getNodeAttributes(NodeRef Node, GraphType *) {
|
||||
MachineInstr *MI = Node->getValue();
|
||||
if (MI == MachineGadgetGraph::ArgNodeSentinel)
|
||||
return "color = blue";
|
||||
if (MI->getOpcode() == X86::LFENCE)
|
||||
return "color = green";
|
||||
return "";
|
||||
}
|
||||
|
||||
static std::string getEdgeAttributes(NodeRef, ChildIteratorType E,
|
||||
GraphType *) {
|
||||
int EdgeVal = (*E.getCurrent()).getValue();
|
||||
return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal)
|
||||
: "color = red, style = \"dashed\"";
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
|
||||
constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
|
||||
|
||||
char X86LoadValueInjectionLoadHardeningPass::ID = 0;
|
||||
|
||||
void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
|
||||
AnalysisUsage &AU) const {
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
AU.addRequired<MachineLoopInfo>();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addRequired<MachineDominanceFrontier>();
|
||||
AU.setPreservesCFG();
|
||||
}
|
||||
|
||||
static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF,
|
||||
MachineGadgetGraph *G) {
|
||||
WriteGraph(OS, G, /*ShortNames*/ false,
|
||||
"Speculative gadgets for \"" + MF.getName() + "\" function");
|
||||
}
|
||||
|
||||
bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
|
||||
MachineFunction &MF) {
|
||||
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
|
||||
<< " *****\n");
|
||||
STI = &MF.getSubtarget<X86Subtarget>();
|
||||
if (!STI->useLVILoadHardening())
|
||||
return false;
|
||||
|
||||
// FIXME: support 32-bit
|
||||
if (!STI->is64Bit())
|
||||
report_fatal_error("LVI load hardening is only supported on 64-bit", false);
|
||||
|
||||
// Don't skip functions with the "optnone" attr, but do participate in opt-bisect.
|
||||
const Function &F = MF.getFunction();
|
||||
if (!F.hasOptNone() && skipFunction(F))
|
||||
return false;
|
||||
|
||||
++NumFunctionsConsidered;
|
||||
TII = STI->getInstrInfo();
|
||||
TRI = STI->getRegisterInfo();
|
||||
LLVM_DEBUG(dbgs() << "Building gadget graph...\n");
|
||||
const auto &MLI = getAnalysis<MachineLoopInfo>();
|
||||
const auto &MDT = getAnalysis<MachineDominatorTree>();
|
||||
const auto &MDF = getAnalysis<MachineDominanceFrontier>();
|
||||
std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF);
|
||||
LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n");
|
||||
if (Graph == nullptr)
|
||||
return false; // didn't find any gadgets
|
||||
|
||||
if (EmitDotVerify) {
|
||||
WriteGadgetGraph(outs(), MF, Graph.get());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (EmitDot || EmitDotOnly) {
|
||||
LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n");
|
||||
std::error_code FileError;
|
||||
std::string FileName = "lvi.";
|
||||
FileName += MF.getName();
|
||||
FileName += ".dot";
|
||||
raw_fd_ostream FileOut(FileName, FileError);
|
||||
if (FileError)
|
||||
errs() << FileError.message();
|
||||
WriteGadgetGraph(FileOut, MF, Graph.get());
|
||||
FileOut.close();
|
||||
LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n");
|
||||
if (EmitDotOnly)
|
||||
return false;
|
||||
}
|
||||
|
||||
int FencesInserted;
|
||||
if (!OptimizePluginPath.empty()) {
|
||||
if (!OptimizeDL.isValid()) {
|
||||
std::string ErrorMsg;
|
||||
OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
|
||||
OptimizePluginPath.c_str(), &ErrorMsg);
|
||||
if (!ErrorMsg.empty())
|
||||
report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
|
||||
OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
|
||||
if (!OptimizeCut)
|
||||
report_fatal_error("Invalid optimization plugin");
|
||||
}
|
||||
FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph));
|
||||
} else { // Use the default greedy heuristic
|
||||
FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph));
|
||||
}
|
||||
|
||||
if (FencesInserted > 0)
|
||||
++NumFunctionsMitigated;
|
||||
NumFences += FencesInserted;
|
||||
return (FencesInserted > 0);
|
||||
}
|
||||
|
||||
std::unique_ptr<MachineGadgetGraph>
|
||||
X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
|
||||
MachineFunction &MF, const MachineLoopInfo &MLI,
|
||||
const MachineDominatorTree &MDT,
|
||||
const MachineDominanceFrontier &MDF) const {
|
||||
using namespace rdf;
|
||||
|
||||
// Build the Register Dataflow Graph using the RDF framework
|
||||
TargetOperandInfo TOI{*TII};
|
||||
DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI};
|
||||
DFG.build();
|
||||
Liveness L{MF.getRegInfo(), DFG};
|
||||
L.computePhiInfo();
|
||||
|
||||
GraphBuilder Builder;
|
||||
using GraphIter = typename GraphBuilder::BuilderNodeRef;
|
||||
DenseMap<MachineInstr *, GraphIter> NodeMap;
|
||||
int FenceCount = 0, GadgetCount = 0;
|
||||
auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
|
||||
auto Ref = NodeMap.find(MI);
|
||||
if (Ref == NodeMap.end()) {
|
||||
auto I = Builder.addVertex(MI);
|
||||
NodeMap[MI] = I;
|
||||
return std::pair<GraphIter, bool>{I, true};
|
||||
}
|
||||
return std::pair<GraphIter, bool>{Ref->getSecond(), false};
|
||||
};
|
||||
|
||||
// The `Transmitters` map memoizes transmitters found for each def. If a def
|
||||
// has not yet been analyzed, then it will not appear in the map. If a def
|
||||
// has been analyzed and was determined not to have any transmitters, then
|
||||
// its list of transmitters will be empty.
|
||||
DenseMap<NodeId, std::vector<NodeId>> Transmitters;
|
||||
|
||||
// Analyze all machine instructions to find gadgets and LFENCEs, adding
|
||||
// each interesting value to `Nodes`
|
||||
auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) {
|
||||
SmallSet<NodeId, 8> UsesVisited, DefsVisited;
|
||||
std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
|
||||
[&](NodeAddr<DefNode *> Def) {
|
||||
if (Transmitters.find(Def.Id) != Transmitters.end())
|
||||
return; // Already analyzed `Def`
|
||||
|
||||
// Use RDF to find all the uses of `Def`
|
||||
rdf::NodeSet Uses;
|
||||
RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
|
||||
for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
|
||||
auto Use = DFG.addr<UseNode *>(UseID);
|
||||
if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
|
||||
NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG);
|
||||
for (auto I : L.getRealUses(Phi.Id)) {
|
||||
if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) {
|
||||
for (auto UA : I.second)
|
||||
Uses.emplace(UA.first);
|
||||
}
|
||||
}
|
||||
} else { // not a phi node
|
||||
Uses.emplace(UseID);
|
||||
}
|
||||
}
|
||||
|
||||
// For each use of `Def`, we want to know whether:
|
||||
// (1) The use can leak the Def'ed value,
|
||||
// (2) The use can further propagate the Def'ed value to more defs
|
||||
for (auto UseID : Uses) {
|
||||
if (!UsesVisited.insert(UseID).second)
|
||||
continue; // Already visited this use of `Def`
|
||||
|
||||
auto Use = DFG.addr<UseNode *>(UseID);
|
||||
assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef));
|
||||
MachineOperand &UseMO = Use.Addr->getOp();
|
||||
MachineInstr &UseMI = *UseMO.getParent();
|
||||
assert(UseMO.isReg());
|
||||
|
||||
// We naively assume that an instruction propagates any loaded
|
||||
// uses to all defs unless the instruction is a call, in which
|
||||
// case all arguments will be treated as gadget sources during
|
||||
// analysis of the callee function.
|
||||
if (UseMI.isCall())
|
||||
continue;
|
||||
|
||||
// Check whether this use can transmit (leak) its value.
|
||||
if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) ||
|
||||
(!NoConditionalBranches &&
|
||||
instrUsesRegToBranch(UseMI, UseMO.getReg()))) {
|
||||
Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id);
|
||||
if (UseMI.mayLoad())
|
||||
continue; // Found a transmitting load -- no need to continue
|
||||
// traversing its defs (i.e., this load will become
|
||||
// a new gadget source anyways).
|
||||
}
|
||||
|
||||
// Check whether the use propagates to more defs.
|
||||
NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)};
|
||||
rdf::NodeList AnalyzedChildDefs;
|
||||
for (auto &ChildDef :
|
||||
Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) {
|
||||
if (!DefsVisited.insert(ChildDef.Id).second)
|
||||
continue; // Already visited this def
|
||||
if (Def.Addr->getAttrs() & NodeAttrs::Dead)
|
||||
continue;
|
||||
if (Def.Id == ChildDef.Id)
|
||||
continue; // `Def` uses itself (e.g., increment loop counter)
|
||||
|
||||
AnalyzeDefUseChain(ChildDef);
|
||||
|
||||
// `Def` inherits all of its child defs' transmitters.
|
||||
for (auto TransmitterId : Transmitters[ChildDef.Id])
|
||||
Transmitters[Def.Id].push_back(TransmitterId);
|
||||
}
|
||||
}
|
||||
|
||||
// Note that this statement adds `Def.Id` to the map if no
|
||||
// transmitters were found for `Def`.
|
||||
auto &DefTransmitters = Transmitters[Def.Id];
|
||||
|
||||
// Remove duplicate transmitters
|
||||
llvm::sort(DefTransmitters);
|
||||
DefTransmitters.erase(
|
||||
std::unique(DefTransmitters.begin(), DefTransmitters.end()),
|
||||
DefTransmitters.end());
|
||||
};
|
||||
|
||||
// Find all of the transmitters
|
||||
AnalyzeDefUseChain(SourceDef);
|
||||
auto &SourceDefTransmitters = Transmitters[SourceDef.Id];
|
||||
if (SourceDefTransmitters.empty())
|
||||
return; // No transmitters for `SourceDef`
|
||||
|
||||
MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef
|
||||
? MachineGadgetGraph::ArgNodeSentinel
|
||||
: SourceDef.Addr->getOp().getParent();
|
||||
auto GadgetSource = MaybeAddNode(Source);
|
||||
// Each transmitter is a sink for `SourceDef`.
|
||||
for (auto TransmitterId : SourceDefTransmitters) {
|
||||
MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode();
|
||||
auto GadgetSink = MaybeAddNode(Sink);
|
||||
// Add the gadget edge to the graph.
|
||||
Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel,
|
||||
GadgetSource.first, GadgetSink.first);
|
||||
++GadgetCount;
|
||||
}
|
||||
};
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n");
|
||||
// Analyze function arguments
|
||||
NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG);
|
||||
for (NodeAddr<PhiNode *> ArgPhi :
|
||||
EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) {
|
||||
NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG);
|
||||
llvm::for_each(Defs, AnalyzeDef);
|
||||
}
|
||||
// Analyze every instruction in MF
|
||||
for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
|
||||
for (NodeAddr<StmtNode *> SA :
|
||||
BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) {
|
||||
MachineInstr *MI = SA.Addr->getCode();
|
||||
if (isFence(MI)) {
|
||||
MaybeAddNode(MI);
|
||||
++FenceCount;
|
||||
} else if (MI->mayLoad()) {
|
||||
NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG);
|
||||
llvm::for_each(Defs, AnalyzeDef);
|
||||
}
|
||||
}
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n");
|
||||
LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n");
|
||||
if (GadgetCount == 0)
|
||||
return nullptr;
|
||||
NumGadgets += GadgetCount;
|
||||
|
||||
// Traverse CFG to build the rest of the graph
|
||||
SmallSet<MachineBasicBlock *, 8> BlocksVisited;
|
||||
std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG =
|
||||
[&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) {
|
||||
unsigned LoopDepth = MLI.getLoopDepth(MBB);
|
||||
if (!MBB->empty()) {
|
||||
// Always add the first instruction in each block
|
||||
auto NI = MBB->begin();
|
||||
auto BeginBB = MaybeAddNode(&*NI);
|
||||
Builder.addEdge(ParentDepth, GI, BeginBB.first);
|
||||
if (!BlocksVisited.insert(MBB).second)
|
||||
return;
|
||||
|
||||
// Add any instructions within the block that are gadget components
|
||||
GI = BeginBB.first;
|
||||
while (++NI != MBB->end()) {
|
||||
auto Ref = NodeMap.find(&*NI);
|
||||
if (Ref != NodeMap.end()) {
|
||||
Builder.addEdge(LoopDepth, GI, Ref->getSecond());
|
||||
GI = Ref->getSecond();
|
||||
}
|
||||
}
|
||||
|
||||
// Always add the terminator instruction, if one exists
|
||||
auto T = MBB->getFirstTerminator();
|
||||
if (T != MBB->end()) {
|
||||
auto EndBB = MaybeAddNode(&*T);
|
||||
if (EndBB.second)
|
||||
Builder.addEdge(LoopDepth, GI, EndBB.first);
|
||||
GI = EndBB.first;
|
||||
}
|
||||
}
|
||||
for (MachineBasicBlock *Succ : MBB->successors())
|
||||
TraverseCFG(Succ, GI, LoopDepth);
|
||||
};
|
||||
// ArgNodeSentinel is a pseudo-instruction that represents MF args in the
|
||||
// GadgetGraph
|
||||
GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first;
|
||||
TraverseCFG(&MF.front(), ArgNode, 0);
|
||||
std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)};
|
||||
LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n");
|
||||
return G;
|
||||
}
|
||||
|
||||
// Returns the number of remaining gadget edges that could not be eliminated
|
||||
int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
|
||||
MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */,
|
||||
MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const {
|
||||
if (G.NumFences > 0) {
|
||||
// Eliminate fences and CFG edges that ingress and egress the fence, as
|
||||
// they are trivially mitigated.
|
||||
for (const auto &E : G.edges()) {
|
||||
const MachineGadgetGraph::Node *Dest = E.getDest();
|
||||
if (isFence(Dest->getValue())) {
|
||||
ElimNodes.insert(*Dest);
|
||||
ElimEdges.insert(E);
|
||||
for (const auto &DE : Dest->edges())
|
||||
ElimEdges.insert(DE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find and eliminate gadget edges that have been mitigated.
|
||||
int MitigatedGadgets = 0, RemainingGadgets = 0;
|
||||
MachineGadgetGraph::NodeSet ReachableNodes{G};
|
||||
for (const auto &RootN : G.nodes()) {
|
||||
if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge))
|
||||
continue; // skip this node if it isn't a gadget source
|
||||
|
||||
// Find all of the nodes that are CFG-reachable from RootN using DFS
|
||||
ReachableNodes.clear();
|
||||
std::function<void(const MachineGadgetGraph::Node *, bool)>
|
||||
FindReachableNodes =
|
||||
[&](const MachineGadgetGraph::Node *N, bool FirstNode) {
|
||||
if (!FirstNode)
|
||||
ReachableNodes.insert(*N);
|
||||
for (const auto &E : N->edges()) {
|
||||
const MachineGadgetGraph::Node *Dest = E.getDest();
|
||||
if (MachineGadgetGraph::isCFGEdge(E) &&
|
||||
!ElimEdges.contains(E) && !ReachableNodes.contains(*Dest))
|
||||
FindReachableNodes(Dest, false);
|
||||
}
|
||||
};
|
||||
FindReachableNodes(&RootN, true);
|
||||
|
||||
// Any gadget whose sink is unreachable has been mitigated
|
||||
for (const auto &E : RootN.edges()) {
|
||||
if (MachineGadgetGraph::isGadgetEdge(E)) {
|
||||
if (ReachableNodes.contains(*E.getDest())) {
|
||||
// This gadget's sink is reachable
|
||||
++RemainingGadgets;
|
||||
} else { // This gadget's sink is unreachable, and therefore mitigated
|
||||
++MitigatedGadgets;
|
||||
ElimEdges.insert(E);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return RemainingGadgets;
|
||||
}
|
||||
|
||||
std::unique_ptr<MachineGadgetGraph>
|
||||
X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges(
|
||||
std::unique_ptr<MachineGadgetGraph> Graph) const {
|
||||
MachineGadgetGraph::NodeSet ElimNodes{*Graph};
|
||||
MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
|
||||
int RemainingGadgets =
|
||||
elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes);
|
||||
if (ElimEdges.empty() && ElimNodes.empty()) {
|
||||
Graph->NumFences = 0;
|
||||
Graph->NumGadgets = RemainingGadgets;
|
||||
} else {
|
||||
Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */,
|
||||
RemainingGadgets);
|
||||
}
|
||||
return Graph;
|
||||
}
|
||||
|
||||
int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin(
|
||||
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
|
||||
int FencesInserted = 0;
|
||||
|
||||
do {
|
||||
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
|
||||
Graph = trimMitigatedEdges(std::move(Graph));
|
||||
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
|
||||
if (Graph->NumGadgets == 0)
|
||||
break;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Cutting edges...\n");
|
||||
EdgeSet CutEdges{*Graph};
|
||||
auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() +
|
||||
1 /* terminator node */);
|
||||
auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size());
|
||||
auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size());
|
||||
auto EdgeValues = std::make_unique<int[]>(Graph->edges_size());
|
||||
for (const auto &N : Graph->nodes()) {
|
||||
Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin());
|
||||
}
|
||||
Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node
|
||||
for (const auto &E : Graph->edges()) {
|
||||
Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest());
|
||||
EdgeValues[Graph->getEdgeIndex(E)] = E.getValue();
|
||||
}
|
||||
OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(),
|
||||
EdgeCuts.get(), Graph->edges_size());
|
||||
for (int I = 0; I < Graph->edges_size(); ++I)
|
||||
if (EdgeCuts[I])
|
||||
CutEdges.set(I);
|
||||
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
|
||||
LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
|
||||
FencesInserted += insertFences(MF, *Graph, CutEdges);
|
||||
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
|
||||
LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
|
||||
|
||||
Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph},
|
||||
CutEdges);
|
||||
} while (true);
|
||||
|
||||
return FencesInserted;
|
||||
}
|
||||
|
||||
int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic(
|
||||
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
|
||||
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
|
||||
Graph = trimMitigatedEdges(std::move(Graph));
|
||||
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
|
||||
if (Graph->NumGadgets == 0)
|
||||
return 0;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Cutting edges...\n");
|
||||
MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph};
|
||||
MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph};
|
||||
auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) {
|
||||
return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
|
||||
MachineGadgetGraph::isCFGEdge(E);
|
||||
};
|
||||
auto IsGadgetEdge = [&ElimEdges,
|
||||
&CutEdges](const MachineGadgetGraph::Edge &E) {
|
||||
return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
|
||||
MachineGadgetGraph::isGadgetEdge(E);
|
||||
};
|
||||
|
||||
// FIXME: this is O(E^2), we could probably do better.
|
||||
do {
|
||||
// Find the cheapest CFG edge that will eliminate a gadget (by being
|
||||
// egress from a SOURCE node or ingress to a SINK node), and cut it.
|
||||
const MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
|
||||
|
||||
// First, collect all gadget source and sink nodes.
|
||||
MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph};
|
||||
for (const auto &N : Graph->nodes()) {
|
||||
if (ElimNodes.contains(N))
|
||||
continue;
|
||||
for (const auto &E : N.edges()) {
|
||||
if (IsGadgetEdge(E)) {
|
||||
GadgetSources.insert(N);
|
||||
GadgetSinks.insert(*E.getDest());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Next, look for the cheapest CFG edge which, when cut, is guaranteed to
|
||||
// mitigate at least one gadget by either:
|
||||
// (a) being egress from a gadget source, or
|
||||
// (b) being ingress to a gadget sink.
|
||||
for (const auto &N : Graph->nodes()) {
|
||||
if (ElimNodes.contains(N))
|
||||
continue;
|
||||
for (const auto &E : N.edges()) {
|
||||
if (IsCFGEdge(E)) {
|
||||
if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) {
|
||||
if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue())
|
||||
CheapestSoFar = &E;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(CheapestSoFar && "Failed to cut an edge");
|
||||
CutEdges.insert(*CheapestSoFar);
|
||||
ElimEdges.insert(*CheapestSoFar);
|
||||
} while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes));
|
||||
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
|
||||
LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
|
||||
|
||||
LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
|
||||
int FencesInserted = insertFences(MF, *Graph, CutEdges);
|
||||
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
|
||||
LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
|
||||
|
||||
return FencesInserted;
|
||||
}
|
||||
|
||||
int X86LoadValueInjectionLoadHardeningPass::insertFences(
|
||||
MachineFunction &MF, MachineGadgetGraph &G,
|
||||
EdgeSet &CutEdges /* in, out */) const {
|
||||
int FencesInserted = 0;
|
||||
for (const auto &N : G.nodes()) {
|
||||
for (const auto &E : N.edges()) {
|
||||
if (CutEdges.contains(E)) {
|
||||
MachineInstr *MI = N.getValue(), *Prev;
|
||||
MachineBasicBlock *MBB; // Insert an LFENCE in this MBB
|
||||
MachineBasicBlock::iterator InsertionPt; // ...at this point
|
||||
if (MI == MachineGadgetGraph::ArgNodeSentinel) {
|
||||
// insert LFENCE at beginning of entry block
|
||||
MBB = &MF.front();
|
||||
InsertionPt = MBB->begin();
|
||||
Prev = nullptr;
|
||||
} else if (MI->isBranch()) { // insert the LFENCE before the branch
|
||||
MBB = MI->getParent();
|
||||
InsertionPt = MI;
|
||||
Prev = MI->getPrevNode();
|
||||
// Remove all egress CFG edges from this branch because the inserted
|
||||
// LFENCE prevents gadgets from crossing the branch.
|
||||
for (const auto &E : N.edges()) {
|
||||
if (MachineGadgetGraph::isCFGEdge(E))
|
||||
CutEdges.insert(E);
|
||||
}
|
||||
} else { // insert the LFENCE after the instruction
|
||||
MBB = MI->getParent();
|
||||
InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
|
||||
Prev = InsertionPt == MBB->end()
|
||||
? (MBB->empty() ? nullptr : &MBB->back())
|
||||
: InsertionPt->getPrevNode();
|
||||
}
|
||||
// Ensure this insertion is not redundant (two LFENCEs in sequence).
|
||||
if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
|
||||
(!Prev || !isFence(Prev))) {
|
||||
BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
|
||||
++FencesInserted;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return FencesInserted;
|
||||
}
|
||||
|
||||
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
|
||||
const MachineInstr &MI, unsigned Reg) const {
|
||||
if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
|
||||
MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE)
|
||||
return false;
|
||||
|
||||
// FIXME: This does not handle pseudo loading instructions like TCRETURN*
|
||||
const MCInstrDesc &Desc = MI.getDesc();
|
||||
int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
|
||||
if (MemRefBeginIdx < 0) {
|
||||
LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading "
|
||||
"instruction:\n";
|
||||
MI.print(dbgs()); dbgs() << '\n';);
|
||||
return false;
|
||||
}
|
||||
MemRefBeginIdx += X86II::getOperandBias(Desc);
|
||||
|
||||
const MachineOperand &BaseMO =
|
||||
MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
|
||||
const MachineOperand &IndexMO =
|
||||
MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
|
||||
return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister &&
|
||||
TRI->regsOverlap(BaseMO.getReg(), Reg)) ||
|
||||
(IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister &&
|
||||
TRI->regsOverlap(IndexMO.getReg(), Reg));
|
||||
}
|
||||
|
||||
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch(
|
||||
const MachineInstr &MI, unsigned Reg) const {
|
||||
if (!MI.isConditionalBranch())
|
||||
return false;
|
||||
for (const MachineOperand &Use : MI.uses())
|
||||
if (Use.isReg() && Use.getReg() == Reg)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
|
||||
"X86 LVI load hardening", false, false)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
|
||||
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
|
||||
INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
|
||||
"X86 LVI load hardening", false, false)
|
||||
|
||||
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
|
||||
return new X86LoadValueInjectionLoadHardeningPass();
|
||||
}
|
||||
|
||||
namespace {

/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
/// analysis passes that add complexity to the pipeline. This complexity
/// can cause noticeable overhead when no optimizations are enabled, i.e., -O0.
/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
/// provide the same security as the optimized pass, but without adding
/// unnecessary complexity to the LLVM pipeline.
///
/// The behavior of this pass is simply to insert an LFENCE after every load
/// instruction.
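///
/// For illustration (simplified): a load such as
///   movq (%rdi), %rax
/// is unconditionally followed by
///   lfence
/// whether or not the loaded value can actually reach a transmitter.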
|
||||
class X86LoadValueInjectionLoadHardeningUnoptimizedPass
|
||||
: public MachineFunctionPass {
|
||||
public:
|
||||
X86LoadValueInjectionLoadHardeningUnoptimizedPass()
|
||||
: MachineFunctionPass(ID) {}
|
||||
|
||||
StringRef getPassName() const override {
|
||||
return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
|
||||
}
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
static char ID;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
|
||||
|
||||
bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
|
||||
MachineFunction &MF) {
|
||||
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
|
||||
<< " *****\n");
|
||||
const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
|
||||
if (!STI->useLVILoadHardening())
|
||||
return false;
|
||||
|
||||
// FIXME: support 32-bit
|
||||
if (!STI->is64Bit())
|
||||
report_fatal_error("LVI load hardening is only supported on 64-bit", false);
|
||||
|
||||
// Don't skip functions with the "optnone" attr, but do participate in opt-bisect.
|
||||
const Function &F = MF.getFunction();
|
||||
if (!F.hasOptNone() && skipFunction(F))
|
||||
return false;
|
||||
|
||||
bool Modified = false;
|
||||
++NumFunctionsConsidered;
|
||||
|
||||
const TargetInstrInfo *TII = STI->getInstrInfo();
|
||||
for (auto &MBB : MF) {
|
||||
for (auto &MI : MBB) {
|
||||
if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
|
||||
MI.getOpcode() == X86::MFENCE)
|
||||
continue;
|
||||
|
||||
MachineBasicBlock::iterator InsertionPt =
|
||||
MI.getNextNode() ? MI.getNextNode() : MBB.end();
|
||||
BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
|
||||
++NumFences;
|
||||
Modified = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (Modified)
|
||||
++NumFunctionsMitigated;
|
||||
|
||||
return Modified;
|
||||
}
|
||||
|
||||
INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
|
||||
"X86 LVI load hardening", false, false)
|
||||
|
||||
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
|
||||
return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
|
||||
}
|
@ -0,0 +1,143 @@
//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: Replaces every `ret` instruction with the sequence:
/// ```
/// pop <scratch-reg>
/// lfence
/// jmp *<scratch-reg>
/// ```
/// where `<scratch-reg>` is some available scratch register, according to the
/// calling convention of the function being mitigated.
///
//===----------------------------------------------------------------------===//
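//
// For illustration (x86-64 sketch, assuming %rcx is the scratch register the
// pass selects): a trailing
//   retq
// becomes
//   popq  %rcx
//   lfence
//   jmpq  *%rcx
// so the return address is consumed by an explicit load that is serialized
// before control transfers.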
|
||||
|
||||
#include "X86.h"
|
||||
#include "X86InstrBuilder.h"
|
||||
#include "X86Subtarget.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include <bitset>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define PASS_KEY "x86-lvi-ret"
|
||||
#define DEBUG_TYPE PASS_KEY
|
||||
|
||||
STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
|
||||
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
|
||||
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
|
||||
"were deployed");
|
||||
|
||||
namespace {
|
||||
|
||||
class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass {
|
||||
public:
|
||||
X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {}
|
||||
StringRef getPassName() const override {
|
||||
return "X86 Load Value Injection (LVI) Ret-Hardening";
|
||||
}
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
static char ID;
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
char X86LoadValueInjectionRetHardeningPass::ID = 0;
|
||||
|
||||
bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
|
||||
MachineFunction &MF) {
|
||||
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
|
||||
<< " *****\n");
|
||||
const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
|
||||
if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit())
|
||||
return false; // FIXME: support 32-bit
|
||||
|
||||
// Don't skip functions with the "optnone" attr but participate in opt-bisect.
|
||||
const Function &F = MF.getFunction();
|
||||
if (!F.hasOptNone() && skipFunction(F))
|
||||
return false;
|
||||
|
||||
++NumFunctionsConsidered;
|
||||
const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
|
||||
const X86InstrInfo *TII = Subtarget->getInstrInfo();
|
||||
unsigned ClobberReg = X86::NoRegister;
|
||||
std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
|
||||
UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
|
||||
UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
|
||||
UnclobberableGR64s.set(X86::RAX); // used for function return
|
||||
UnclobberableGR64s.set(X86::RDX); // used for function return
|
||||
|
||||
// We can clobber any register allowed by the function's calling convention.
|
||||
for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
|
||||
UnclobberableGR64s.set(Reg);
|
||||
for (auto &Reg : X86::GR64RegClass) {
|
||||
if (!UnclobberableGR64s.test(Reg)) {
|
||||
ClobberReg = Reg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ClobberReg != X86::NoRegister) {
|
||||
LLVM_DEBUG(dbgs() << "Selected register "
|
||||
<< Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
|
||||
<< " to clobber\n");
|
||||
} else {
|
||||
LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
|
||||
}
|
||||
|
||||
bool Modified = false;
|
||||
for (auto &MBB : MF) {
|
||||
if (MBB.empty())
|
||||
continue;
|
||||
|
||||
MachineInstr &MI = MBB.back();
|
||||
if (MI.getOpcode() != X86::RETQ)
|
||||
continue;
|
||||
|
||||
if (ClobberReg != X86::NoRegister) {
|
||||
MBB.erase_instr(&MI);
|
||||
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
|
||||
.addReg(ClobberReg, RegState::Define)
|
||||
.setMIFlag(MachineInstr::FrameDestroy);
|
||||
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
|
||||
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
|
||||
.addReg(ClobberReg);
|
||||
} else {
|
||||
// In case there is no available scratch register, we can still read from
|
||||
// RSP to assert that RSP points to a valid page. The write to RSP is
|
||||
// also helpful because it verifies that the stack's write permissions
|
||||
// are intact.
|
||||
MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
|
||||
addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
|
||||
X86::RSP, false, 0)
|
||||
.addImm(0)
|
||||
->addRegisterDead(X86::EFLAGS, TRI);
|
||||
}
|
||||
|
||||
++NumFences;
|
||||
Modified = true;
|
||||
}
|
||||
|
||||
if (Modified)
|
||||
++NumFunctionsMitigated;
|
||||
return Modified;
|
||||
}
|
||||
|
||||
INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY,
|
||||
"X86 LVI ret hardener", false, false)
|
||||
|
||||
FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() {
|
||||
return new X86LoadValueInjectionRetHardeningPass();
|
||||
}
|
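// Illustrative sketch only (not part of this diff): with a free scratch
// register available (say %r11), the ret-hardening pass above rewrites a
// 64-bit return
//
//   retq
//
// into
//
//   popq  %r11
//   lfence
//   jmp   *%r11
//
// and, when no scratch register can be found, it falls back to fencing the
// return in place:
//
//   shlq  $0, (%rsp)    # touches the return slot, verifying RSP is writable
//   lfence
//   retq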
@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
      break;
    case MachineOperand::MO_Register:
      // FIXME: Add retpoline support and remove this.
      if (Subtarget->useRetpolineIndirectCalls())
        report_fatal_error("Lowering register statepoints with retpoline not "
      if (Subtarget->useIndirectThunkCalls())
        report_fatal_error("Lowering register statepoints with thunks not "
                           "yet implemented.");
      CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
      CallOpcode = X86::CALL64r;
@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
    EmitAndCountInstruction(
        MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
    // FIXME: Add retpoline support and remove this.
    if (Subtarget->useRetpolineIndirectCalls())
    if (Subtarget->useIndirectThunkCalls())
      report_fatal_error(
          "Lowering patchpoint with retpoline not yet implemented.");
          "Lowering patchpoint with thunks not yet implemented.");
    EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
  }

@ -1,286 +0,0 @@
//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk implementing a "retpoline". This is
/// a RET-implemented trampoline that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "x86-retpoline-thunks"

static const char ThunkNamePrefix[] = "__llvm_retpoline_";
static const char R11ThunkName[] = "__llvm_retpoline_r11";
static const char EAXThunkName[] = "__llvm_retpoline_eax";
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
static const char EDXThunkName[] = "__llvm_retpoline_edx";
static const char EDIThunkName[] = "__llvm_retpoline_edi";

namespace {
class X86RetpolineThunks : public MachineFunctionPass {
public:
  static char ID;

  X86RetpolineThunks() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 Retpoline Thunks"; }

  bool doInitialization(Module &M) override;
  bool runOnMachineFunction(MachineFunction &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineModuleInfoWrapperPass>();
    AU.addPreserved<MachineModuleInfoWrapperPass>();
  }

private:
  MachineModuleInfo *MMI = nullptr;
  const TargetMachine *TM = nullptr;
  bool Is64Bit = false;
  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;

  bool InsertedThunks = false;

  void createThunkFunction(Module &M, StringRef Name);
  void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
  void populateThunk(MachineFunction &MF, unsigned Reg);
};

} // end anonymous namespace

FunctionPass *llvm::createX86RetpolineThunksPass() {
  return new X86RetpolineThunks();
}

char X86RetpolineThunks::ID = 0;

bool X86RetpolineThunks::doInitialization(Module &M) {
  InsertedThunks = false;
  return false;
}

bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << getPassName() << '\n');

  TM = &MF.getTarget();
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;

  MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  Module &M = const_cast<Module &>(*MMI->getModule());

  // If this function is not a thunk, check to see if we need to insert
  // a thunk.
  if (!MF.getName().startswith(ThunkNamePrefix)) {
    // If we've already inserted a thunk, nothing else to do.
    if (InsertedThunks)
      return false;

    // Only add a thunk if one of the functions has the retpoline feature
    // enabled in its subtarget, and doesn't enable external thunks.
    // FIXME: Conditionalize on indirect calls so we don't emit a thunk when
    // nothing will end up calling it.
    // FIXME: It's a little silly to look at every function just to enumerate
    // the subtargets, but eventually we'll want to look at them for indirect
    // calls, so maybe this is OK.
    if ((!STI->useRetpolineIndirectCalls() &&
         !STI->useRetpolineIndirectBranches()) ||
        STI->useRetpolineExternalThunk())
      return false;

    // Otherwise, we need to insert the thunk.
    // WARNING: This is not really a well behaving thing to do in a function
    // pass. We extract the module and insert a new function (and machine
    // function) directly into the module.
    if (Is64Bit)
      createThunkFunction(M, R11ThunkName);
    else
      for (StringRef Name :
           {EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
        createThunkFunction(M, Name);
    InsertedThunks = true;
    return true;
  }

  // If this *is* a thunk function, we need to populate it with the correct MI.
  if (Is64Bit) {
    assert(MF.getName() == "__llvm_retpoline_r11" &&
           "Should only have an r11 thunk on 64-bit targets");

    // __llvm_retpoline_r11:
    //   callq .Lr11_call_target
    // .Lr11_capture_spec:
    //   pause
    //   lfence
    //   jmp .Lr11_capture_spec
    // .align 16
    // .Lr11_call_target:
    //   movq %r11, (%rsp)
    //   retq
    populateThunk(MF, X86::R11);
  } else {
    // For 32-bit targets we need to emit a collection of thunks for various
    // possible scratch registers as well as a fallback that uses EDI, which is
    // normally callee saved.
    // __llvm_retpoline_eax:
    //   calll .Leax_call_target
    // .Leax_capture_spec:
    //   pause
    //   jmp .Leax_capture_spec
    // .align 16
    // .Leax_call_target:
    //   movl %eax, (%esp)  # Clobber return addr
    //   retl
    //
    // __llvm_retpoline_ecx:
    //   ... # Same setup
    //   movl %ecx, (%esp)
    //   retl
    //
    // __llvm_retpoline_edx:
    //   ... # Same setup
    //   movl %edx, (%esp)
    //   retl
    //
    // __llvm_retpoline_edi:
    //   ... # Same setup
    //   movl %edi, (%esp)
    //   retl
    if (MF.getName() == EAXThunkName)
      populateThunk(MF, X86::EAX);
    else if (MF.getName() == ECXThunkName)
      populateThunk(MF, X86::ECX);
    else if (MF.getName() == EDXThunkName)
      populateThunk(MF, X86::EDX);
    else if (MF.getName() == EDIThunkName)
      populateThunk(MF, X86::EDI);
    else
      llvm_unreachable("Invalid thunk name on x86-32!");
  }

  return true;
}

void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
  assert(Name.startswith(ThunkNamePrefix) &&
         "Created a thunk with an unexpected prefix!");

  LLVMContext &Ctx = M.getContext();
  auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *F =
      Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
  F->setVisibility(GlobalValue::HiddenVisibility);
  F->setComdat(M.getOrInsertComdat(Name));

  // Add Attributes so that we don't create a frame, unwind information, or
  // inline.
  AttrBuilder B;
  B.addAttribute(llvm::Attribute::NoUnwind);
  B.addAttribute(llvm::Attribute::Naked);
  F->addAttributes(llvm::AttributeList::FunctionIndex, B);

  // Populate our function a bit so that we can verify.
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(Entry);

  Builder.CreateRetVoid();

  // MachineFunctions/MachineBasicBlocks aren't created automatically for the
  // IR-level constructs we already made. Create them and insert them into the
  // module.
  MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
  MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);

  // Insert EntryMBB into MF. It's not in the module until we do this.
  MF.insert(MF.end(), EntryMBB);
}

void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
                                                    unsigned Reg) {
  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
  const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
  addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
      .addReg(Reg);
}

void X86RetpolineThunks::populateThunk(MachineFunction &MF,
                                       unsigned Reg) {
  // Set MF properties. We never use vregs...
  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);

  // Grab the entry MBB and erase any other blocks. O0 codegen appears to
  // generate two bbs for the entry block.
  MachineBasicBlock *Entry = &MF.front();
  Entry->clear();
  while (MF.size() > 1)
    MF.erase(std::next(MF.begin()));

  MachineBasicBlock *CaptureSpec =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MachineBasicBlock *CallTarget =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MCSymbol *TargetSym = MF.getContext().createTempSymbol();
  MF.push_back(CaptureSpec);
  MF.push_back(CallTarget);

  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
  const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;

  Entry->addLiveIn(Reg);
  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);

  // The MIR verifier thinks that the CALL in the entry block will fall through
  // to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is
  // the successor, but the MIR verifier doesn't know how to cope with that.
  Entry->addSuccessor(CaptureSpec);

  // In the capture loop for speculation, we want to stop the processor from
  // speculating as fast as possible. On Intel processors, the PAUSE instruction
  // will block speculation without consuming any execution resources. On AMD
  // processors, the PAUSE instruction is (essentially) a nop, so we also use an
  // LFENCE instruction which they have advised will stop speculation as well
  // with minimal resource utilization. We still end the capture with a jump to
  // form an infinite loop to fully guarantee that no matter what implementation
  // of the x86 ISA, speculating this code path never escapes.
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
  CaptureSpec->setHasAddressTaken();
  CaptureSpec->addSuccessor(CaptureSpec);

  CallTarget->addLiveIn(Reg);
  CallTarget->setHasAddressTaken();
  CallTarget->setAlignment(Align(16));
  insertRegReturnAddrClobber(*CallTarget, Reg);
  CallTarget->back().setPreInstrSymbol(MF, TargetSym);
  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
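// Illustrative call-site shape only (not shown in this diff): with retpoline
// lowering enabled, an indirect call such as
//
//   callq *%r11
//
// is instead emitted as a direct call to the thunk defined above,
//
//   callq __llvm_retpoline_r11
//
// with the real target already placed in %r11; the thunk's pause/lfence loop
// then captures any mispredicted speculation while the architectural path
// returns to the real target.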
@ -421,6 +421,16 @@ protected:
  /// than emitting one inside the compiler.
  bool UseRetpolineExternalThunk = false;

  /// Prevent generation of indirect call/branch instructions from memory,
  /// and force all indirect call/branch instructions from a register to be
  /// preceded by an LFENCE. Also decompose RET instructions into a
  /// POP+LFENCE+JMP sequence.
  bool UseLVIControlFlowIntegrity = false;

  /// Insert LFENCE instructions to prevent data speculatively injected into
  /// loads from being used maliciously.
  bool UseLVILoadHardening = false;

  /// Use software floating point for code generation.
  bool UseSoftFloat = false;

@ -707,8 +717,21 @@ public:
    return UseRetpolineIndirectBranches;
  }
  bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }

  // These are generic getters that OR together all of the thunk types
  // supported by the subtarget. Therefore useIndirectThunk*() will return true
  // if any respective thunk feature is enabled.
  bool useIndirectThunkCalls() const {
    return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
  }
  bool useIndirectThunkBranches() const {
    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
  }

  bool preferMaskRegisters() const { return PreferMaskRegisters; }
  bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
  bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
  bool useLVILoadHardening() const { return UseLVILoadHardening; }

  unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
  unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
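// Illustrative usage only: callers that previously tested the
// retpoline-specific predicate can now ask the generic question, so a single
// check covers both retpoline and LVI-CFI thunks, as the X86MCInstLower.cpp
// hunks earlier in this diff do:
//
//   if (Subtarget->useIndirectThunkCalls())
//     /* route the indirect call through a thunk, or reject the lowering */;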
@ -853,10 +876,10 @@ public:
  /// Return true if the subtarget allows calls to immediate address.
  bool isLegalToCallImmediateAddr() const;

  /// If we are using retpolines, we need to expand indirectbr to avoid it
  /// If we are using indirect thunks, we need to expand indirectbr to avoid it
  /// lowering to an actual indirect jump.
  bool enableIndirectBrExpand() const override {
    return useRetpolineIndirectBranches();
    return useIndirectThunkBranches();
  }

  /// Enable the MachineScheduler pass for all X86 subtargets.

@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
  initializeX86SpeculativeLoadHardeningPassPass(PR);
  initializeX86FlagsCopyLoweringPassPass(PR);
  initializeX86CondBrFoldingPassPass(PR);
  initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
  initializeX86LoadValueInjectionRetHardeningPassPass(PR);
  initializeX86OptimizeLEAPassPass(PR);
}

@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {

void X86PassConfig::addPostRegAlloc() {
  addPass(createX86FloatingPointStackifierPass());
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createX86LoadValueInjectionLoadHardeningPass());
  else
    addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
}

void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
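// Illustrative usage only: the graph-based load-hardening pass runs when the
// codegen optimization level is above None, and the fence-per-load pass runs
// at -O0. Assuming the usual clang driver spelling for this feature (the flag
// name is an assumption here, not taken from this hunk), either path can be
// exercised with e.g.
//
//   clang -O2 -mlvi-hardening -c test.c   # optimized LVI load hardening
//   clang -O0 -mlvi-hardening -c test.c   # plain LFENCE after every load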
@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {
  const Triple &TT = TM->getTargetTriple();
  const MCAsmInfo *MAI = TM->getMCAsmInfo();

  addPass(createX86RetpolineThunksPass());
  addPass(createX86IndirectThunksPass());

  // Insert extra int3 instructions after trailing call instructions to avoid
  // issues in the unwinder.
@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {
  // Identify valid longjmp targets for Windows Control Flow Guard.
  if (TT.isOSWindows())
    addPass(createCFGuardLongjmpPass());
  addPass(createX86LoadValueInjectionRetHardeningPass());
}

std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
@ -1768,7 +1768,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
    Constant *C2;

    // C-(C2-X) --> X+(C-C2)
    if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
    if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2))
      return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));

    // C-(X+C2) --> (C-C2)-X
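// Illustrative example of the C-(C2-X) fold above (values chosen here, not
// taken from the diff): with C = 10 and C2 = 7,
//
//   %s = sub i32 7, %x
//   %r = sub i32 10, %s
//
// becomes
//
//   %r = add i32 %x, 3        ; X + (C - C2)
//
// The added !isa<ConstantExpr>(C2) check simply skips the transform when C2 is
// a constant expression rather than a plain constant.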
@ -1,14 +1,14 @@
// $FreeBSD$

#define LLVM_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLVM_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"

#define CLANG_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define CLANG_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git"

// <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
#define LLD_REVISION "llvmorg-10.0.0-97-g6f71678ecd2-1300007"
#define LLD_REVISION "llvmorg-10.0.0-129-gd24d5c8e308-1300007"
#define LLD_REPOSITORY "FreeBSD"

#define LLDB_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLDB_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git"

@ -1,3 +1,3 @@
/* $FreeBSD$ */
#define LLVM_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLVM_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"

@ -318,6 +318,9 @@ SRCS_MIN+= CodeGen/ProcessImplicitDefs.cpp
SRCS_MIN+= CodeGen/PrologEpilogInserter.cpp
SRCS_MIN+= CodeGen/PseudoSourceValue.cpp
SRCS_MIN+= CodeGen/ReachingDefAnalysis.cpp
SRCS_MIN+= CodeGen/RDFGraph.cpp
SRCS_MIN+= CodeGen/RDFLiveness.cpp
SRCS_MIN+= CodeGen/RDFRegisters.cpp
SRCS_MIN+= CodeGen/RegAllocBase.cpp
SRCS_MIN+= CodeGen/RegAllocBasic.cpp
SRCS_MIN+= CodeGen/RegAllocFast.cpp
@ -1284,6 +1287,7 @@ SRCS_MIN+= Target/X86/X86FrameLowering.cpp
SRCS_MIN+= Target/X86/X86ISelDAGToDAG.cpp
SRCS_MIN+= Target/X86/X86ISelLowering.cpp
SRCS_MIN+= Target/X86/X86IndirectBranchTracking.cpp
SRCS_MIN+= Target/X86/X86IndirectThunks.cpp
SRCS_MIN+= Target/X86/X86InsertPrefetch.cpp
SRCS_MIN+= Target/X86/X86InstrFMA3Info.cpp
SRCS_MIN+= Target/X86/X86InstrFoldTables.cpp
@ -1291,6 +1295,8 @@ SRCS_MIN+= Target/X86/X86InstrInfo.cpp
SRCS_MIN+= Target/X86/X86InstructionSelector.cpp
SRCS_MIN+= Target/X86/X86InterleavedAccess.cpp
SRCS_MIN+= Target/X86/X86LegalizerInfo.cpp
SRCS_MIN+= Target/X86/X86LoadValueInjectionLoadHardening.cpp
SRCS_MIN+= Target/X86/X86LoadValueInjectionRetHardening.cpp
SRCS_MIN+= Target/X86/X86MCInstLower.cpp
SRCS_MIN+= Target/X86/X86MachineFunctionInfo.cpp
SRCS_MIN+= Target/X86/X86MacroFusion.cpp
@ -1298,7 +1304,6 @@ SRCS_MIN+= Target/X86/X86OptimizeLEAs.cpp
SRCS_MIN+= Target/X86/X86PadShortFunction.cpp
SRCS_MIN+= Target/X86/X86RegisterBankInfo.cpp
SRCS_MIN+= Target/X86/X86RegisterInfo.cpp
SRCS_MIN+= Target/X86/X86RetpolineThunks.cpp
SRCS_MIN+= Target/X86/X86SelectionDAGInfo.cpp
SRCS_MIN+= Target/X86/X86ShuffleDecodeConstantPool.cpp
SRCS_MIN+= Target/X86/X86SpeculativeLoadHardening.cpp