Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp

llvmorg-10.0.0-129-gd24d5c8e308. Getting closer to 10.0.1-rc2.

MFC after:	3 weeks
dim 2020-06-25 08:15:10 +00:00
commit d3a8f8c8d5
53 changed files with 2197 additions and 410 deletions


@@ -2267,6 +2267,14 @@ def mspeculative_load_hardening : Flag<["-"], "mspeculative-load-hardening">,
Group<m_Group>, Flags<[CoreOption,CC1Option]>;
def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">,
Group<m_Group>, Flags<[CoreOption]>;
def mlvi_hardening : Flag<["-"], "mlvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
HelpText<"Enable all mitigations for Load Value Injection (LVI)">;
def mno_lvi_hardening : Flag<["-"], "mno-lvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
HelpText<"Disable mitigations for Load Value Injection (LVI)">;
def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">;
def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">;
def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>,
HelpText<"Enable linker relaxation">;


@@ -276,11 +276,12 @@ public:
break;
case 'Q': // Memory operand that is an offset from a register (it is
// usually better to use `m' or `es' in asm statements)
Info.setAllowsRegister();
LLVM_FALLTHROUGH;
case 'Z': // Memory operand that is an indexed or indirect from a
// register (it is usually better to use `m' or `es' in
// asm statements)
Info.setAllowsMemory();
Info.setAllowsRegister();
break;
case 'R': // AIX TOC entry
case 'a': // Address operand that is an indexed or indirect from a


@@ -454,8 +454,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
<< lastArgumentForMask(D, Args, Kinds & NeedsLTO) << "-flto";
}
if ((Kinds & SanitizerKind::ShadowCallStack) &&
TC.getTriple().getArch() == llvm::Triple::aarch64 &&
if ((Kinds & SanitizerKind::ShadowCallStack) && TC.getTriple().isAArch64() &&
!llvm::AArch64::isX18ReservedByDefault(TC.getTriple()) &&
!Args.hasArg(options::OPT_ffixed_x18)) {
D.Diag(diag::err_drv_argument_only_allowed_with)


@@ -954,15 +954,12 @@ SanitizerMask ToolChain::getSupportedSanitizers() const {
if (getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64 ||
getTriple().getArch() == llvm::Triple::arm ||
getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::wasm32 ||
getTriple().getArch() == llvm::Triple::wasm64)
getTriple().getArch() == llvm::Triple::wasm64 || getTriple().isAArch64())
Res |= SanitizerKind::CFIICall;
if (getTriple().getArch() == llvm::Triple::x86_64 ||
getTriple().getArch() == llvm::Triple::aarch64)
if (getTriple().getArch() == llvm::Triple::x86_64 || getTriple().isAArch64())
Res |= SanitizerKind::ShadowCallStack;
if (getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::aarch64_be)
if (getTriple().isAArch64())
Res |= SanitizerKind::MemTag;
return Res;
}


@@ -147,6 +147,7 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
// flags). This is a bit hacky but keeps existing usages working. We should
// consider deprecating this and instead warn if the user requests external
// retpoline thunks and *doesn't* request some form of retpolines.
auto SpectreOpt = clang::driver::options::ID::OPT_INVALID;
if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline,
options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening)) {
@@ -154,12 +155,14 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
false)) {
Features.push_back("+retpoline-indirect-calls");
Features.push_back("+retpoline-indirect-branches");
SpectreOpt = options::OPT_mretpoline;
} else if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
options::OPT_mno_speculative_load_hardening,
false)) {
// On x86, speculative load hardening relies on at least using retpolines
// for indirect calls.
Features.push_back("+retpoline-indirect-calls");
SpectreOpt = options::OPT_mspeculative_load_hardening;
}
} else if (Args.hasFlag(options::OPT_mretpoline_external_thunk,
options::OPT_mno_retpoline_external_thunk, false)) {
@@ -167,6 +170,26 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
// eventually switch to an error here.
Features.push_back("+retpoline-indirect-calls");
Features.push_back("+retpoline-indirect-branches");
SpectreOpt = options::OPT_mretpoline_external_thunk;
}
auto LVIOpt = clang::driver::options::ID::OPT_INVALID;
if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening,
false)) {
Features.push_back("+lvi-load-hardening");
Features.push_back("+lvi-cfi"); // load hardening implies CFI protection
LVIOpt = options::OPT_mlvi_hardening;
} else if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi,
false)) {
Features.push_back("+lvi-cfi");
LVIOpt = options::OPT_mlvi_cfi;
}
if (SpectreOpt != clang::driver::options::ID::OPT_INVALID &&
LVIOpt != clang::driver::options::ID::OPT_INVALID) {
D.Diag(diag::err_drv_argument_not_allowed_with)
<< D.getOpts().getOptionName(SpectreOpt)
<< D.getOpts().getOptionName(LVIOpt);
}
// Now add any that the user explicitly requested on the command line,


@@ -488,6 +488,9 @@ let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumudm : GCCBuiltin<"__builtin_altivec_vmsumudm">,
Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_v1i128_ty], [IntrNoMem]>;
def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_v4i32_ty], [IntrNoMem]>;


@@ -40,8 +40,8 @@ template <typename T, size_t N> struct object_deleter<T[N]> {
// constexpr, a dynamic initializer may be emitted depending on optimization
// settings. For the affected versions of MSVC, use the old linker
// initialization pattern of not providing a constructor and leaving the fields
// uninitialized.
#if !defined(_MSC_VER) || defined(__clang__)
// uninitialized. See http://llvm.org/PR41367 for details.
#if !defined(_MSC_VER) || (_MSC_VER >= 1925) || defined(__clang__)
#define LLVM_USE_CONSTEXPR_CTOR
#endif
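
For context on the guard above, the two initialization styles it selects between look roughly like the sketch below. The class and member names are illustrative only and are not taken from this commit: with LLVM_USE_CONSTEXPR_CTOR defined, a constexpr constructor plus default member initializers give guaranteed constant initialization; without it, no constructor is declared and a static instance is simply zero-initialized, so the affected MSVC versions cannot emit a dynamic initializer for it.

// Sketch only; ExamplePtrHolder and its member are hypothetical.
class ExamplePtrHolder {
  void *Ptr
#ifdef LLVM_USE_CONSTEXPR_CTOR
      = nullptr
#endif
      ;

public:
#ifdef LLVM_USE_CONSTEXPR_CTOR
  constexpr ExamplePtrHolder() = default;
#endif
  void *get() const { return Ptr; }
};

// Either way a namespace-scope instance needs no dynamic initializer: it is
// constant-initialized in the first case and zero-initialized in the second.
static ExamplePtrHolder GlobalHolder;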


@@ -959,6 +959,10 @@ def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = 1;
let MemoryVT = i32;
}
def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = 1;
let MemoryVT = f16;
}
def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = 1;
let MemoryVT = f32;
@@ -1094,6 +1098,11 @@ def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
let IsStore = 1;
let MemoryVT = i32;
}
def truncstoref16 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = 1;
let MemoryVT = f16;
}
def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
(truncstore node:$val, node:$ptr)> {
let IsStore = 1;


@@ -8,8 +8,6 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
#include "RDFGraph.h"
#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -20,6 +18,8 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -753,8 +753,10 @@ RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
if (RegisterId R = TLI.getExceptionPointerRegister(PF))
LR.insert(RegisterRef(R));
if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
LR.insert(RegisterRef(R));
if (!isFuncletEHPersonality(classifyEHPersonality(PF))) {
if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
LR.insert(RegisterRef(R));
}
return LR;
}


@@ -22,9 +22,6 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
#include "RDFLiveness.h"
#include "RDFGraph.h"
#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -33,6 +30,9 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"


@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"


@@ -147,8 +147,17 @@ void llvm::computeLTOCacheKey(
// Include the hash for the current module
auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
std::vector<uint64_t> ExportsGUID;
ExportsGUID.reserve(ExportList.size());
for (const auto &VI : ExportList) {
auto GUID = VI.getGUID();
ExportsGUID.push_back(GUID);
}
// Sort the export list elements GUIDs.
llvm::sort(ExportsGUID);
for (uint64_t GUID : ExportsGUID) {
// The export list can impact the internalization, be conservative here
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&GUID, sizeof(GUID)));
}
@@ -156,12 +165,23 @@
// Include the hash for every module we import functions from. The set of
// imported symbols for each module may affect code generation and is
// sensitive to link order, so include that as well.
for (auto &Entry : ImportList) {
auto ModHash = Index.getModuleHash(Entry.first());
using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator;
std::vector<ImportMapIteratorTy> ImportModulesVector;
ImportModulesVector.reserve(ImportList.size());
for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
++It) {
ImportModulesVector.push_back(It);
}
llvm::sort(ImportModulesVector,
[](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs)
-> bool { return Lhs->getKey() < Rhs->getKey(); });
for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) {
auto ModHash = Index.getModuleHash(EntryIt->first());
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
AddUint64(Entry.second.size());
for (auto &Fn : Entry.second)
AddUint64(EntryIt->second.size());
for (auto &Fn : EntryIt->second)
AddUint64(Fn);
}
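
The extra bookkeeping above exists because the export and import maps do not iterate in a stable order (the import list is a StringMap-style container keyed by module path), so hashing entries in raw iteration order would make the cache key nondeterministic across runs. A standalone sketch of the same trick, not part of the commit, using a plain llvm::StringMap<int> rather than the real import map type:

// Collect StringMap iterators, then sort them by key so the traversal order
// no longer depends on hashing or insertion order.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include <vector>

static std::vector<llvm::StringMap<int>::const_iterator>
sortedByKey(const llvm::StringMap<int> &M) {
  std::vector<llvm::StringMap<int>::const_iterator> Order;
  Order.reserve(M.size());
  for (auto It = M.begin(); It != M.end(); ++It)
    Order.push_back(It);
  llvm::sort(Order, [](const llvm::StringMap<int>::const_iterator &L,
                       const llvm::StringMap<int>::const_iterator &R) {
    return L->getKey() < R->getKey();
  });
  return Order; // visit entries through this vector to get a stable order
}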


@@ -304,7 +304,7 @@ void BPFDAGToDAGISel::PreprocessLoad(SDNode *Node,
LLVM_DEBUG(dbgs() << "Replacing load of size " << size << " with constant "
<< val << '\n');
SDValue NVal = CurDAG->getConstant(val, DL, MVT::i64);
SDValue NVal = CurDAG->getConstant(val, DL, LD->getValueType(0));
// After replacement, the current node is dead, we need to
// go backward one step to make iterator still work


@@ -600,6 +600,38 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
bool CheckPointer, bool SeenPointer) {
if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
TypeId = DIToIdMap[Ty];
// To handle the case like the following:
// struct t;
// typedef struct t _t;
// struct s1 { _t *c; };
// int test1(struct s1 *arg) { ... }
//
// struct t { int a; int b; };
// struct s2 { _t c; }
// int test2(struct s2 *arg) { ... }
//
// During traversing test1() argument, "_t" is recorded
// in DIToIdMap and a forward declaration fixup is created
// for "struct t" to avoid pointee type traversal.
//
// During traversing test2() argument, even if we see "_t" is
// already defined, we should keep moving to eventually
// bring in types for "struct t". Otherwise, the "struct s2"
// definition won't be correct.
if (Ty && (!CheckPointer || !SeenPointer)) {
if (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
unsigned Tag = DTy->getTag();
if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
uint32_t TmpTypeId;
visitTypeEntry(DTy->getBaseType(), TmpTypeId, CheckPointer,
SeenPointer);
}
}
}
return;
}


@@ -12,9 +12,6 @@
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
@@ -27,6 +24,9 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCInstrDesc.h"


@@ -11,9 +11,6 @@
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFCopy.h"
#include "RDFDeadCode.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -24,6 +21,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"


@@ -11,13 +11,13 @@
//===----------------------------------------------------------------------===//
#include "RDFCopy.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"


@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "RDFRegisters.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <map>
#include <vector>


@@ -9,13 +9,13 @@
// RDF-based generic dead code elimination.
#include "RDFDeadCode.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/Support/Debug.h"
#include <queue>


@@ -23,8 +23,8 @@
#ifndef RDF_DEADCODE_H
#define RDF_DEADCODE_H
#include "RDFGraph.h"
#include "RDFLiveness.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/SetVector.h"
namespace llvm {


@@ -373,6 +373,7 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
VMSUMSHS,
VMSUMUBM,
VMSUMUHM,
VMSUMUDM,
VMSUMUHS,
VMULESB,
VMULESH,


@@ -166,6 +166,9 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true",
"Enable Hardware Transactional Memory instructions">;
def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true",
"Implement mftb using the mfspr instruction">;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
def FeaturePPCPreRASched:
SubtargetFeature<"ppc-prera-sched", "UsePPCPreRASchedStrategy", "true",
"Use PowerPC pre-RA scheduling strategy">;
@@ -252,7 +255,8 @@ def ProcessorFeatures {
FeatureExtDiv,
FeatureMFTB,
DeprecatedDST,
FeatureTwoConstNR];
FeatureTwoConstNR,
FeatureUnalignedFloats];
list<SubtargetFeature> P7SpecificFeatures = [];
list<SubtargetFeature> P7Features =
!listconcat(P7InheritableFeatures, P7SpecificFeatures);


@@ -167,6 +167,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
if (Subtarget.isISA3_0()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
setTruncStoreAction(MVT::f64, MVT::f16, Legal);
setTruncStoreAction(MVT::f32, MVT::f16, Legal);
} else {
// No extending loads from f16 or HW conversions back and forth.
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PowerPC has pre-inc load and store's.
@@ -677,6 +694,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
if (!Subtarget.hasP8Vector()) {
setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
@@ -10361,6 +10379,7 @@ SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::FP_EXTEND &&
"Should only be called for ISD::FP_EXTEND");
// FIXME: handle extends from half precision float vectors on P9.
// We only want to custom lower an extend from v2f32 to v2f64.
if (Op.getValueType() != MVT::v2f64 ||
Op.getOperand(0).getValueType() != MVT::v2f32)
@@ -10574,6 +10593,11 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
// Don't handle bitcast here.
return;
case ISD::FP_EXTEND:
SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
if (Lowered)
Results.push_back(Lowered);
return;
}
}
@@ -15255,6 +15279,10 @@ bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
if (VT.isFloatingPoint() && !VT.isVector() &&
!Subtarget.allowsUnalignedFPAccess())
return false;
if (VT.getSimpleVT().isVector()) {
if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64 &&


@@ -637,7 +637,7 @@ namespace llvm {
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
if (VT.getScalarSizeInBits() % 8 == 0)
if (VT.getVectorNumElements() != 1 && VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}


@@ -1342,6 +1342,10 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
// Vector Multiply-Sum
def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
v1i128, v2i64, v1i128>;
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;


@@ -2631,6 +2631,10 @@ bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
return false;
// The operand may not necessarily be an immediate - it could be a relocation.
if (!ADDIMI.getOperand(2).isImm())
return false;
Imm = ADDIMI.getOperand(2).getImm();
return true;


@@ -3343,6 +3343,23 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
(v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
// Load/convert and convert/store patterns for f16.
def : Pat<(f64 (extloadf16 xoaddr:$src)),
(f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
(STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
def : Pat<(f32 (extloadf16 xoaddr:$src)),
(f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
(STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
def : Pat<(f64 (f16_to_fp i32:$A)),
(f64 (XSCVHPDP (MTVSRWZ $A)))>;
def : Pat<(f32 (f16_to_fp i32:$A)),
(f32 (COPY_TO_REGCLASS (XSCVHPDP (MTVSRWZ $A)), VSSRC))>;
def : Pat<(i32 (fp_to_f16 f32:$A)),
(i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
let Predicates = [IsBigEndian, HasP9Vector] in {
// Scalar stores of i8
def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),


@@ -124,6 +124,7 @@ protected:
bool IsPPC4xx;
bool IsPPC6xx;
bool FeatureMFTB;
bool AllowsUnalignedFPAccess;
bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsLittleEndian;
@@ -274,6 +275,7 @@ public:
bool vectorsUseTwoUnits() const {return VectorsUseTwoUnits; }
bool isE500() const { return IsE500; }
bool isFeatureMFTB() const { return FeatureMFTB; }
bool allowsUnalignedFPAccess() const { return AllowsUnalignedFPAccess; }
bool isDeprecatedDST() const { return DeprecatedDST; }
bool hasICBT() const { return HasICBT; }
bool hasInvariantFunctionDescriptors() const {


@@ -0,0 +1,446 @@
//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: ImmutableGraph is a fast DAG implementation that cannot be
/// modified, except by creating a new ImmutableGraph. ImmutableGraph is
/// implemented as two arrays: one containing nodes, and one containing edges.
/// The advantages to this implementation are two-fold:
/// 1. Iteration and traversal operations benefit from cache locality.
/// 2. Operations on sets of nodes/edges are efficient, and representations of
/// those sets in memory are compact. For instance, a set of edges is
/// implemented as a bit vector, wherein each bit corresponds to one edge in
/// the edge array. This implies a lower bound of 64x spatial improvement
/// over, e.g., an llvm::DenseSet or llvm::SmallSet. It also means that
/// insert/erase/contains operations complete in negligible constant time:
/// insert and erase require one load and one store, and contains requires
/// just one load.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
#define LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <iterator>
#include <utility>
#include <vector>
namespace llvm {
template <typename NodeValueT, typename EdgeValueT> class ImmutableGraph {
using Traits = GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *>;
template <typename> friend class ImmutableGraphBuilder;
public:
using node_value_type = NodeValueT;
using edge_value_type = EdgeValueT;
using size_type = int;
class Node;
class Edge {
friend class ImmutableGraph;
template <typename> friend class ImmutableGraphBuilder;
const Node *Dest;
edge_value_type Value;
public:
const Node *getDest() const { return Dest; };
const edge_value_type &getValue() const { return Value; }
};
class Node {
friend class ImmutableGraph;
template <typename> friend class ImmutableGraphBuilder;
const Edge *Edges;
node_value_type Value;
public:
const node_value_type &getValue() const { return Value; }
const Edge *edges_begin() const { return Edges; }
// Nodes are allocated sequentially. Edges for a node are stored together.
// The end of this Node's edges is the beginning of the next node's edges.
// An extra node was allocated to hold the end pointer for the last real
// node.
const Edge *edges_end() const { return (this + 1)->Edges; }
ArrayRef<Edge> edges() const {
return makeArrayRef(edges_begin(), edges_end());
}
};
protected:
ImmutableGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
size_type NodesSize, size_type EdgesSize)
: Nodes(std::move(Nodes)), Edges(std::move(Edges)), NodesSize(NodesSize),
EdgesSize(EdgesSize) {}
ImmutableGraph(const ImmutableGraph &) = delete;
ImmutableGraph(ImmutableGraph &&) = delete;
ImmutableGraph &operator=(const ImmutableGraph &) = delete;
ImmutableGraph &operator=(ImmutableGraph &&) = delete;
public:
ArrayRef<Node> nodes() const { return makeArrayRef(Nodes.get(), NodesSize); }
const Node *nodes_begin() const { return nodes().begin(); }
const Node *nodes_end() const { return nodes().end(); }
ArrayRef<Edge> edges() const { return makeArrayRef(Edges.get(), EdgesSize); }
const Edge *edges_begin() const { return edges().begin(); }
const Edge *edges_end() const { return edges().end(); }
size_type nodes_size() const { return NodesSize; }
size_type edges_size() const { return EdgesSize; }
// Node N must belong to this ImmutableGraph.
size_type getNodeIndex(const Node &N) const {
return std::distance(nodes_begin(), &N);
}
// Edge E must belong to this ImmutableGraph.
size_type getEdgeIndex(const Edge &E) const {
return std::distance(edges_begin(), &E);
}
// FIXME: Could NodeSet and EdgeSet be templated to share code?
class NodeSet {
const ImmutableGraph &G;
BitVector V;
public:
NodeSet(const ImmutableGraph &G, bool ContainsAll = false)
: G{G}, V{static_cast<unsigned>(G.nodes_size()), ContainsAll} {}
bool insert(const Node &N) {
size_type Idx = G.getNodeIndex(N);
bool AlreadyExists = V.test(Idx);
V.set(Idx);
return !AlreadyExists;
}
void erase(const Node &N) {
size_type Idx = G.getNodeIndex(N);
V.reset(Idx);
}
bool contains(const Node &N) const {
size_type Idx = G.getNodeIndex(N);
return V.test(Idx);
}
void clear() { V.reset(); }
size_type empty() const { return V.none(); }
/// Return the number of elements in the set
size_type count() const { return V.count(); }
/// Return the size of the set's domain
size_type size() const { return V.size(); }
/// Set union
NodeSet &operator|=(const NodeSet &RHS) {
assert(&this->G == &RHS.G);
V |= RHS.V;
return *this;
}
/// Set intersection
NodeSet &operator&=(const NodeSet &RHS) {
assert(&this->G == &RHS.G);
V &= RHS.V;
return *this;
}
/// Set disjoint union
NodeSet &operator^=(const NodeSet &RHS) {
assert(&this->G == &RHS.G);
V ^= RHS.V;
return *this;
}
using index_iterator = typename BitVector::const_set_bits_iterator;
index_iterator index_begin() const { return V.set_bits_begin(); }
index_iterator index_end() const { return V.set_bits_end(); }
void set(size_type Idx) { V.set(Idx); }
void reset(size_type Idx) { V.reset(Idx); }
class iterator {
const NodeSet &Set;
size_type Current;
void advance() {
assert(Current != -1);
Current = Set.V.find_next(Current);
}
public:
iterator(const NodeSet &Set, size_type Begin)
: Set{Set}, Current{Begin} {}
iterator operator++(int) {
iterator Tmp = *this;
advance();
return Tmp;
}
iterator &operator++() {
advance();
return *this;
}
Node *operator*() const {
assert(Current != -1);
return Set.G.nodes_begin() + Current;
}
bool operator==(const iterator &other) const {
assert(&this->Set == &other.Set);
return this->Current == other.Current;
}
bool operator!=(const iterator &other) const { return !(*this == other); }
};
iterator begin() const { return iterator{*this, V.find_first()}; }
iterator end() const { return iterator{*this, -1}; }
};
class EdgeSet {
const ImmutableGraph &G;
BitVector V;
public:
EdgeSet(const ImmutableGraph &G, bool ContainsAll = false)
: G{G}, V{static_cast<unsigned>(G.edges_size()), ContainsAll} {}
bool insert(const Edge &E) {
size_type Idx = G.getEdgeIndex(E);
bool AlreadyExists = V.test(Idx);
V.set(Idx);
return !AlreadyExists;
}
void erase(const Edge &E) {
size_type Idx = G.getEdgeIndex(E);
V.reset(Idx);
}
bool contains(const Edge &E) const {
size_type Idx = G.getEdgeIndex(E);
return V.test(Idx);
}
void clear() { V.reset(); }
bool empty() const { return V.none(); }
/// Return the number of elements in the set
size_type count() const { return V.count(); }
/// Return the size of the set's domain
size_type size() const { return V.size(); }
/// Set union
EdgeSet &operator|=(const EdgeSet &RHS) {
assert(&this->G == &RHS.G);
V |= RHS.V;
return *this;
}
/// Set intersection
EdgeSet &operator&=(const EdgeSet &RHS) {
assert(&this->G == &RHS.G);
V &= RHS.V;
return *this;
}
/// Set disjoint union
EdgeSet &operator^=(const EdgeSet &RHS) {
assert(&this->G == &RHS.G);
V ^= RHS.V;
return *this;
}
using index_iterator = typename BitVector::const_set_bits_iterator;
index_iterator index_begin() const { return V.set_bits_begin(); }
index_iterator index_end() const { return V.set_bits_end(); }
void set(size_type Idx) { V.set(Idx); }
void reset(size_type Idx) { V.reset(Idx); }
class iterator {
const EdgeSet &Set;
size_type Current;
void advance() {
assert(Current != -1);
Current = Set.V.find_next(Current);
}
public:
iterator(const EdgeSet &Set, size_type Begin)
: Set{Set}, Current{Begin} {}
iterator operator++(int) {
iterator Tmp = *this;
advance();
return Tmp;
}
iterator &operator++() {
advance();
return *this;
}
Edge *operator*() const {
assert(Current != -1);
return Set.G.edges_begin() + Current;
}
bool operator==(const iterator &other) const {
assert(&this->Set == &other.Set);
return this->Current == other.Current;
}
bool operator!=(const iterator &other) const { return !(*this == other); }
};
iterator begin() const { return iterator{*this, V.find_first()}; }
iterator end() const { return iterator{*this, -1}; }
};
private:
std::unique_ptr<Node[]> Nodes;
std::unique_ptr<Edge[]> Edges;
size_type NodesSize;
size_type EdgesSize;
};
template <typename GraphT> class ImmutableGraphBuilder {
using node_value_type = typename GraphT::node_value_type;
using edge_value_type = typename GraphT::edge_value_type;
static_assert(
std::is_base_of<ImmutableGraph<node_value_type, edge_value_type>,
GraphT>::value,
"Template argument to ImmutableGraphBuilder must derive from "
"ImmutableGraph<>");
using size_type = typename GraphT::size_type;
using NodeSet = typename GraphT::NodeSet;
using Node = typename GraphT::Node;
using EdgeSet = typename GraphT::EdgeSet;
using Edge = typename GraphT::Edge;
using BuilderEdge = std::pair<edge_value_type, size_type>;
using EdgeList = std::vector<BuilderEdge>;
using BuilderVertex = std::pair<node_value_type, EdgeList>;
using VertexVec = std::vector<BuilderVertex>;
public:
using BuilderNodeRef = size_type;
BuilderNodeRef addVertex(const node_value_type &V) {
auto I = AdjList.emplace(AdjList.end(), V, EdgeList{});
return std::distance(AdjList.begin(), I);
}
void addEdge(const edge_value_type &E, BuilderNodeRef From,
BuilderNodeRef To) {
AdjList[From].second.emplace_back(E, To);
}
bool empty() const { return AdjList.empty(); }
template <typename... ArgT> std::unique_ptr<GraphT> get(ArgT &&... Args) {
size_type VertexSize = AdjList.size(), EdgeSize = 0;
for (const auto &V : AdjList) {
EdgeSize += V.second.size();
}
auto VertexArray =
std::make_unique<Node[]>(VertexSize + 1 /* terminator node */);
auto EdgeArray = std::make_unique<Edge[]>(EdgeSize);
size_type VI = 0, EI = 0;
for (; VI < VertexSize; ++VI) {
VertexArray[VI].Value = std::move(AdjList[VI].first);
VertexArray[VI].Edges = &EdgeArray[EI];
auto NumEdges = static_cast<size_type>(AdjList[VI].second.size());
for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) {
auto &E = AdjList[VI].second[VEI];
EdgeArray[EI].Value = std::move(E.first);
EdgeArray[EI].Dest = &VertexArray[E.second];
}
}
assert(VI == VertexSize && EI == EdgeSize && "ImmutableGraph malformed");
VertexArray[VI].Edges = &EdgeArray[EdgeSize]; // terminator node
return std::make_unique<GraphT>(std::move(VertexArray),
std::move(EdgeArray), VertexSize, EdgeSize,
std::forward<ArgT>(Args)...);
}
template <typename... ArgT>
static std::unique_ptr<GraphT> trim(const GraphT &G, const NodeSet &TrimNodes,
const EdgeSet &TrimEdges,
ArgT &&... Args) {
size_type NewVertexSize = G.nodes_size() - TrimNodes.count();
size_type NewEdgeSize = G.edges_size() - TrimEdges.count();
auto NewVertexArray =
std::make_unique<Node[]>(NewVertexSize + 1 /* terminator node */);
auto NewEdgeArray = std::make_unique<Edge[]>(NewEdgeSize);
// Walk the nodes and determine the new index for each node.
size_type NewNodeIndex = 0;
std::vector<size_type> RemappedNodeIndex(G.nodes_size());
for (const Node &N : G.nodes()) {
if (TrimNodes.contains(N))
continue;
RemappedNodeIndex[G.getNodeIndex(N)] = NewNodeIndex++;
}
assert(NewNodeIndex == NewVertexSize &&
"Should have assigned NewVertexSize indices");
size_type VertexI = 0, EdgeI = 0;
for (const Node &N : G.nodes()) {
if (TrimNodes.contains(N))
continue;
NewVertexArray[VertexI].Value = N.getValue();
NewVertexArray[VertexI].Edges = &NewEdgeArray[EdgeI];
for (const Edge &E : N.edges()) {
if (TrimEdges.contains(E))
continue;
NewEdgeArray[EdgeI].Value = E.getValue();
size_type DestIdx = G.getNodeIndex(*E.getDest());
size_type NewIdx = RemappedNodeIndex[DestIdx];
assert(NewIdx < NewVertexSize);
NewEdgeArray[EdgeI].Dest = &NewVertexArray[NewIdx];
++EdgeI;
}
++VertexI;
}
assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize &&
"Gadget graph malformed");
NewVertexArray[VertexI].Edges = &NewEdgeArray[NewEdgeSize]; // terminator
return std::make_unique<GraphT>(std::move(NewVertexArray),
std::move(NewEdgeArray), NewVertexSize,
NewEdgeSize, std::forward<ArgT>(Args)...);
}
private:
VertexVec AdjList;
};
template <typename NodeValueT, typename EdgeValueT>
struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT> *> {
using GraphT = ImmutableGraph<NodeValueT, EdgeValueT>;
using NodeRef = typename GraphT::Node const *;
using EdgeRef = typename GraphT::Edge const &;
static NodeRef edge_dest(EdgeRef E) { return E.getDest(); }
using ChildIteratorType =
mapped_iterator<typename GraphT::Edge const *, decltype(&edge_dest)>;
static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); }
static ChildIteratorType child_begin(NodeRef N) {
return {N->edges_begin(), &edge_dest};
}
static ChildIteratorType child_end(NodeRef N) {
return {N->edges_end(), &edge_dest};
}
static NodeRef getNode(typename GraphT::Node const &N) { return NodeRef{&N}; }
using nodes_iterator =
mapped_iterator<typename GraphT::Node const *, decltype(&getNode)>;
static nodes_iterator nodes_begin(GraphT *G) {
return {G->nodes_begin(), &getNode};
}
static nodes_iterator nodes_end(GraphT *G) {
return {G->nodes_end(), &getNode};
}
using ChildEdgeIteratorType = typename GraphT::Edge const *;
static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
return N->edges_begin();
}
static ChildEdgeIteratorType child_edge_end(NodeRef N) {
return N->edges_end();
}
static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); }
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_X86_IMMUTABLEGRAPH_H
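
As a usage note for the interface above: a graph is assembled through ImmutableGraphBuilder and then frozen by get(). The concrete graph type must derive from ImmutableGraph and forward to the protected base constructor so the builder can instantiate it. The sketch below is illustrative only; the IntGraph type is hypothetical and not part of this commit.

#include "ImmutableGraph.h"
#include <memory>

namespace {
// Hypothetical graph with int-valued nodes and edges.
struct IntGraph : llvm::ImmutableGraph<int, int> {
  // Re-expose the protected base constructor so the builder's
  // std::make_unique<IntGraph>(...) call can construct the final graph.
  IntGraph(std::unique_ptr<Node[]> Nodes, std::unique_ptr<Edge[]> Edges,
           size_type NodesSize, size_type EdgesSize)
      : ImmutableGraph(std::move(Nodes), std::move(Edges), NodesSize,
                       EdgesSize) {}
};
} // namespace

static int buildAndCount() {
  llvm::ImmutableGraphBuilder<IntGraph> Builder;
  auto A = Builder.addVertex(1);
  auto B = Builder.addVertex(2);
  Builder.addEdge(/*edge value*/ 7, A, B);    // one edge A -> B
  std::unique_ptr<IntGraph> G = Builder.get();

  // Node sets are bit vectors indexed by a node's position in the node array.
  IntGraph::NodeSet Visited(*G);
  Visited.insert(*G->nodes_begin());
  return Visited.count() + G->edges_size();   // 1 + 1
}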


@@ -120,7 +120,7 @@ FunctionPass *createX86DomainReassignmentPass();
FunctionPass *createX86EvexToVexInsts();
/// This pass creates the thunks for the retpoline feature.
FunctionPass *createX86RetpolineThunksPass();
FunctionPass *createX86IndirectThunksPass();
/// This pass ensures instructions featuring a memory operand
/// have distinctive <LineNumber, Discriminator> (with respect to eachother)
@@ -133,6 +133,9 @@ InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
X86Subtarget &,
X86RegisterBankInfo &);
FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
FunctionPass *createX86LoadValueInjectionLoadHardeningUnoptimizedPass();
FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();
void initializeEvexToVexInstPassPass(PassRegistry &);
@@ -148,6 +151,9 @@ void initializeX86DomainReassignmentPass(PassRegistry &);
void initializeX86ExecutionDomainFixPass(PassRegistry &);
void initializeX86ExpandPseudoPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
void initializeX86LoadValueInjectionLoadHardeningUnoptimizedPassPass(PassRegistry &);
void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
void initializeX86OptimizeLEAPassPass(PassRegistry &);
void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);


@@ -426,6 +426,22 @@ def FeatureRetpolineExternalThunk
"ourselves. Only has effect when combined with some other retpoline "
"feature", [FeatureRetpolineIndirectCalls]>;
// Mitigate LVI attacks against indirect calls/branches and call returns
def FeatureLVIControlFlowIntegrity
: SubtargetFeature<
"lvi-cfi", "UseLVIControlFlowIntegrity", "true",
"Prevent indirect calls/branches from using a memory operand, and "
"precede all indirect calls/branches from a register with an "
"LFENCE instruction to serialize control flow. Also decompose RET "
"instructions into a POP+LFENCE+JMP sequence.">;
// Mitigate LVI attacks against data loads
def FeatureLVILoadHardening
: SubtargetFeature<
"lvi-load-hardening", "UseLVILoadHardening", "true",
"Insert LFENCE instructions to prevent data speculatively injected "
"into loads from being used maliciously.">;
// Direct Move instructions.
def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction">;


@@ -3202,8 +3202,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
(CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
return false;
// Functions using retpoline for indirect calls need to use SDISel.
if (Subtarget->useRetpolineIndirectCalls())
// Functions using thunks for indirect calls need to use SDISel.
if (Subtarget->useIndirectThunkCalls())
return false;
// Handle only C, fastcc, and webkit_js calling conventions for now.


@@ -765,10 +765,10 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
bool InProlog) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
// FIXME: Add retpoline support and remove this.
if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
// FIXME: Add indirect thunk support and remove this.
if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
report_fatal_error("Emitting stack probe calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
"code model and indirect thunks not yet implemented.");
unsigned CallOp;
if (Is64Bit)
@@ -2493,9 +2493,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
// is laid out within 2^31 bytes of each function body, but this seems
// to be sufficient for JIT.
// FIXME: Add retpoline support and remove the error here..
if (STI.useRetpolineIndirectCalls())
if (STI.useIndirectThunkCalls())
report_fatal_error("Emitting morestack calls on 64-bit with the large "
"code model and retpoline not yet implemented.");
"code model and thunks not yet implemented.");
BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
.addReg(X86::RIP)
.addImm(0)


@@ -987,7 +987,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (OptLevel != CodeGenOpt::None &&
// Only do this when the target can fold the load into the call or
// jmp.
!Subtarget->useRetpolineIndirectCalls() &&
!Subtarget->useIndirectThunkCalls() &&
((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
(N->getOpcode() == X86ISD::TC_RETURN &&
(Subtarget->is64Bit() ||


@@ -30221,8 +30221,8 @@ bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask,
}
bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
// If the subtarget is using retpolines, we need to not generate jump tables.
if (Subtarget.useRetpolineIndirectBranches())
// If the subtarget is using thunks, we need to not generate jump tables.
if (Subtarget.useIndirectThunkBranches())
return false;
// Otherwise, fallback on the generic logic.
@@ -31345,22 +31345,22 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
return BB;
}
static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) {
switch (RPOpc) {
case X86::RETPOLINE_CALL32:
case X86::INDIRECT_THUNK_CALL32:
return X86::CALLpcrel32;
case X86::RETPOLINE_CALL64:
case X86::INDIRECT_THUNK_CALL64:
return X86::CALL64pcrel32;
case X86::RETPOLINE_TCRETURN32:
case X86::INDIRECT_THUNK_TCRETURN32:
return X86::TCRETURNdi;
case X86::RETPOLINE_TCRETURN64:
case X86::INDIRECT_THUNK_TCRETURN64:
return X86::TCRETURNdi64;
}
llvm_unreachable("not retpoline opcode");
llvm_unreachable("not indirect thunk opcode");
}
static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
unsigned Reg) {
static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget,
unsigned Reg) {
if (Subtarget.useRetpolineExternalThunk()) {
// When using an external thunk for retpolines, we pick names that match the
// names GCC happens to use as well. This helps simplify the implementation
@@ -31392,39 +31392,48 @@ static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__x86_indirect_thunk_r11";
}
llvm_unreachable("unexpected reg for external indirect thunk");
}
if (Subtarget.useRetpolineIndirectCalls() ||
Subtarget.useRetpolineIndirectBranches()) {
// When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_eax";
case X86::ECX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_ecx";
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edx";
case X86::EDI:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edi";
case X86::R11:
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_retpoline_r11";
}
llvm_unreachable("unexpected reg for retpoline");
}
// When targeting an internal COMDAT thunk use an LLVM-specific name.
switch (Reg) {
case X86::EAX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_eax";
case X86::ECX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_ecx";
case X86::EDX:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edx";
case X86::EDI:
assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
return "__llvm_retpoline_edi";
case X86::R11:
if (Subtarget.useLVIControlFlowIntegrity()) {
assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
return "__llvm_retpoline_r11";
return "__llvm_lvi_thunk_r11";
}
llvm_unreachable("unexpected reg for retpoline");
llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
MachineBasicBlock *BB) const {
X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Copy the virtual register into the R11 physical register and
// call the retpoline thunk.
DebugLoc DL = MI.getDebugLoc();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
// Find an available scratch register to hold the callee. On 64-bit, we can
// just use R11, but we scan for uses anyway to ensure we don't generate
@@ -31458,7 +31467,7 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
report_fatal_error("calling convention incompatible with retpoline, no "
"available registers");
const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
@@ -32234,11 +32243,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return EmitLoweredTLSAddr(MI, BB);
case X86::RETPOLINE_CALL32:
case X86::RETPOLINE_CALL64:
case X86::RETPOLINE_TCRETURN32:
case X86::RETPOLINE_TCRETURN64:
return EmitLoweredRetpoline(MI, BB);
case X86::INDIRECT_THUNK_CALL32:
case X86::INDIRECT_THUNK_CALL64:
case X86::INDIRECT_THUNK_TCRETURN32:
case X86::INDIRECT_THUNK_TCRETURN64:
return EmitLoweredIndirectThunk(MI, BB);
case X86::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case X86::CATCHPAD:


@@ -1482,8 +1482,8 @@ namespace llvm {
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;


@@ -0,0 +1,364 @@
//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
///
/// Currently supported thunks include:
/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
/// before making an indirect call/jump
///
/// Note that the reason that this is implemented as a MachineFunctionPass and
/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
/// serialize all transformations, which can consume lots of memory.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "x86-retpoline-thunks"
static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
static const char R11RetpolineName[] = "__llvm_retpoline_r11";
static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";
static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";
namespace {
template <typename Derived> class ThunkInserter {
Derived &getDerived() { return *static_cast<Derived *>(this); }
protected:
bool InsertedThunks;
void doInitialization(Module &M) {}
void createThunkFunction(MachineModuleInfo &MMI, StringRef Name);
public:
void init(Module &M) {
InsertedThunks = false;
getDerived().doInitialization(M);
}
// return `true` if `MMI` or `MF` was modified
bool run(MachineModuleInfo &MMI, MachineFunction &MF);
};
struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
const char *getThunkPrefix() { return RetpolineNamePrefix; }
bool mayUseThunk(const MachineFunction &MF) {
const auto &STI = MF.getSubtarget<X86Subtarget>();
return (STI.useRetpolineIndirectCalls() ||
STI.useRetpolineIndirectBranches()) &&
!STI.useRetpolineExternalThunk();
}
void insertThunks(MachineModuleInfo &MMI);
void populateThunk(MachineFunction &MF);
};
struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
const char *getThunkPrefix() { return LVIThunkNamePrefix; }
bool mayUseThunk(const MachineFunction &MF) {
return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
}
void insertThunks(MachineModuleInfo &MMI) {
createThunkFunction(MMI, R11LVIThunkName);
}
void populateThunk(MachineFunction &MF) {
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
// generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
while (MF.size() > 1)
MF.erase(std::next(MF.begin()));
// This code mitigates LVI by replacing each indirect call/jump with a
// direct call/jump to a thunk that looks like:
// ```
// lfence
// jmpq *%r11
// ```
// This ensures that if the value in register %r11 was loaded from memory,
// then the value in %r11 is (architecturally) correct prior to the jump.
const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
MF.front().addLiveIn(X86::R11);
return;
}
};
class X86IndirectThunks : public MachineFunctionPass {
public:
static char ID;
X86IndirectThunks() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "X86 Indirect Thunks"; }
bool doInitialization(Module &M) override;
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
}
private:
std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;
// FIXME: When LLVM moves to C++17, these can become folds
template <typename... ThunkInserterT>
static void initTIs(Module &M,
std::tuple<ThunkInserterT...> &ThunkInserters) {
(void)std::initializer_list<int>{
(std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
}
template <typename... ThunkInserterT>
static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
std::tuple<ThunkInserterT...> &ThunkInserters) {
bool Modified = false;
(void)std::initializer_list<int>{
Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
return Modified;
}
};
} // end anonymous namespace
void RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI) {
if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
createThunkFunction(MMI, R11RetpolineName);
else
for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
EDIRetpolineName})
createThunkFunction(MMI, Name);
}
void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
Register ThunkReg;
if (Is64Bit) {
assert(MF.getName() == "__llvm_retpoline_r11" &&
"Should only have an r11 thunk on 64-bit targets");
// __llvm_retpoline_r11:
// callq .Lr11_call_target
// .Lr11_capture_spec:
// pause
// lfence
// jmp .Lr11_capture_spec
// .align 16
// .Lr11_call_target:
// movq %r11, (%rsp)
// retq
ThunkReg = X86::R11;
} else {
// For 32-bit targets we need to emit a collection of thunks for various
// possible scratch registers as well as a fallback that uses EDI, which is
// normally callee saved.
// __llvm_retpoline_eax:
// calll .Leax_call_target
// .Leax_capture_spec:
// pause
// jmp .Leax_capture_spec
// .align 16
// .Leax_call_target:
// movl %eax, (%esp) # Clobber return addr
// retl
//
// __llvm_retpoline_ecx:
// ... # Same setup
// movl %ecx, (%esp)
// retl
//
// __llvm_retpoline_edx:
// ... # Same setup
// movl %edx, (%esp)
// retl
//
// __llvm_retpoline_edi:
// ... # Same setup
// movl %edi, (%esp)
// retl
if (MF.getName() == EAXRetpolineName)
ThunkReg = X86::EAX;
else if (MF.getName() == ECXRetpolineName)
ThunkReg = X86::ECX;
else if (MF.getName() == EDXRetpolineName)
ThunkReg = X86::EDX;
else if (MF.getName() == EDIRetpolineName)
ThunkReg = X86::EDI;
else
llvm_unreachable("Invalid thunk name on x86-32!");
}
const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
// generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
while (MF.size() > 1)
MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec =
MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MachineBasicBlock *CallTarget =
MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MCSymbol *TargetSym = MF.getContext().createTempSymbol();
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
Entry->addLiveIn(ThunkReg);
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
// The MIR verifier thinks that the CALL in the entry block will fall through
// to CaptureSpec, so mark it as the successor. Technically, CaptureTarget is
// the successor, but the MIR verifier doesn't know how to cope with that.
Entry->addSuccessor(CaptureSpec);
// In the capture loop for speculation, we want to stop the processor from
// speculating as fast as possible. On Intel processors, the PAUSE instruction
// will block speculation without consuming any execution resources. On AMD
// processors, the PAUSE instruction is (essentially) a nop, so we also use an
// LFENCE instruction which they have advised will stop speculation as well
// with minimal resource utilization. We still end the capture with a jump to
// form an infinite loop to fully guarantee that no matter what implementation
// of the x86 ISA, speculating this code path never escapes.
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
CaptureSpec->setHasAddressTaken();
CaptureSpec->addSuccessor(CaptureSpec);
CallTarget->addLiveIn(ThunkReg);
CallTarget->setHasAddressTaken();
CallTarget->setAlignment(Align(16));
// Insert return address clobber
const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
0)
.addReg(ThunkReg);
CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}
template <typename Derived>
void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
StringRef Name) {
assert(Name.startswith(getDerived().getThunkPrefix()) &&
"Created a thunk with an unexpected prefix!");
Module &M = const_cast<Module &>(*MMI.getModule());
LLVMContext &Ctx = M.getContext();
auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
Function *F =
Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
F->setVisibility(GlobalValue::HiddenVisibility);
F->setComdat(M.getOrInsertComdat(Name));
// Add Attributes so that we don't create a frame, unwind information, or
// inline.
AttrBuilder B;
B.addAttribute(llvm::Attribute::NoUnwind);
B.addAttribute(llvm::Attribute::Naked);
F->addAttributes(llvm::AttributeList::FunctionIndex, B);
// Populate our function a bit so that we can verify.
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
IRBuilder<> Builder(Entry);
Builder.CreateRetVoid();
// MachineFunctions/MachineBasicBlocks aren't created automatically for the
// IR-level constructs we already made. Create them and insert them into the
// module.
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
// Insert EntryMBB into MF. It's not in the module until we do this.
MF.insert(MF.end(), EntryMBB);
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
}
template <typename Derived>
bool ThunkInserter<Derived>::run(MachineModuleInfo &MMI, MachineFunction &MF) {
// If MF is not a thunk, check to see if we need to insert a thunk.
if (!MF.getName().startswith(getDerived().getThunkPrefix())) {
// If we've already inserted a thunk, nothing else to do.
if (InsertedThunks)
return false;
// Only add a thunk if one of the functions has the corresponding feature
// enabled in its subtarget, and doesn't enable external thunks.
// FIXME: Conditionalize on indirect calls so we don't emit a thunk when
// nothing will end up calling it.
// FIXME: It's a little silly to look at every function just to enumerate
// the subtargets, but eventually we'll want to look at them for indirect
// calls, so maybe this is OK.
if (!getDerived().mayUseThunk(MF))
return false;
getDerived().insertThunks(MMI);
InsertedThunks = true;
return true;
}
// If this *is* a thunk function, we need to populate it with the correct MI.
getDerived().populateThunk(MF);
return true;
}
FunctionPass *llvm::createX86IndirectThunksPass() {
return new X86IndirectThunks();
}
char X86IndirectThunks::ID = 0;
bool X86IndirectThunks::doInitialization(Module &M) {
initTIs(M, TIs);
return false;
}
bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << getPassName() << '\n');
auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
return runTIs(MMI, MF, TIs);
}

View File

@ -1213,14 +1213,14 @@ def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[Not64BitMode, NotUseRetpolineIndirectCalls]>;
Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
Requires<[Not64BitMode, IsNotPIC, NotUseRetpolineIndirectCalls]>;
Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi tglobaladdr:$dst, imm:$off)>,
@ -1232,21 +1232,21 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode, UseRetpolineIndirectCalls]>;
(INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[In64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
(RETPOLINE_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[Not64BitMode, UseRetpolineIndirectCalls]>;
(INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, imm:$off)>,
Requires<[Not64BitMode, UseIndirectThunkCalls]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,

View File

@ -237,13 +237,13 @@ let isCall = 1 in
Sched<[WriteJumpLd]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst),
"call{l}\t{*}$dst", [(X86call GR32:$dst)]>, OpSize32,
Requires<[Not64BitMode,NotUseRetpolineIndirectCalls]>,
Requires<[Not64BitMode,NotUseIndirectThunkCalls]>,
Sched<[WriteJump]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst),
"call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
OpSize32,
Requires<[Not64BitMode,FavorMemIndirectCall,
NotUseRetpolineIndirectCalls]>,
NotUseIndirectThunkCalls]>,
Sched<[WriteJumpLd]>;
// Non-tracking calls for IBT, use with caution.
@ -334,11 +334,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
Requires<[In64BitMode,NotUseIndirectThunkCalls]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
NotUseRetpolineIndirectCalls]>;
NotUseIndirectThunkCalls]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@ -393,19 +393,19 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
usesCustomInserter = 1,
SchedRW = [WriteJump] in {
def RETPOLINE_CALL32 :
def INDIRECT_THUNK_CALL32 :
PseudoI<(outs), (ins GR32:$dst), [(X86call GR32:$dst)]>,
Requires<[Not64BitMode,UseRetpolineIndirectCalls]>;
Requires<[Not64BitMode,UseIndirectThunkCalls]>;
def RETPOLINE_CALL64 :
def INDIRECT_THUNK_CALL64 :
PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
Requires<[In64BitMode,UseIndirectThunkCalls]>;
// Retpoline variant of indirect tail calls.
// Indirect thunk variant of indirect tail calls.
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def RETPOLINE_TCRETURN64 :
def INDIRECT_THUNK_TCRETURN64 :
PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
def RETPOLINE_TCRETURN32 :
def INDIRECT_THUNK_TCRETURN32 :
PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
}
}

View File

@ -996,8 +996,8 @@ def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.

View File

@ -0,0 +1,900 @@
//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
/// of a load from memory (i.e., SOURCE), and any operation that may transmit
/// the value loaded from memory over a covert channel, or use the value loaded
/// from memory to determine a branch/call target (i.e., SINK). After finding
/// all such gadgets in a given function, the pass minimally inserts LFENCE
/// instructions in such a manner that the following property is satisfied: for
/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
/// least one LFENCE instruction. The algorithm that implements this minimal
/// insertion is influenced by an academic paper that minimally inserts memory
/// fences for high-performance concurrent programs:
/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
/// The algorithm implemented in this pass is as follows:
/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
/// following components:
/// - SOURCE instructions (also includes function arguments)
/// - SINK instructions
/// - Basic block entry points
/// - Basic block terminators
/// - LFENCE instructions
/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been
/// mitigated, go to step 6.
/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
/// 5. Go to step 2.
/// 6. If any LFENCEs were inserted, return `true` from runOnMachineFunction()
/// to tell LLVM that the function was modified.
///
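/// As an illustrative sketch (not drawn from any particular test case), a
/// minimal SOURCE+SINK gadget and its mitigation might look like:
///
///   movq (%rdi), %rax     # SOURCE: the loaded value may be attacker-injected
///   movq (%rax), %rcx     # SINK:   the loaded value determines an address
///
/// which, after hardening, becomes:
///
///   movq (%rdi), %rax
///   lfence                # cuts every CFG path from this SOURCE to its SINK
///   movq (%rax), %rcx
///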
//===----------------------------------------------------------------------===//
#include "ImmutableGraph.h"
#include "X86.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define PASS_KEY "x86-lvi-load"
#define DEBUG_TYPE PASS_KEY
STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
"were deployed");
STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
static cl::opt<std::string> OptimizePluginPath(
PASS_KEY "-opt-plugin",
cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
static cl::opt<bool> NoConditionalBranches(
PASS_KEY "-no-cbranch",
cl::desc("Don't treat conditional branches as disclosure gadgets. This "
"may improve performance, at the cost of security."),
cl::init(false), cl::Hidden);
static cl::opt<bool> EmitDot(
PASS_KEY "-dot",
cl::desc(
"For each function, emit a dot graph depicting potential LVI gadgets"),
cl::init(false), cl::Hidden);
static cl::opt<bool> EmitDotOnly(
PASS_KEY "-dot-only",
cl::desc("For each function, emit a dot graph depicting potential LVI "
"gadgets, and do not insert any fences"),
cl::init(false), cl::Hidden);
static cl::opt<bool> EmitDotVerify(
PASS_KEY "-dot-verify",
cl::desc("For each function, emit a dot graph to stdout depicting "
"potential LVI gadgets, used for testing purposes only"),
cl::init(false), cl::Hidden);
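// The options above are prefixed with PASS_KEY ("x86-lvi-load"), so (for
// example, and assuming an llc invocation on a target that enables this pass)
// the gadget graph can be dumped without inserting any fences via
//   llc -x86-lvi-load-dot-only ...
// and an external cut optimizer can be loaded via
//   llc -x86-lvi-load-opt-plugin=<plugin>.so ...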
static llvm::sys::DynamicLibrary OptimizeDL;
typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
unsigned int *edges, int *edge_values,
int *cut_edges /* out */, unsigned int edges_size);
static OptimizeCutT OptimizeCut = nullptr;
namespace {
struct MachineGadgetGraph : ImmutableGraph<MachineInstr *, int> {
static constexpr int GadgetEdgeSentinel = -1;
static constexpr MachineInstr *const ArgNodeSentinel = nullptr;
using GraphT = ImmutableGraph<MachineInstr *, int>;
using Node = typename GraphT::Node;
using Edge = typename GraphT::Edge;
using size_type = typename GraphT::size_type;
MachineGadgetGraph(std::unique_ptr<Node[]> Nodes,
std::unique_ptr<Edge[]> Edges, size_type NodesSize,
size_type EdgesSize, int NumFences = 0, int NumGadgets = 0)
: GraphT(std::move(Nodes), std::move(Edges), NodesSize, EdgesSize),
NumFences(NumFences), NumGadgets(NumGadgets) {}
static inline bool isCFGEdge(const Edge &E) {
return E.getValue() != GadgetEdgeSentinel;
}
static inline bool isGadgetEdge(const Edge &E) {
return E.getValue() == GadgetEdgeSentinel;
}
int NumFences;
int NumGadgets;
};
class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass {
public:
X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "X86 Load Value Injection (LVI) Load Hardening";
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
private:
using GraphBuilder = ImmutableGraphBuilder<MachineGadgetGraph>;
using EdgeSet = MachineGadgetGraph::EdgeSet;
using NodeSet = MachineGadgetGraph::NodeSet;
using Gadget = std::pair<MachineInstr *, MachineInstr *>;
const X86Subtarget *STI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
std::unique_ptr<MachineGadgetGraph>
getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
const MachineDominanceFrontier &MDF) const;
int hardenLoadsWithPlugin(MachineFunction &MF,
std::unique_ptr<MachineGadgetGraph> Graph) const;
int hardenLoadsWithGreedyHeuristic(
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const;
int elimMitigatedEdgesAndNodes(MachineGadgetGraph &G,
EdgeSet &ElimEdges /* in, out */,
NodeSet &ElimNodes /* in, out */) const;
std::unique_ptr<MachineGadgetGraph>
trimMitigatedEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
void findAndCutEdges(MachineGadgetGraph &G,
EdgeSet &CutEdges /* out */) const;
int insertFences(MachineFunction &MF, MachineGadgetGraph &G,
EdgeSet &CutEdges /* in, out */) const;
bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const;
inline bool isFence(const MachineInstr *MI) const {
return MI && (MI->getOpcode() == X86::LFENCE ||
(STI->useLVIControlFlowIntegrity() && MI->isCall()));
}
};
} // end anonymous namespace
namespace llvm {
template <>
struct GraphTraits<MachineGadgetGraph *>
: GraphTraits<ImmutableGraph<MachineInstr *, int> *> {};
template <>
struct DOTGraphTraits<MachineGadgetGraph *> : DefaultDOTGraphTraits {
using GraphType = MachineGadgetGraph;
using Traits = llvm::GraphTraits<GraphType *>;
using NodeRef = typename Traits::NodeRef;
using EdgeRef = typename Traits::EdgeRef;
using ChildIteratorType = typename Traits::ChildIteratorType;
using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType;
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
std::string getNodeLabel(NodeRef Node, GraphType *) {
if (Node->getValue() == MachineGadgetGraph::ArgNodeSentinel)
return "ARGS";
std::string Str;
raw_string_ostream OS(Str);
OS << *Node->getValue();
return OS.str();
}
static std::string getNodeAttributes(NodeRef Node, GraphType *) {
MachineInstr *MI = Node->getValue();
if (MI == MachineGadgetGraph::ArgNodeSentinel)
return "color = blue";
if (MI->getOpcode() == X86::LFENCE)
return "color = green";
return "";
}
static std::string getEdgeAttributes(NodeRef, ChildIteratorType E,
GraphType *) {
int EdgeVal = (*E.getCurrent()).getValue();
return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal)
: "color = red, style = \"dashed\"";
}
};
} // end namespace llvm
constexpr MachineInstr *MachineGadgetGraph::ArgNodeSentinel;
constexpr int MachineGadgetGraph::GadgetEdgeSentinel;
char X86LoadValueInjectionLoadHardeningPass::ID = 0;
void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage(
AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineDominanceFrontier>();
AU.setPreservesCFG();
}
static void WriteGadgetGraph(raw_ostream &OS, MachineFunction &MF,
MachineGadgetGraph *G) {
WriteGraph(OS, G, /*ShortNames*/ false,
"Speculative gadgets for \"" + MF.getName() + "\" function");
}
bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
<< " *****\n");
STI = &MF.getSubtarget<X86Subtarget>();
if (!STI->useLVILoadHardening())
return false;
// FIXME: support 32-bit
if (!STI->is64Bit())
report_fatal_error("LVI load hardening is only supported on 64-bit", false);
// Don't skip functions with the "optnone" attribute, but still participate in
// opt-bisect.
const Function &F = MF.getFunction();
if (!F.hasOptNone() && skipFunction(F))
return false;
++NumFunctionsConsidered;
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
LLVM_DEBUG(dbgs() << "Building gadget graph...\n");
const auto &MLI = getAnalysis<MachineLoopInfo>();
const auto &MDT = getAnalysis<MachineDominatorTree>();
const auto &MDF = getAnalysis<MachineDominanceFrontier>();
std::unique_ptr<MachineGadgetGraph> Graph = getGadgetGraph(MF, MLI, MDT, MDF);
LLVM_DEBUG(dbgs() << "Building gadget graph... Done\n");
if (Graph == nullptr)
return false; // didn't find any gadgets
if (EmitDotVerify) {
WriteGadgetGraph(outs(), MF, Graph.get());
return false;
}
if (EmitDot || EmitDotOnly) {
LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n");
std::error_code FileError;
std::string FileName = "lvi.";
FileName += MF.getName();
FileName += ".dot";
raw_fd_ostream FileOut(FileName, FileError);
if (FileError)
errs() << FileError.message();
WriteGadgetGraph(FileOut, MF, Graph.get());
FileOut.close();
LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n");
if (EmitDotOnly)
return false;
}
int FencesInserted;
if (!OptimizePluginPath.empty()) {
if (!OptimizeDL.isValid()) {
std::string ErrorMsg;
OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
OptimizePluginPath.c_str(), &ErrorMsg);
if (!ErrorMsg.empty())
report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
if (!OptimizeCut)
report_fatal_error("Invalid optimization plugin");
}
FencesInserted = hardenLoadsWithPlugin(MF, std::move(Graph));
} else { // Use the default greedy heuristic
FencesInserted = hardenLoadsWithGreedyHeuristic(MF, std::move(Graph));
}
if (FencesInserted > 0)
++NumFunctionsMitigated;
NumFences += FencesInserted;
return (FencesInserted > 0);
}
std::unique_ptr<MachineGadgetGraph>
X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
MachineFunction &MF, const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
const MachineDominanceFrontier &MDF) const {
using namespace rdf;
// Build the Register Dataflow Graph using the RDF framework
TargetOperandInfo TOI{*TII};
DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI};
DFG.build();
Liveness L{MF.getRegInfo(), DFG};
L.computePhiInfo();
GraphBuilder Builder;
using GraphIter = typename GraphBuilder::BuilderNodeRef;
DenseMap<MachineInstr *, GraphIter> NodeMap;
int FenceCount = 0, GadgetCount = 0;
auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
auto Ref = NodeMap.find(MI);
if (Ref == NodeMap.end()) {
auto I = Builder.addVertex(MI);
NodeMap[MI] = I;
return std::pair<GraphIter, bool>{I, true};
}
return std::pair<GraphIter, bool>{Ref->getSecond(), false};
};
// The `Transmitters` map memoizes transmitters found for each def. If a def
// has not yet been analyzed, then it will not appear in the map. If a def
// has been analyzed and was determined not to have any transmitters, then
// its list of transmitters will be empty.
DenseMap<NodeId, std::vector<NodeId>> Transmitters;
// Analyze all machine instructions to find gadgets and LFENCEs, adding
// each interesting value to `Nodes`
auto AnalyzeDef = [&](NodeAddr<DefNode *> SourceDef) {
SmallSet<NodeId, 8> UsesVisited, DefsVisited;
std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
[&](NodeAddr<DefNode *> Def) {
if (Transmitters.find(Def.Id) != Transmitters.end())
return; // Already analyzed `Def`
// Use RDF to find all the uses of `Def`
rdf::NodeSet Uses;
RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
auto Use = DFG.addr<UseNode *>(UseID);
if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG);
for (auto I : L.getRealUses(Phi.Id)) {
if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) {
for (auto UA : I.second)
Uses.emplace(UA.first);
}
}
} else { // not a phi node
Uses.emplace(UseID);
}
}
// For each use of `Def`, we want to know whether:
// (1) The use can leak the Def'ed value,
// (2) The use can further propagate the Def'ed value to more defs
for (auto UseID : Uses) {
if (!UsesVisited.insert(UseID).second)
continue; // Already visited this use of `Def`
auto Use = DFG.addr<UseNode *>(UseID);
assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef));
MachineOperand &UseMO = Use.Addr->getOp();
MachineInstr &UseMI = *UseMO.getParent();
assert(UseMO.isReg());
// We naively assume that an instruction propagates any loaded
// uses to all defs unless the instruction is a call, in which
// case all arguments will be treated as gadget sources during
// analysis of the callee function.
if (UseMI.isCall())
continue;
// Check whether this use can transmit (leak) its value.
if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) ||
(!NoConditionalBranches &&
instrUsesRegToBranch(UseMI, UseMO.getReg()))) {
Transmitters[Def.Id].push_back(Use.Addr->getOwner(DFG).Id);
if (UseMI.mayLoad())
continue; // Found a transmitting load -- no need to continue
// traversing its defs (i.e., this load will become
// a new gadget source anyways).
}
// Check whether the use propagates to more defs.
NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)};
rdf::NodeList AnalyzedChildDefs;
for (auto &ChildDef :
Owner.Addr->members_if(DataFlowGraph::IsDef, DFG)) {
if (!DefsVisited.insert(ChildDef.Id).second)
continue; // Already visited this def
if (Def.Addr->getAttrs() & NodeAttrs::Dead)
continue;
if (Def.Id == ChildDef.Id)
continue; // `Def` uses itself (e.g., increment loop counter)
AnalyzeDefUseChain(ChildDef);
// `Def` inherits all of its child defs' transmitters.
for (auto TransmitterId : Transmitters[ChildDef.Id])
Transmitters[Def.Id].push_back(TransmitterId);
}
}
// Note that this statement adds `Def.Id` to the map if no
// transmitters were found for `Def`.
auto &DefTransmitters = Transmitters[Def.Id];
// Remove duplicate transmitters
llvm::sort(DefTransmitters);
DefTransmitters.erase(
std::unique(DefTransmitters.begin(), DefTransmitters.end()),
DefTransmitters.end());
};
// Find all of the transmitters
AnalyzeDefUseChain(SourceDef);
auto &SourceDefTransmitters = Transmitters[SourceDef.Id];
if (SourceDefTransmitters.empty())
return; // No transmitters for `SourceDef`
MachineInstr *Source = SourceDef.Addr->getFlags() & NodeAttrs::PhiRef
? MachineGadgetGraph::ArgNodeSentinel
: SourceDef.Addr->getOp().getParent();
auto GadgetSource = MaybeAddNode(Source);
// Each transmitter is a sink for `SourceDef`.
for (auto TransmitterId : SourceDefTransmitters) {
MachineInstr *Sink = DFG.addr<StmtNode *>(TransmitterId).Addr->getCode();
auto GadgetSink = MaybeAddNode(Sink);
// Add the gadget edge to the graph.
Builder.addEdge(MachineGadgetGraph::GadgetEdgeSentinel,
GadgetSource.first, GadgetSink.first);
++GadgetCount;
}
};
LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n");
// Analyze function arguments
NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG);
for (NodeAddr<PhiNode *> ArgPhi :
EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) {
NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG);
llvm::for_each(Defs, AnalyzeDef);
}
// Analyze every instruction in MF
for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
for (NodeAddr<StmtNode *> SA :
BA.Addr->members_if(DataFlowGraph::IsCode<NodeAttrs::Stmt>, DFG)) {
MachineInstr *MI = SA.Addr->getCode();
if (isFence(MI)) {
MaybeAddNode(MI);
++FenceCount;
} else if (MI->mayLoad()) {
NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG);
llvm::for_each(Defs, AnalyzeDef);
}
}
}
LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n");
LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n");
if (GadgetCount == 0)
return nullptr;
NumGadgets += GadgetCount;
// Traverse CFG to build the rest of the graph
SmallSet<MachineBasicBlock *, 8> BlocksVisited;
std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG =
[&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) {
unsigned LoopDepth = MLI.getLoopDepth(MBB);
if (!MBB->empty()) {
// Always add the first instruction in each block
auto NI = MBB->begin();
auto BeginBB = MaybeAddNode(&*NI);
Builder.addEdge(ParentDepth, GI, BeginBB.first);
if (!BlocksVisited.insert(MBB).second)
return;
// Add any instructions within the block that are gadget components
GI = BeginBB.first;
while (++NI != MBB->end()) {
auto Ref = NodeMap.find(&*NI);
if (Ref != NodeMap.end()) {
Builder.addEdge(LoopDepth, GI, Ref->getSecond());
GI = Ref->getSecond();
}
}
// Always add the terminator instruction, if one exists
auto T = MBB->getFirstTerminator();
if (T != MBB->end()) {
auto EndBB = MaybeAddNode(&*T);
if (EndBB.second)
Builder.addEdge(LoopDepth, GI, EndBB.first);
GI = EndBB.first;
}
}
for (MachineBasicBlock *Succ : MBB->successors())
TraverseCFG(Succ, GI, LoopDepth);
};
// ArgNodeSentinel is a pseudo-instruction that represents MF args in the
// GadgetGraph
GraphIter ArgNode = MaybeAddNode(MachineGadgetGraph::ArgNodeSentinel).first;
TraverseCFG(&MF.front(), ArgNode, 0);
std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)};
LLVM_DEBUG(dbgs() << "Found " << G->nodes_size() << " nodes\n");
return G;
}
// Returns the number of remaining gadget edges that could not be eliminated
int X86LoadValueInjectionLoadHardeningPass::elimMitigatedEdgesAndNodes(
MachineGadgetGraph &G, MachineGadgetGraph::EdgeSet &ElimEdges /* in, out */,
MachineGadgetGraph::NodeSet &ElimNodes /* in, out */) const {
if (G.NumFences > 0) {
// Eliminate fences and CFG edges that ingress and egress the fence, as
// they are trivially mitigated.
for (const auto &E : G.edges()) {
const MachineGadgetGraph::Node *Dest = E.getDest();
if (isFence(Dest->getValue())) {
ElimNodes.insert(*Dest);
ElimEdges.insert(E);
for (const auto &DE : Dest->edges())
ElimEdges.insert(DE);
}
}
}
// Find and eliminate gadget edges that have been mitigated.
int MitigatedGadgets = 0, RemainingGadgets = 0;
MachineGadgetGraph::NodeSet ReachableNodes{G};
for (const auto &RootN : G.nodes()) {
if (llvm::none_of(RootN.edges(), MachineGadgetGraph::isGadgetEdge))
continue; // skip this node if it isn't a gadget source
// Find all of the nodes that are CFG-reachable from RootN using DFS
ReachableNodes.clear();
std::function<void(const MachineGadgetGraph::Node *, bool)>
FindReachableNodes =
[&](const MachineGadgetGraph::Node *N, bool FirstNode) {
if (!FirstNode)
ReachableNodes.insert(*N);
for (const auto &E : N->edges()) {
const MachineGadgetGraph::Node *Dest = E.getDest();
if (MachineGadgetGraph::isCFGEdge(E) &&
!ElimEdges.contains(E) && !ReachableNodes.contains(*Dest))
FindReachableNodes(Dest, false);
}
};
FindReachableNodes(&RootN, true);
// Any gadget whose sink is unreachable has been mitigated
for (const auto &E : RootN.edges()) {
if (MachineGadgetGraph::isGadgetEdge(E)) {
if (ReachableNodes.contains(*E.getDest())) {
// This gadget's sink is reachable
++RemainingGadgets;
} else { // This gadget's sink is unreachable, and therefore mitigated
++MitigatedGadgets;
ElimEdges.insert(E);
}
}
}
}
return RemainingGadgets;
}
std::unique_ptr<MachineGadgetGraph>
X86LoadValueInjectionLoadHardeningPass::trimMitigatedEdges(
std::unique_ptr<MachineGadgetGraph> Graph) const {
MachineGadgetGraph::NodeSet ElimNodes{*Graph};
MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
int RemainingGadgets =
elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes);
if (ElimEdges.empty() && ElimNodes.empty()) {
Graph->NumFences = 0;
Graph->NumGadgets = RemainingGadgets;
} else {
Graph = GraphBuilder::trim(*Graph, ElimNodes, ElimEdges, 0 /* NumFences */,
RemainingGadgets);
}
return Graph;
}
int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithPlugin(
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
int FencesInserted = 0;
do {
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
Graph = trimMitigatedEdges(std::move(Graph));
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
if (Graph->NumGadgets == 0)
break;
LLVM_DEBUG(dbgs() << "Cutting edges...\n");
EdgeSet CutEdges{*Graph};
auto Nodes = std::make_unique<unsigned int[]>(Graph->nodes_size() +
1 /* terminator node */);
auto Edges = std::make_unique<unsigned int[]>(Graph->edges_size());
auto EdgeCuts = std::make_unique<int[]>(Graph->edges_size());
auto EdgeValues = std::make_unique<int[]>(Graph->edges_size());
for (const auto &N : Graph->nodes()) {
Nodes[Graph->getNodeIndex(N)] = Graph->getEdgeIndex(*N.edges_begin());
}
Nodes[Graph->nodes_size()] = Graph->edges_size(); // terminator node
for (const auto &E : Graph->edges()) {
Edges[Graph->getEdgeIndex(E)] = Graph->getNodeIndex(*E.getDest());
EdgeValues[Graph->getEdgeIndex(E)] = E.getValue();
}
OptimizeCut(Nodes.get(), Graph->nodes_size(), Edges.get(), EdgeValues.get(),
EdgeCuts.get(), Graph->edges_size());
for (int I = 0; I < Graph->edges_size(); ++I)
if (EdgeCuts[I])
CutEdges.set(I);
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
FencesInserted += insertFences(MF, *Graph, CutEdges);
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
Graph = GraphBuilder::trim(*Graph, MachineGadgetGraph::NodeSet{*Graph},
CutEdges);
} while (true);
return FencesInserted;
}
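// For illustration only, a deliberately naive "optimize_cut" plugin for the
// interface consumed above (it assumes the CSR-style arrays populated in
// hardenLoadsWithPlugin: nodes[i] holds the index of node i's first edge,
// nodes[nodes_size] == edges_size, and edge_values[j] < 0 marks a gadget
// edge) could simply cut every CFG edge:
//
//   extern "C" int optimize_cut(unsigned *nodes, unsigned nodes_size,
//                               unsigned *edges, int *edge_values,
//                               int *cut_edges /* out */,
//                               unsigned edges_size) {
//     for (unsigned J = 0; J < edges_size; ++J)
//       cut_edges[J] = (edge_values[J] >= 0); // fence along every CFG edge
//     return 0;
//   }
//
// A real plugin would instead compute a (near-)minimal cut over the same
// arrays.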
int X86LoadValueInjectionLoadHardeningPass::hardenLoadsWithGreedyHeuristic(
MachineFunction &MF, std::unique_ptr<MachineGadgetGraph> Graph) const {
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
Graph = trimMitigatedEdges(std::move(Graph));
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
if (Graph->NumGadgets == 0)
return 0;
LLVM_DEBUG(dbgs() << "Cutting edges...\n");
MachineGadgetGraph::NodeSet ElimNodes{*Graph}, GadgetSinks{*Graph};
MachineGadgetGraph::EdgeSet ElimEdges{*Graph}, CutEdges{*Graph};
auto IsCFGEdge = [&ElimEdges, &CutEdges](const MachineGadgetGraph::Edge &E) {
return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
MachineGadgetGraph::isCFGEdge(E);
};
auto IsGadgetEdge = [&ElimEdges,
&CutEdges](const MachineGadgetGraph::Edge &E) {
return !ElimEdges.contains(E) && !CutEdges.contains(E) &&
MachineGadgetGraph::isGadgetEdge(E);
};
// FIXME: this is O(E^2), we could probably do better.
do {
// Find the cheapest CFG edge that will eliminate a gadget (by being
// egress from a SOURCE node or ingress to a SINK node), and cut it.
const MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
// First, collect all gadget source and sink nodes.
MachineGadgetGraph::NodeSet GadgetSources{*Graph}, GadgetSinks{*Graph};
for (const auto &N : Graph->nodes()) {
if (ElimNodes.contains(N))
continue;
for (const auto &E : N.edges()) {
if (IsGadgetEdge(E)) {
GadgetSources.insert(N);
GadgetSinks.insert(*E.getDest());
}
}
}
// Next, look for the cheapest CFG edge which, when cut, is guaranteed to
// mitigate at least one gadget by either:
// (a) being egress from a gadget source, or
// (b) being ingress to a gadget sink.
for (const auto &N : Graph->nodes()) {
if (ElimNodes.contains(N))
continue;
for (const auto &E : N.edges()) {
if (IsCFGEdge(E)) {
if (GadgetSources.contains(N) || GadgetSinks.contains(*E.getDest())) {
if (!CheapestSoFar || E.getValue() < CheapestSoFar->getValue())
CheapestSoFar = &E;
}
}
}
}
assert(CheapestSoFar && "Failed to cut an edge");
CutEdges.insert(*CheapestSoFar);
ElimEdges.insert(*CheapestSoFar);
} while (elimMitigatedEdgesAndNodes(*Graph, ElimEdges, ElimNodes));
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
int FencesInserted = insertFences(MF, *Graph, CutEdges);
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
return FencesInserted;
}
int X86LoadValueInjectionLoadHardeningPass::insertFences(
MachineFunction &MF, MachineGadgetGraph &G,
EdgeSet &CutEdges /* in, out */) const {
int FencesInserted = 0;
for (const auto &N : G.nodes()) {
for (const auto &E : N.edges()) {
if (CutEdges.contains(E)) {
MachineInstr *MI = N.getValue(), *Prev;
MachineBasicBlock *MBB; // Insert an LFENCE in this MBB
MachineBasicBlock::iterator InsertionPt; // ...at this point
if (MI == MachineGadgetGraph::ArgNodeSentinel) {
// insert LFENCE at beginning of entry block
MBB = &MF.front();
InsertionPt = MBB->begin();
Prev = nullptr;
} else if (MI->isBranch()) { // insert the LFENCE before the branch
MBB = MI->getParent();
InsertionPt = MI;
Prev = MI->getPrevNode();
// Remove all egress CFG edges from this branch because the inserted
// LFENCE prevents gadgets from crossing the branch.
for (const auto &E : N.edges()) {
if (MachineGadgetGraph::isCFGEdge(E))
CutEdges.insert(E);
}
} else { // insert the LFENCE after the instruction
MBB = MI->getParent();
InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
Prev = InsertionPt == MBB->end()
? (MBB->empty() ? nullptr : &MBB->back())
: InsertionPt->getPrevNode();
}
// Ensure this insertion is not redundant (two LFENCEs in sequence).
if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
(!Prev || !isFence(Prev))) {
BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
++FencesInserted;
}
}
}
}
return FencesInserted;
}
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
const MachineInstr &MI, unsigned Reg) const {
if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE)
return false;
// FIXME: This does not handle pseudo loading instruction like TCRETURN*
const MCInstrDesc &Desc = MI.getDesc();
int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
if (MemRefBeginIdx < 0) {
LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading "
"instruction:\n";
MI.print(dbgs()); dbgs() << '\n';);
return false;
}
MemRefBeginIdx += X86II::getOperandBias(Desc);
const MachineOperand &BaseMO =
MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
const MachineOperand &IndexMO =
MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister &&
TRI->regsOverlap(BaseMO.getReg(), Reg)) ||
(IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister &&
TRI->regsOverlap(IndexMO.getReg(), Reg));
}
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch(
const MachineInstr &MI, unsigned Reg) const {
if (!MI.isConditionalBranch())
return false;
for (const MachineOperand &Use : MI.uses())
if (Use.isReg() && Use.getReg() == Reg)
return true;
return false;
}
INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
"X86 LVI load hardening", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
"X86 LVI load hardening", false, false)
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
return new X86LoadValueInjectionLoadHardeningPass();
}
namespace {
/// The `X86LoadValueInjectionLoadHardeningPass` above depends on expensive
/// analysis passes that add complexity to the pipeline. This complexity
/// can cause noticeable overhead when no optimizations are enabled, i.e., -O0.
/// The purpose of `X86LoadValueInjectionLoadHardeningUnoptimizedPass` is to
/// provide the same security as the optimized pass, but without adding
/// unnecessary complexity to the LLVM pipeline.
///
/// The behavior of this pass is simply to insert an LFENCE after every load
/// instruction.
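///
/// For example (a sketch), a lone load such as
///   movq (%rdi), %rax
/// simply becomes
///   movq (%rdi), %rax
///   lfence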
class X86LoadValueInjectionLoadHardeningUnoptimizedPass
: public MachineFunctionPass {
public:
X86LoadValueInjectionLoadHardeningUnoptimizedPass()
: MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "X86 Load Value Injection (LVI) Load Hardening (Unoptimized)";
}
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
};
} // end anonymous namespace
char X86LoadValueInjectionLoadHardeningUnoptimizedPass::ID = 0;
bool X86LoadValueInjectionLoadHardeningUnoptimizedPass::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
<< " *****\n");
const X86Subtarget *STI = &MF.getSubtarget<X86Subtarget>();
if (!STI->useLVILoadHardening())
return false;
// FIXME: support 32-bit
if (!STI->is64Bit())
report_fatal_error("LVI load hardening is only supported on 64-bit", false);
// Don't skip functions with the "optnone" attribute, but still participate in
// opt-bisect.
const Function &F = MF.getFunction();
if (!F.hasOptNone() && skipFunction(F))
return false;
bool Modified = false;
++NumFunctionsConsidered;
const TargetInstrInfo *TII = STI->getInstrInfo();
for (auto &MBB : MF) {
for (auto &MI : MBB) {
if (!MI.mayLoad() || MI.getOpcode() == X86::LFENCE ||
MI.getOpcode() == X86::MFENCE)
continue;
MachineBasicBlock::iterator InsertionPt =
MI.getNextNode() ? MI.getNextNode() : MBB.end();
BuildMI(MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
++NumFences;
Modified = true;
}
}
if (Modified)
++NumFunctionsMitigated;
return Modified;
}
INITIALIZE_PASS(X86LoadValueInjectionLoadHardeningUnoptimizedPass, PASS_KEY,
"X86 LVI load hardening", false, false)
FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningUnoptimizedPass() {
return new X86LoadValueInjectionLoadHardeningUnoptimizedPass();
}

View File

@ -0,0 +1,143 @@
//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// Description: Replaces every `ret` instruction with the sequence:
/// ```
/// pop <scratch-reg>
/// lfence
/// jmp *<scratch-reg>
/// ```
/// where `<scratch-reg>` is some available scratch register, according to the
/// calling convention of the function being mitigated.
///
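/// For example (a sketch; the register actually chosen depends on the calling
/// convention, see runOnMachineFunction below), a bare `retq` becomes:
/// ```
/// popq %rcx
/// lfence
/// jmpq *%rcx
/// ```
///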
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include <bitset>
using namespace llvm;
#define PASS_KEY "x86-lvi-ret"
#define DEBUG_TYPE PASS_KEY
STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
"were deployed");
namespace {
class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass {
public:
X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "X86 Load Value Injection (LVI) Ret-Hardening";
}
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
};
} // end anonymous namespace
char X86LoadValueInjectionRetHardeningPass::ID = 0;
bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
<< " *****\n");
const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
if (!Subtarget->useLVIControlFlowIntegrity() || !Subtarget->is64Bit())
return false; // FIXME: support 32-bit
// Don't skip functions with the "optnone" attribute, but still participate in
// opt-bisect.
const Function &F = MF.getFunction();
if (!F.hasOptNone() && skipFunction(F))
return false;
++NumFunctionsConsidered;
const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
const X86InstrInfo *TII = Subtarget->getInstrInfo();
unsigned ClobberReg = X86::NoRegister;
std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
UnclobberableGR64s.set(X86::RAX); // used for function return
UnclobberableGR64s.set(X86::RDX); // used for function return
// We can clobber any register allowed by the function's calling convention.
for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
UnclobberableGR64s.set(Reg);
for (auto &Reg : X86::GR64RegClass) {
if (!UnclobberableGR64s.test(Reg)) {
ClobberReg = Reg;
break;
}
}
if (ClobberReg != X86::NoRegister) {
LLVM_DEBUG(dbgs() << "Selected register "
<< Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
<< " to clobber\n");
} else {
LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
}
bool Modified = false;
for (auto &MBB : MF) {
if (MBB.empty())
continue;
MachineInstr &MI = MBB.back();
if (MI.getOpcode() != X86::RETQ)
continue;
if (ClobberReg != X86::NoRegister) {
MBB.erase_instr(&MI);
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
.addReg(ClobberReg, RegState::Define)
.setMIFlag(MachineInstr::FrameDestroy);
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
.addReg(ClobberReg);
} else {
// In case there is no available scratch register, we can still read from
// RSP to assert that RSP points to a valid page. The write to RSP is
// also helpful because it verifies that the stack's write permissions
// are intact.
MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
X86::RSP, false, 0)
.addImm(0)
->addRegisterDead(X86::EFLAGS, TRI);
}
++NumFences;
Modified = true;
}
if (Modified)
++NumFunctionsMitigated;
return Modified;
}
INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY,
"X86 LVI ret hardener", false, false)
FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() {
return new X86LoadValueInjectionRetHardeningPass();
}

View File

@ -1220,8 +1220,8 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
break;
case MachineOperand::MO_Register:
// FIXME: Add retpoline support and remove this.
if (Subtarget->useRetpolineIndirectCalls())
report_fatal_error("Lowering register statepoints with retpoline not "
if (Subtarget->useIndirectThunkCalls())
report_fatal_error("Lowering register statepoints with thunks not "
"yet implemented.");
CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
CallOpcode = X86::CALL64r;
@ -1399,9 +1399,9 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
EmitAndCountInstruction(
MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
// FIXME: Add retpoline support and remove this.
if (Subtarget->useRetpolineIndirectCalls())
if (Subtarget->useIndirectThunkCalls())
report_fatal_error(
"Lowering patchpoint with retpoline not yet implemented.");
"Lowering patchpoint with thunks not yet implemented.");
EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
}

View File

@ -1,286 +0,0 @@
//======- X86RetpolineThunks.cpp - Construct retpoline thunks for x86 --=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk implementing a "retpoline". This is
/// a RET-implemented trampoline that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "x86-retpoline-thunks"
static const char ThunkNamePrefix[] = "__llvm_retpoline_";
static const char R11ThunkName[] = "__llvm_retpoline_r11";
static const char EAXThunkName[] = "__llvm_retpoline_eax";
static const char ECXThunkName[] = "__llvm_retpoline_ecx";
static const char EDXThunkName[] = "__llvm_retpoline_edx";
static const char EDIThunkName[] = "__llvm_retpoline_edi";
namespace {
class X86RetpolineThunks : public MachineFunctionPass {
public:
static char ID;
X86RetpolineThunks() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "X86 Retpoline Thunks"; }
bool doInitialization(Module &M) override;
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
}
private:
MachineModuleInfo *MMI = nullptr;
const TargetMachine *TM = nullptr;
bool Is64Bit = false;
const X86Subtarget *STI = nullptr;
const X86InstrInfo *TII = nullptr;
bool InsertedThunks = false;
void createThunkFunction(Module &M, StringRef Name);
void insertRegReturnAddrClobber(MachineBasicBlock &MBB, unsigned Reg);
void populateThunk(MachineFunction &MF, unsigned Reg);
};
} // end anonymous namespace
FunctionPass *llvm::createX86RetpolineThunksPass() {
return new X86RetpolineThunks();
}
char X86RetpolineThunks::ID = 0;
bool X86RetpolineThunks::doInitialization(Module &M) {
InsertedThunks = false;
return false;
}
bool X86RetpolineThunks::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << getPassName() << '\n');
TM = &MF.getTarget();
STI = &MF.getSubtarget<X86Subtarget>();
TII = STI->getInstrInfo();
Is64Bit = TM->getTargetTriple().getArch() == Triple::x86_64;
MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
Module &M = const_cast<Module &>(*MMI->getModule());
// If this function is not a thunk, check to see if we need to insert
// a thunk.
if (!MF.getName().startswith(ThunkNamePrefix)) {
// If we've already inserted a thunk, nothing else to do.
if (InsertedThunks)
return false;
// Only add a thunk if one of the functions has the retpoline feature
// enabled in its subtarget, and doesn't enable external thunks.
// FIXME: Conditionalize on indirect calls so we don't emit a thunk when
// nothing will end up calling it.
// FIXME: It's a little silly to look at every function just to enumerate
// the subtargets, but eventually we'll want to look at them for indirect
// calls, so maybe this is OK.
if ((!STI->useRetpolineIndirectCalls() &&
!STI->useRetpolineIndirectBranches()) ||
STI->useRetpolineExternalThunk())
return false;
// Otherwise, we need to insert the thunk.
// WARNING: This is not really a well behaving thing to do in a function
// pass. We extract the module and insert a new function (and machine
// function) directly into the module.
if (Is64Bit)
createThunkFunction(M, R11ThunkName);
else
for (StringRef Name :
{EAXThunkName, ECXThunkName, EDXThunkName, EDIThunkName})
createThunkFunction(M, Name);
InsertedThunks = true;
return true;
}
// If this *is* a thunk function, we need to populate it with the correct MI.
if (Is64Bit) {
assert(MF.getName() == "__llvm_retpoline_r11" &&
"Should only have an r11 thunk on 64-bit targets");
// __llvm_retpoline_r11:
// callq .Lr11_call_target
// .Lr11_capture_spec:
// pause
// lfence
// jmp .Lr11_capture_spec
// .align 16
// .Lr11_call_target:
// movq %r11, (%rsp)
// retq
populateThunk(MF, X86::R11);
} else {
// For 32-bit targets we need to emit a collection of thunks for various
// possible scratch registers as well as a fallback that uses EDI, which is
// normally callee saved.
// __llvm_retpoline_eax:
// calll .Leax_call_target
// .Leax_capture_spec:
// pause
// jmp .Leax_capture_spec
// .align 16
// .Leax_call_target:
// movl %eax, (%esp) # Clobber return addr
// retl
//
// __llvm_retpoline_ecx:
// ... # Same setup
// movl %ecx, (%esp)
// retl
//
// __llvm_retpoline_edx:
// ... # Same setup
// movl %edx, (%esp)
// retl
//
// __llvm_retpoline_edi:
// ... # Same setup
// movl %edi, (%esp)
// retl
if (MF.getName() == EAXThunkName)
populateThunk(MF, X86::EAX);
else if (MF.getName() == ECXThunkName)
populateThunk(MF, X86::ECX);
else if (MF.getName() == EDXThunkName)
populateThunk(MF, X86::EDX);
else if (MF.getName() == EDIThunkName)
populateThunk(MF, X86::EDI);
else
llvm_unreachable("Invalid thunk name on x86-32!");
}
return true;
}
void X86RetpolineThunks::createThunkFunction(Module &M, StringRef Name) {
assert(Name.startswith(ThunkNamePrefix) &&
"Created a thunk with an unexpected prefix!");
LLVMContext &Ctx = M.getContext();
auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
Function *F =
Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
F->setVisibility(GlobalValue::HiddenVisibility);
F->setComdat(M.getOrInsertComdat(Name));
// Add Attributes so that we don't create a frame, unwind information, or
// inline.
AttrBuilder B;
B.addAttribute(llvm::Attribute::NoUnwind);
B.addAttribute(llvm::Attribute::Naked);
F->addAttributes(llvm::AttributeList::FunctionIndex, B);
// Populate our function a bit so that we can verify.
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
IRBuilder<> Builder(Entry);
Builder.CreateRetVoid();
// MachineFunctions/MachineBasicBlocks aren't created automatically for the
// IR-level constructs we already made. Create them and insert them into the
// module.
MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
// Insert EntryMBB into MF. It's not in the module until we do this.
MF.insert(MF.end(), EntryMBB);
}
void X86RetpolineThunks::insertRegReturnAddrClobber(MachineBasicBlock &MBB,
unsigned Reg) {
const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
const unsigned SPReg = Is64Bit ? X86::RSP : X86::ESP;
addRegOffset(BuildMI(&MBB, DebugLoc(), TII->get(MovOpc)), SPReg, false, 0)
.addReg(Reg);
}
void X86RetpolineThunks::populateThunk(MachineFunction &MF,
unsigned Reg) {
// Set MF properties. We never use vregs...
MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
// Grab the entry MBB and erase any other blocks. O0 codegen appears to
// generate two bbs for the entry block.
MachineBasicBlock *Entry = &MF.front();
Entry->clear();
while (MF.size() > 1)
MF.erase(std::next(MF.begin()));
MachineBasicBlock *CaptureSpec = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MachineBasicBlock *CallTarget = MF.CreateMachineBasicBlock(Entry->getBasicBlock());
MCSymbol *TargetSym = MF.getContext().createTempSymbol();
MF.push_back(CaptureSpec);
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
Entry->addLiveIn(Reg);
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
// The MIR verifier thinks that the CALL in the entry block will fall through
// to CaptureSpec, so mark it as the successor. Technically, CallTarget is
// the successor, but the MIR verifier doesn't know how to cope with that.
Entry->addSuccessor(CaptureSpec);
// In the capture loop for speculation, we want to stop the processor from
// speculating as fast as possible. On Intel processors, the PAUSE instruction
// will block speculation without consuming any execution resources. On AMD
// processors, the PAUSE instruction is (essentially) a nop, so we also use an
// LFENCE instruction which they have advised will stop speculation as well
// with minimal resource utilization. We still end the capture with a jump to
// form an infinite loop to fully guarantee that no matter what implementation
// of the x86 ISA, speculating this code path never escapes.
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
CaptureSpec->setHasAddressTaken();
CaptureSpec->addSuccessor(CaptureSpec);
CallTarget->addLiveIn(Reg);
CallTarget->setHasAddressTaken();
CallTarget->setAlignment(Align(16));
insertRegReturnAddrClobber(*CallTarget, Reg);
CallTarget->back().setPreInstrSymbol(MF, TargetSym);
BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}

View File

@ -421,6 +421,16 @@ protected:
/// than emitting one inside the compiler.
bool UseRetpolineExternalThunk = false;
/// Prevent generation of indirect call/branch instructions from memory,
/// and force all indirect call/branch instructions from a register to be
/// preceded by an LFENCE. Also decompose RET instructions into a
/// POP+LFENCE+JMP sequence.
bool UseLVIControlFlowIntegrity = false;
/// Insert LFENCE instructions to prevent data speculatively injected into
/// loads from being used maliciously.
bool UseLVILoadHardening = false;
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@ -707,8 +717,21 @@ public:
return UseRetpolineIndirectBranches;
}
bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
// These are generic getters that OR together all of the thunk types
// supported by the subtarget. Therefore useIndirectThunk*() will return true
// if any respective thunk feature is enabled.
bool useIndirectThunkCalls() const {
return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
}
bool useIndirectThunkBranches() const {
return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
}
bool preferMaskRegisters() const { return PreferMaskRegisters; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
bool useLVILoadHardening() const { return UseLVILoadHardening; }
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@ -853,10 +876,10 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
/// If we are using retpolines, we need to expand indirectbr to avoid it
/// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
return useRetpolineIndirectBranches();
return useIndirectThunkBranches();
}
/// Enable the MachineScheduler pass for all X86 subtargets.

View File

@ -82,6 +82,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86SpeculativeLoadHardeningPassPass(PR);
initializeX86FlagsCopyLoweringPassPass(PR);
initializeX86CondBrFoldingPassPass(PR);
initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
initializeX86LoadValueInjectionRetHardeningPassPass(PR);
initializeX86OptimizeLEAPassPass(PR);
}
@ -496,6 +498,10 @@ void X86PassConfig::addMachineSSAOptimization() {
void X86PassConfig::addPostRegAlloc() {
addPass(createX86FloatingPointStackifierPass());
if (getOptLevel() != CodeGenOpt::None)
addPass(createX86LoadValueInjectionLoadHardeningPass());
else
addPass(createX86LoadValueInjectionLoadHardeningUnoptimizedPass());
}
void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@ -525,7 +531,7 @@ void X86PassConfig::addPreEmitPass2() {
const Triple &TT = TM->getTargetTriple();
const MCAsmInfo *MAI = TM->getMCAsmInfo();
addPass(createX86RetpolineThunksPass());
addPass(createX86IndirectThunksPass());
// Insert extra int3 instructions after trailing call instructions to avoid
// issues in the unwinder.
@ -542,6 +548,7 @@ void X86PassConfig::addPreEmitPass2() {
// Identify valid longjmp targets for Windows Control Flow Guard.
if (TT.isOSWindows())
addPass(createCFGuardLongjmpPass());
addPass(createX86LoadValueInjectionRetHardeningPass());
}
std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {

View File

@ -1768,7 +1768,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Constant *C2;
// C-(C2-X) --> X+(C-C2)
if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))))
if (match(Op1, m_Sub(m_Constant(C2), m_Value(X))) && !isa<ConstantExpr>(C2))
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
// C-(X+C2) --> (C-C2)-X

View File

@ -1,14 +1,14 @@
// $FreeBSD$
#define LLVM_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLVM_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
#define CLANG_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define CLANG_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git"
// <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
#define LLD_REVISION "llvmorg-10.0.0-97-g6f71678ecd2-1300007"
#define LLD_REVISION "llvmorg-10.0.0-129-gd24d5c8e308-1300007"
#define LLD_REPOSITORY "FreeBSD"
#define LLDB_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLDB_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git"

View File

@ -1,3 +1,3 @@
/* $FreeBSD$ */
#define LLVM_REVISION "llvmorg-10.0.0-97-g6f71678ecd2"
#define LLVM_REVISION "llvmorg-10.0.0-129-gd24d5c8e308"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"

View File

@ -318,6 +318,9 @@ SRCS_MIN+= CodeGen/ProcessImplicitDefs.cpp
SRCS_MIN+= CodeGen/PrologEpilogInserter.cpp
SRCS_MIN+= CodeGen/PseudoSourceValue.cpp
SRCS_MIN+= CodeGen/ReachingDefAnalysis.cpp
SRCS_MIN+= CodeGen/RDFGraph.cpp
SRCS_MIN+= CodeGen/RDFLiveness.cpp
SRCS_MIN+= CodeGen/RDFRegisters.cpp
SRCS_MIN+= CodeGen/RegAllocBase.cpp
SRCS_MIN+= CodeGen/RegAllocBasic.cpp
SRCS_MIN+= CodeGen/RegAllocFast.cpp
@ -1284,6 +1287,7 @@ SRCS_MIN+= Target/X86/X86FrameLowering.cpp
SRCS_MIN+= Target/X86/X86ISelDAGToDAG.cpp
SRCS_MIN+= Target/X86/X86ISelLowering.cpp
SRCS_MIN+= Target/X86/X86IndirectBranchTracking.cpp
SRCS_MIN+= Target/X86/X86IndirectThunks.cpp
SRCS_MIN+= Target/X86/X86InsertPrefetch.cpp
SRCS_MIN+= Target/X86/X86InstrFMA3Info.cpp
SRCS_MIN+= Target/X86/X86InstrFoldTables.cpp
@ -1291,6 +1295,8 @@ SRCS_MIN+= Target/X86/X86InstrInfo.cpp
SRCS_MIN+= Target/X86/X86InstructionSelector.cpp
SRCS_MIN+= Target/X86/X86InterleavedAccess.cpp
SRCS_MIN+= Target/X86/X86LegalizerInfo.cpp
SRCS_MIN+= Target/X86/X86LoadValueInjectionLoadHardening.cpp
SRCS_MIN+= Target/X86/X86LoadValueInjectionRetHardening.cpp
SRCS_MIN+= Target/X86/X86MCInstLower.cpp
SRCS_MIN+= Target/X86/X86MachineFunctionInfo.cpp
SRCS_MIN+= Target/X86/X86MacroFusion.cpp
@ -1298,7 +1304,6 @@ SRCS_MIN+= Target/X86/X86OptimizeLEAs.cpp
SRCS_MIN+= Target/X86/X86PadShortFunction.cpp
SRCS_MIN+= Target/X86/X86RegisterBankInfo.cpp
SRCS_MIN+= Target/X86/X86RegisterInfo.cpp
SRCS_MIN+= Target/X86/X86RetpolineThunks.cpp
SRCS_MIN+= Target/X86/X86SelectionDAGInfo.cpp
SRCS_MIN+= Target/X86/X86ShuffleDecodeConstantPool.cpp
SRCS_MIN+= Target/X86/X86SpeculativeLoadHardening.cpp