Vendor import of llvm release_90 branch r369369:

https://llvm.org/svn/llvm-project/llvm/branches/release_90@369369
This commit is contained in:
Dimitry Andric 2019-08-20 21:35:15 +00:00
parent e6d1592492
commit 464f838b7b
49 changed files with 833 additions and 337 deletions

View File

@ -949,7 +949,7 @@ template <typename DerivedT> class AAResultBase {
/// A pointer to the AAResults object that this AAResult is
/// aggregated within. May be null if not aggregated.
AAResults *AAR;
AAResults *AAR = nullptr;
/// Helper to dispatch calls back through the derived type.
DerivedT &derived() { return static_cast<DerivedT &>(*this); }

View File

@ -269,7 +269,13 @@ class SelectionDAG {
using CallSiteInfo = MachineFunction::CallSiteInfo;
using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo;
/// Side data kept per call node in the SDCallSiteDbgInfo map.
struct CallSiteDbgInfo {
/// Call site info stored for the node by addCallSiteInfo().
CallSiteInfo CSInfo;
/// Heap allocation site metadata stored for the node by addHeapAllocSite();
/// remains null until one is stored.
MDNode *HeapAllocSite = nullptr;
};
DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo;
uint16_t NextPersistentId = 0;
@ -1664,16 +1670,28 @@ public:
}
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
SDCallSiteInfo[CallNode] = std::move(CallInfo);
SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo);
}
CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
auto I = SDCallSiteInfo.find(CallNode);
if (I != SDCallSiteInfo.end())
return std::move(I->second);
auto I = SDCallSiteDbgInfo.find(CallNode);
if (I != SDCallSiteDbgInfo.end())
return std::move(I->second).CSInfo;
return CallSiteInfo();
}
/// Associate the heap allocation site metadata \p MD with \p Node. The
/// node's entry in SDCallSiteDbgInfo is created on demand, and any
/// previously stored metadata for the node is overwritten.
void addHeapAllocSite(const SDNode *Node, MDNode *MD) {
  auto &DbgInfo = SDCallSiteDbgInfo[Node];
  DbgInfo.HeapAllocSite = MD;
}
/// Look up the HeapAllocSite metadata stored for \p Node. Yields null when
/// the node has no entry in SDCallSiteDbgInfo, or when its entry carries no
/// heap allocation site.
MDNode *getHeapAllocSite(const SDNode *Node) {
  auto Entry = SDCallSiteDbgInfo.find(Node);
  return Entry != SDCallSiteDbgInfo.end() ? Entry->second.HeapAllocSite
                                          : nullptr;
}
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);

View File

@ -3665,6 +3665,7 @@ public:
C_Register, // Constraint represents specific register(s).
C_RegisterClass, // Constraint represents any of register(s) in class.
C_Memory, // Memory constraint.
C_Immediate, // Requires an immediate.
C_Other, // Something else.
C_Unknown // Unsupported constraint.
};

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
#include <memory>
namespace llvm {

View File

@ -112,6 +112,9 @@ namespace llvm {
/// number of section symbols with the same name).
StringMap<bool, BumpPtrAllocator &> UsedNames;
/// Keeps track of labels that are used in inline assembly.
SymbolTable InlineAsmUsedLabelNames;
/// The next ID to dole out to an unnamed assembler temporary symbol with
/// a given prefix.
StringMap<unsigned> NextID;
@ -377,6 +380,16 @@ namespace llvm {
/// APIs.
const SymbolTable &getSymbols() const { return Symbols; }
/// getInlineAsmLabel - Return the symbol recorded under \p Name in the
/// table of labels referenced in inline assembly.
/// NOTE(review): presumably yields null when no label with this name was
/// registered (callers test the result for null) - confirm against the
/// SymbolTable lookup semantics.
MCSymbol *getInlineAsmLabel(StringRef Name) const {
return InlineAsmUsedLabelNames.lookup(Name);
}
/// registerInlineAsmLabel - Records that the name is a label referenced in
/// inline assembly.
void registerInlineAsmLabel(MCSymbol *Sym);
/// @}
/// \name Section Management

View File

@ -50,35 +50,35 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
#endif
// FIXME: This would be nicer were it tablegen
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm")
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm")
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME

View File

@ -53,7 +53,7 @@ enum ArchExtKind : unsigned {
AEK_SVE2AES = 1 << 24,
AEK_SVE2SM4 = 1 << 25,
AEK_SVE2SHA3 = 1 << 26,
AEK_BITPERM = 1 << 27,
AEK_SVE2BITPERM = 1 << 27,
};
enum class ArchKind {

View File

@ -39,19 +39,13 @@ enum ArchExtKind : unsigned {
AEK_DSP = 1 << 10,
AEK_FP16 = 1 << 11,
AEK_RAS = 1 << 12,
AEK_SVE = 1 << 13,
AEK_DOTPROD = 1 << 14,
AEK_SHA2 = 1 << 15,
AEK_AES = 1 << 16,
AEK_FP16FML = 1 << 17,
AEK_SB = 1 << 18,
AEK_SVE2 = 1 << 19,
AEK_SVE2AES = 1 << 20,
AEK_SVE2SM4 = 1 << 21,
AEK_SVE2SHA3 = 1 << 22,
AEK_BITPERM = 1 << 23,
AEK_FP_DP = 1 << 24,
AEK_LOB = 1 << 25,
AEK_DOTPROD = 1 << 13,
AEK_SHA2 = 1 << 14,
AEK_AES = 1 << 15,
AEK_FP16FML = 1 << 16,
AEK_SB = 1 << 17,
AEK_FP_DP = 1 << 18,
AEK_LOB = 1 << 19,
// Unsupported extensions.
AEK_OS = 0x8000000,
AEK_IWMMXT = 0x10000000,

View File

@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/IR/ValueHandle.h"
#include <cstdint>
namespace llvm {
@ -28,8 +29,8 @@ class Value;
struct DivRemMapKey {
bool SignedOp;
Value *Dividend;
Value *Divisor;
AssertingVH<Value> Dividend;
AssertingVH<Value> Divisor;
DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
@ -50,8 +51,10 @@ template <> struct DenseMapInfo<DivRemMapKey> {
}
static unsigned getHashValue(const DivRemMapKey &Val) {
return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
reinterpret_cast<uintptr_t>(Val.Divisor)) ^
return (unsigned)(reinterpret_cast<uintptr_t>(
static_cast<Value *>(Val.Dividend)) ^
reinterpret_cast<uintptr_t>(
static_cast<Value *>(Val.Divisor))) ^
(unsigned)Val.SignedOp;
}
};

View File

@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
Sym->print(OS, AP->MAI);
MMI->getContext().registerInlineAsmLabel(Sym);
} else if (MI->getOperand(OpNo).isMBB()) {
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
Sym->print(OS, AP->MAI);

View File

@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
TheUse = InsertedShift;
}
// If we removed all uses, nuke the shift.
// If we removed all uses, or there are none, nuke the shift.
if (ShiftI->use_empty()) {
salvageDebugInfo(*ShiftI);
ShiftI->eraseFromParent();
MadeChange = true;
}
return MadeChange;

View File

@ -691,9 +691,17 @@ void LiveDebugValues::insertTransferDebugPair(
"No register supplied when handling a restore of a debug value");
MachineFunction *MF = MI.getMF();
DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
const DIExpression *NewExpr;
if (auto Fragment = DebugInstr->getDebugExpression()->getFragmentInfo())
NewExpr = *DIExpression::createFragmentExpression(DIB.createExpression(),
Fragment->OffsetInBits, Fragment->SizeInBits);
else
NewExpr = DIB.createExpression();
NewDebugInstr =
BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
NewReg, DebugInstr->getDebugVariable(), NewExpr);
VarLoc VL(*NewDebugInstr, LS);
ProcessVarLoc(VL, NewDebugInstr);
LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
@ -848,9 +856,14 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
<< "\n");
}
// Check if the register or spill location is the location of a debug value.
// FIXME: Don't create a spill transfer if there is a complex expression,
// because we currently cannot recover the original expression on restore.
for (unsigned ID : OpenRanges.getVarLocs()) {
const MachineInstr *DebugInstr = &VarLocIDs[ID].MI;
if (TKind == TransferKind::TransferSpill &&
VarLocIDs[ID].isDescribedByReg() == Reg) {
VarLocIDs[ID].isDescribedByReg() == Reg &&
!DebugInstr->getDebugExpression()->isComplex()) {
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
} else if (TKind == TransferKind::TransferRestore &&

View File

@ -21,6 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@ -66,6 +67,7 @@ namespace {
AliasAnalysis *AA;
MachineDominatorTree *DT;
MachineRegisterInfo *MRI;
MachineBlockFrequencyInfo *MBFI;
public:
static char ID; // Pass identification
@ -83,6 +85,8 @@ namespace {
AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addPreserved<MachineBlockFrequencyInfo>();
}
void releaseMemory() override {
@ -133,6 +137,11 @@ namespace {
bool isPRECandidate(MachineInstr *MI);
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
bool PerformSimplePRE(MachineDominatorTree *DT);
/// Heuristics to see if it's beneficial to move common computations of MBB
/// and MBB1 to CandidateBB.
bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
MachineBasicBlock *MBB,
MachineBasicBlock *MBB1);
};
} // end anonymous namespace
@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
if (!CMBB->isLegalToHoistInto())
continue;
if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
continue;
// Two instrs are partially redundant if their basic blocks are reachable
// from one another but one doesn't dominate the other.
if (CMBB != MBB1) {
@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
return Changed;
}
/// Decide whether hoisting the computation shared by \p MBB and \p MBB1 into
/// \p CandidateBB is worthwhile. When the enclosing function reports
/// hasMinSize() the hoist is always accepted; otherwise it is accepted only
/// if CandidateBB's block frequency does not exceed the combined block
/// frequency of MBB and MBB1.
bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
                                        MachineBasicBlock *MBB,
                                        MachineBasicBlock *MBB1) {
  const bool OptForMinSize =
      CandidateBB->getParent()->getFunction().hasMinSize();
  if (!OptForMinSize) {
    // The frequency comparison below is only meaningful when the candidate
    // dominates both original blocks.
    assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
    assert(DT->dominates(CandidateBB, MBB1) &&
           "CandidateBB should dominate MBB1");
    auto HoistedFreq = MBFI->getBlockFreq(CandidateBB);
    auto OriginalFreq = MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
    return HoistedFreq <= OriginalFreq;
  }
  return true;
}
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
bool ChangedPRE, ChangedCSE;
ChangedPRE = PerformSimplePRE(DT);

View File

@ -121,7 +121,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
Entry.Symbols.push_back(Context.createTempSymbol());
Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken()));
return Entry.Symbols;
}

View File

@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
if (MDNode *MD = DAG->getHeapAllocSite(N)) {
if (NewInsn && NewInsn->isCall())
MF.addCodeViewHeapAllocSite(NewInsn, MD);
}
GluedNodes.pop_back();
}
auto NewInsn =
@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
NewInsn);
if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
if (NewInsn && NewInsn->isCall())
MF.addCodeViewHeapAllocSite(NewInsn, MD);
}
}
// Insert all the dbg_values which have not already been inserted in source

View File

@ -1084,6 +1084,7 @@ void SelectionDAG::clear() {
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
SDCallSiteDbgInfo.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),

View File

@ -8021,6 +8021,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
if (T.ConstraintType == TargetLowering::C_Immediate &&
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
// We've delayed emitting a diagnostic for constraints like "n" because
// inlining could cause an integer to show up.
return emitInlineAsmError(
CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
"integer constant expression");
ExtraInfo.update(T);
}
@ -8105,7 +8113,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
(OpInfo.ConstraintType == TargetLowering::C_Other &&
((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
@ -8119,13 +8128,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
} else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
} else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
!OpInfo.isIndirect) ||
OpInfo.ConstraintType == TargetLowering::C_Register ||
OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
// Otherwise, this outputs to a register (directly for C_Register /
// C_RegisterClass, and a target-defined fashion for C_Other). Find a
// register that we can use.
// C_RegisterClass, and a target-defined fashion for
// C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
CS, "couldn't allocate output register for constraint '" +
@ -8205,15 +8215,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
if (OpInfo.ConstraintType == TargetLowering::C_Other &&
if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
if (isa<ConstantSDNode>(InOperandVal)) {
emitInlineAsmError(CS, "value out of range for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
@ -8250,7 +8269,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
OpInfo.ConstraintType == TargetLowering::C_Register ||
OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
"Unknown constraint type!");
// TODO: Support this.
@ -8356,6 +8376,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
Val = OpInfo.AssignedRegs.getCopyFromRegs(
DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);

View File

@ -3567,15 +3567,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
if (S == 1) {
switch (Constraint[0]) {
default: break;
case 'r': return C_RegisterClass;
case 'r':
return C_RegisterClass;
case 'm': // memory
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
return C_Immediate;
case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
case 'p': // Address.
case 'X': // Allow ANY value.
@ -3950,6 +3952,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@ -4069,11 +4072,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
// If this is an 'other' constraint, see if the operand is valid for it.
// For example, on X86 we might have an 'rI' constraint. If the operand
// is an integer in the range [0..31] we want to use I (saving a load
// of a register), otherwise we must use 'r'.
if (CType == TargetLowering::C_Other && Op.getNode()) {
// If this is an 'other' or 'immediate' constraint, see if the operand is
// valid for it. For example, on X86 we might have an 'rI' constraint. If
// the operand is an integer in the range [0..31] we want to use I (saving a
// load of a register), otherwise we must use 'r'.
if ((CType == TargetLowering::C_Other ||
CType == TargetLowering::C_Immediate) && Op.getNode()) {
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;

View File

@ -61,6 +61,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
bool DoAutoReset)
: SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi),
Symbols(Allocator), UsedNames(Allocator),
InlineAsmUsedLabelNames(Allocator),
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
AutoReset(DoAutoReset) {
SecureLogFile = AsSecureLogFileName;
@ -90,6 +91,7 @@ void MCContext::reset() {
XCOFFAllocator.DestroyAll();
MCSubtargetAllocator.DestroyAll();
InlineAsmUsedLabelNames.clear();
UsedNames.clear();
Symbols.clear();
Allocator.Reset();
@ -272,6 +274,10 @@ void MCContext::setSymbolValue(MCStreamer &Streamer,
Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this));
}
/// Record \p Sym, keyed by its name, in the table of labels referenced in
/// inline assembly. An existing entry with the same name is replaced.
void MCContext::registerInlineAsmLabel(MCSymbol *Sym) {
  auto LabelName = Sym->getName();
  InlineAsmUsedLabelNames[LabelName] = Sym;
}
//===----------------------------------------------------------------------===//
// Section Management
//===----------------------------------------------------------------------===//

View File

@ -1142,7 +1142,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
}
MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
if (!Sym)
Sym = getContext().getOrCreateSymbol(SymbolName);
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.

View File

@ -90,9 +90,9 @@ static bool supportsBPF(uint64_t Type) {
static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) {
switch (R.getType()) {
case ELF::R_BPF_64_32:
return S & 0xFFFFFFFF;
return (S + A) & 0xFFFFFFFF;
case ELF::R_BPF_64_64:
return S;
return S + A;
default:
llvm_unreachable("Invalid relocation type");
}

View File

@ -96,8 +96,8 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
Features.push_back("+sve2-sm4");
if (Extensions & AEK_SVE2SHA3)
Features.push_back("+sve2-sha3");
if (Extensions & AEK_BITPERM)
Features.push_back("+bitperm");
if (Extensions & AEK_SVE2BITPERM)
Features.push_back("+sve2-bitperm");
if (Extensions & AEK_RCPC)
Features.push_back("+rcpc");

View File

@ -1200,7 +1200,7 @@ namespace fs {
/// implementation.
std::error_code copy_file(const Twine &From, const Twine &To) {
uint32_t Flag = COPYFILE_DATA;
#if __has_builtin(__builtin_available)
#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE)
if (__builtin_available(macos 10.12, *)) {
bool IsSymlink;
if (std::error_code Error = is_symlink_file(From, IsSymlink))

View File

@ -115,7 +115,7 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
"Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true",
"Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",

View File

@ -606,6 +606,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
@ -5661,8 +5665,6 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
switch (Constraint[0]) {
default:
break;
case 'z':
return C_Other;
case 'x':
case 'w':
return C_RegisterClass;
@ -5670,6 +5672,16 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'Y':
case 'Z':
return C_Immediate;
case 'z':
case 'S': // A symbolic address
return C_Other;
}

View File

@ -116,7 +116,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<"FeatureRCPC", "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,

View File

@ -1164,6 +1164,13 @@ let Predicates = [HasSVE2] in {
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
// SVE2 predicated shifts
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
// SVE2 integer add/subtract long
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
@ -1199,14 +1206,14 @@ let Predicates = [HasSVE2] in {
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
// SVE2 bitwise shift and insert
defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;
defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">;
defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">;
// SVE2 bitwise shift right and accumulate
defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;
defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">;
defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">;
defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">;
defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
// SVE2 complex integer add
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
@ -1228,41 +1235,47 @@ let Predicates = [HasSVE2] in {
defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
// SVE2 bitwise shift right narrow
defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">;
defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">;
defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">;
defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">;
defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">;
defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">;
defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">;
defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">;
defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">;
defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">;
defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">;
defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">;
defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">;
defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">;
defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">;
defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">;
// SVE2 bitwise shift right narrow (bottom)
defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
// SVE2 integer add/subtract narrow high part
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">;
defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">;
defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">;
defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">;
defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">;
defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">;
defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">;
defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">;
// SVE2 bitwise shift right narrow (top)
defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
// SVE2 saturating extract narrow
defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">;
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">;
defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">;
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">;
defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
// SVE2 integer add/subtract narrow high part (bottom)
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">;
defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">;
defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">;
defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">;
// SVE2 integer add/subtract narrow high part (top)
defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">;
defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">;
defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">;
defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">;
// SVE2 saturating extract narrow (bottom)
defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">;
defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">;
defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">;
// SVE2 saturating extract narrow (top)
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">;
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">;
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">;
// SVE2 character match
defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
@ -1289,10 +1302,14 @@ let Predicates = [HasSVE2] in {
// SVE2 histogram generation (vector)
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
// SVE2 floating-point base 2 logarithm as integer
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
// SVE2 floating-point convert precision
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">;
defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">;
def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
// SVE2 floating-point pairwise operations
defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
@ -1321,58 +1338,45 @@ let Predicates = [HasSVE2] in {
def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
// sve_int_rotate_imm
// SVE2 bitwise xor and rotate right by immediate
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
// SVE floating-point convert precision
def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
// SVE2 non-temporal gather loads
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
// SVE floating-point convert to integer
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
// Non-temporal contiguous loads (vector + register)
defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
// Predicated shifts
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
// SVE2 non-temporal scatter stores
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
// Non-temporal contiguous stores (vector + register)
defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
// SVE table lookup (three sources)
// SVE2 table lookup (three sources)
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
// SVE integer compare scalar count and limit
// SVE2 integer compare scalar count and limit
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
@ -1383,7 +1387,7 @@ let Predicates = [HasSVE2] in {
defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
// SVE pointer conflict compare
// SVE2 pointer conflict compare
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
}

View File

@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
/// Build the MemCmpExpansionOptions used for AArch64: whether overlapping
/// loads are allowed, the load-count limits, and the permitted load sizes.
/// \p OptSize is forwarded to the target lowering when querying the load
/// budget; \p IsZeroCmp is not consulted here.
AArch64TTIImpl::TTI::MemCmpExpansionOptions
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Opts;

  // Overlapping loads are permitted only when the subtarget does not demand
  // strict alignment.
  const bool StrictAlign = ST->requiresStrictAlign();
  Opts.AllowOverlappingLoads = !StrictAlign;

  // The total load budget comes from the target lowering; the per-block limit
  // is set to the same value.
  Opts.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Opts.NumLoadsPerBlock = Opts.MaxNumLoads;

  // TODO: Though vector loads usually perform well on AArch64, in some targets
  // they may wake up the FP unit, which raises the power consumption. Perhaps
  // they could be used with no holds barred (-O3).
  Opts.LoadSizes = {8, 4, 2, 1};

  return Opts;
}
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
unsigned Alignment, unsigned AddressSpace,
const Instruction *I) {

View File

@ -130,6 +130,9 @@ public:
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
/// Returns the MemCmpExpansionOptions for this target (allowed load sizes,
/// load-count limits, and whether overlapping loads may be used).
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);

View File

@ -2840,7 +2840,7 @@ static const struct Extension {
{"sve2-aes", {AArch64::FeatureSVE2AES}},
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
{"bitperm", {AArch64::FeatureSVE2BitPerm}},
{"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}},
// FIXME: Unsupported extensions
{"pan", {}},
{"lor", {}},

View File

@ -403,12 +403,12 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm> {
}
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
ZPRRegOp zprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg),
asm, "\t$Zdn, $Pg",
ZPRRegOp zprty, PPRRegOp pprty>
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm),
asm, "\t$Zdn, $Pm",
"",
[]>, Sched<[]> {
bits<4> Pg;
bits<4> Pm;
bits<5> Zdn;
let Inst{31-24} = 0b00100101;
let Inst{23-22} = sz8_64;
@ -416,7 +416,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Inst{18-16} = opc{4-2};
let Inst{15-11} = 0b10000;
let Inst{10-9} = opc{1-0};
let Inst{8-5} = Pg;
let Inst{8-5} = Pm;
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
@ -425,9 +425,16 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
}
multiclass sve_int_count_v<bits<5> opc, string asm> {
def _H : sve_int_count_v<0b01, opc, asm, ZPR16>;
def _S : sve_int_count_v<0b10, opc, asm, ZPR32>;
def _D : sve_int_count_v<0b11, opc, asm, ZPR64>;
def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>;
def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>;
def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>;
def : InstAlias<asm # "\t$Zdn, $Pm",
(!cast<Instruction>(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>;
def : InstAlias<asm # "\t$Zdn, $Pm",
(!cast<Instruction>(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>;
def : InstAlias<asm # "\t$Zdn, $Pm",
(!cast<Instruction>(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>;
}
class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
@ -744,7 +751,7 @@ multiclass sve2_int_perm_tbl<string asm> {
}
class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
[]>, Sched<[]> {
@ -758,6 +765,8 @@ class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{15-10} = 0b001011;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
}
multiclass sve2_int_perm_tbx<string asm> {
@ -1489,7 +1498,7 @@ multiclass sve_fp_fcadd<string asm> {
class sve2_fp_convert_precision<bits<4> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
asm, "\t$Zd, $Pg/m, $Zn",
"",
[]>, Sched<[]> {
@ -1504,6 +1513,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
}
multiclass sve2_fp_convert_down_narrow<string asm> {
@ -2399,21 +2410,40 @@ multiclass sve2_misc_bitwise<bits<4> opc, string asm> {
def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
}
multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in {
def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>;
def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>;
def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>;
def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>;
}
}
// Instantiates sve2_misc for the _H, _S and _D variants. The 2-bit opc is
// prefixed with 0b00 to form the opcode argument passed to sve2_misc, and the
// two register classes differ per variant (ZPR16/ZPR8, ZPR32/ZPR16,
// ZPR64/ZPR32).
multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
}
// Instruction format for the "bitwise xor interleaved" encodings.
//   sz      - 2-bit size field, placed in Inst{23-22}.
//   opc     - single opcode bit, placed in Inst{10}.
//   asm     - mnemonic; operands are printed as "$Zd, $Zn, $Zm".
//   zprty1  - register class of the destination ($Zd and the tied $_Zd).
//   zprty2  - register class of the sources ($Zn, $Zm).
class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zd;
bits<5> Zn;
bits<5> Zm;
// Fixed encoding bits interleaved with the operand fields.
let Inst{31-24} = 0b01000101;
let Inst{23-22} = sz;
let Inst{21} = 0b0;
let Inst{20-16} = Zm;
let Inst{15-11} = 0b10010;
let Inst{10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
// The output register is tied to the extra $_Zd input, so the previous
// contents of the destination are an operand of the instruction.
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = Destructive;
let ElementSize = ElementSizeNone;
}
// Instantiates the _B, _H, _S and _D variants of the class of the same name,
// with sz running 0b00..0b11 and the same register class used for both the
// destination and the sources of each variant.
multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>;
def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>;
def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>;
def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>;
}
class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2,
Operand immtype>
@ -2451,9 +2481,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
// SVE2 Accumulate Group
//===----------------------------------------------------------------------===//
class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
bits<5> Zd;
@ -2468,38 +2498,40 @@ class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
let Inst{10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
}
multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {
def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}
multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {
def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}
class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty, Operand immtype>
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
asm, "\t$Zda, $Zn, $imm",
"", []>, Sched<[]> {
@ -2521,15 +2553,15 @@ class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm
let ElementSize = ElementSizeNone;
}
multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {
def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
@ -2607,9 +2639,9 @@ multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
// SVE2 Narrowing Group
//===----------------------------------------------------------------------===//
class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
string asm, ZPRRegOp zprty1,
ZPRRegOp zprty2, Operand immtype>
class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
string asm, ZPRRegOp zprty1,
ZPRRegOp zprty2, Operand immtype>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
@ -2622,26 +2654,63 @@ class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-16} = imm{2-0}; // imm3
let Inst{15-14} = 0b00;
let Inst{13-10} = opc;
let Inst{13-11} = opc;
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
}
multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> {
def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16,
vecshiftR8>;
def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32,
vecshiftR16> {
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
vecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64,
vecshiftR32> {
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
}
class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
// Instruction format for the "top" form of the shift-by-immediate narrowing
// encodings.
//   tsz8_64 - 3-bit field split across Inst{22} and Inst{20-19}.
//   opc     - 3-bit opcode, placed in Inst{13-11}.
//   asm     - mnemonic; operands are printed as "$Zd, $Zn, $imm".
//   zprty1  - register class of the destination ($Zd and the tied $_Zd).
//   zprty2  - register class of the source ($Zn).
//   immtype - operand class of the shift immediate.
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
string asm, ZPRRegOp zprty1,
ZPRRegOp zprty2, Operand immtype>
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
bits<5> Zd;
bits<5> Zn;
bits<5> imm;
let Inst{31-23} = 0b010001010;
let Inst{22} = tsz8_64{2};
let Inst{21} = 0b1;
let Inst{20-19} = tsz8_64{1-0};
// Only the low three immediate bits are encoded here; imm{4-3} are not
// assigned by this class.
let Inst{18-16} = imm{2-0}; // imm3
let Inst{15-14} = 0b00;
let Inst{13-11} = opc;
// Bit 10 is fixed to 1 in this format.
let Inst{10} = 0b1;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
// The output register is tied to the extra $_Zd input.
let Constraints = "$Zd = $_Zd";
}
// Instantiates the _B, _H and _S variants of
// sve2_int_bin_shift_imm_narrow_top with right-shift immediate operands.
// Where tsz8_64 contains '?' bits, the corresponding encoding bits are
// overridden with the upper bits of the immediate.
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
vecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
vecshiftR16> {
// Low tsz bit is unset; bit 19 carries imm{3} instead.
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
vecshiftR32> {
// Both low tsz bits are unset; bits 20-19 carry imm{4-3} instead.
let Inst{20-19} = imm{4-3};
}
}
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zd;
@ -2652,19 +2721,46 @@ class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
let Inst{21} = 0b1;
let Inst{20-16} = Zm;
let Inst{15-13} = 0b011;
let Inst{12-10} = opc; // S, R, T
let Inst{12-11} = opc; // S, R
let Inst{10} = 0b0; // Top
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
}
multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> {
def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>;
multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm> {
def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>;
}
class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
// Instruction format for the "top" form of the add/subtract narrow high part
// encodings.
//   sz      - 2-bit size field, placed in Inst{23-22}.
//   opc     - 2-bit opcode, placed in Inst{12-11}.
//   asm     - mnemonic; operands are printed as "$Zd, $Zn, $Zm".
//   zprty1  - register class of the destination ($Zd and the tied $_Zd).
//   zprty2  - register class of the sources ($Zn, $Zm).
class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zd;
bits<5> Zn;
bits<5> Zm;
let Inst{31-24} = 0b01000101;
let Inst{23-22} = sz;
let Inst{21} = 0b1;
let Inst{20-16} = Zm;
let Inst{15-13} = 0b011;
let Inst{12-11} = opc; // S, R
let Inst{10} = 0b1; // Top
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
// The output register is tied to the extra $_Zd input.
let Constraints = "$Zd = $_Zd";
}
// Instantiates the _B, _H and _S variants of the class of the same name, with
// sz 0b01, 0b10 and 0b11 and register class pairs ZPR8/ZPR16, ZPR16/ZPR32 and
// ZPR32/ZPR64 (destination/source).
multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm> {
def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>;
}
class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
bits<5> Zd;
@ -2674,15 +2770,41 @@ class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
let Inst{21} = 0b1;
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-13} = 0b000010;
let Inst{12-10} = opc;
let Inst{12-11} = opc;
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
}
multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> {
def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>;
multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm> {
def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>;
}
// Instruction format for the "top" form of the saturating extract narrow
// encodings.
//   tsz8_64 - 3-bit field split across Inst{22} and Inst{20-19}.
//   opc     - 2-bit opcode, placed in Inst{12-11}.
//   asm     - mnemonic; operands are printed as "$Zd, $Zn".
//   zprty1  - register class of the destination ($Zd and the tied $_Zd).
//   zprty2  - register class of the source ($Zn).
class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn),
asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
bits<5> Zd;
bits<5> Zn;
let Inst{31-23} = 0b010001010;
let Inst{22} = tsz8_64{2};
let Inst{21} = 0b1;
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-13} = 0b000010;
let Inst{12-11} = opc;
// Bit 10 is fixed to 1 in this format.
let Inst{10} = 0b1;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
// The output register is tied to the extra $_Zd input.
let Constraints = "$Zd = $_Zd";
}
// Instantiates the _B, _H and _S variants of the class of the same name, with
// tsz8_64 0b001, 0b010 and 0b100 and register class pairs ZPR8/ZPR16,
// ZPR16/ZPR32 and ZPR32/ZPR64 (destination/source).
multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm> {
def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>;
def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>;
def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>;
}
//===----------------------------------------------------------------------===//
@ -3886,9 +4008,9 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
}
class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
RegisterOperand VecList>
: I<(outs VecList:$Zt), iops,
class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty>
: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
asm, "\t$Zt, $Pg, [$Zn, $Rm]",
"",
[]>, Sched<[]> {
@ -3908,17 +4030,14 @@ class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
let mayStore = 1;
}
multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm,
multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
def _REAL : sve2_mem_cstnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
asm, listty>;
def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
}
@ -5094,7 +5213,7 @@ multiclass sve_mem_p_fill<string asm> {
(!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
}
class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
RegisterOperand VecList>
: I<(outs VecList:$Zt), iops,
asm, "\t$Zt, $Pg/z, [$Zn, $Rm]",
@ -5119,17 +5238,15 @@ class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
let mayLoad = 1;
}
multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm,
multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
}

View File

@ -14369,7 +14369,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
unsigned S = Constraint.size();
if (S == 1) {
switch (Constraint[0]) {
default: break;
case 'l': return C_RegisterClass;
@ -14377,12 +14378,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const {
case 'h': return C_RegisterClass;
case 'x': return C_RegisterClass;
case 't': return C_RegisterClass;
case 'j': return C_Other; // Constant for movw.
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as an 'r' memory constraint.
case 'j': return C_Immediate; // Constant for movw.
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as an 'r' memory constraint.
case 'Q': return C_Memory;
}
} else if (Constraint.size() == 2) {
} else if (S == 2) {
switch (Constraint[0]) {
default: break;
case 'T': return C_RegisterClass;

View File

@ -592,6 +592,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
[(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
Sched<[WriteBrTbl]> {
let Size = 2;
let isNotDuplicable = 1;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
}
@ -1465,7 +1466,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
// and make use of the same compressed jump table format as Thumb-2.
let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
isIndirectBranch = 1 in {
isIndirectBranch = 1, isNotDuplicable = 1 in {
def tTBB_JT : tPseudoInst<(outs),
(ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
IIC_Br, []>, Sched<[WriteBr]>;

View File

@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
// See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
switch (Constraint[0]) {
default:
break;
case 'a': // Simple upper registers
case 'b': // Base pointer registers pairs
case 'd': // Upper register
@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
case 'O': // Integer constant (Range: 8, 16, 24)
case 'P': // Integer constant (Range: 1)
case 'R': // Integer constant (Range: -6 to 5)x
return C_Other;
default:
break;
return C_Immediate;
}
}

View File

@ -116,9 +116,8 @@ private:
void replaceWithGEP(std::vector<CallInst *> &CallList,
uint32_t NumOfZerosIndex, uint32_t DIIndex);
Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr,
std::string &AccessKey, uint32_t Kind,
MDNode *&TypeMeta);
Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey,
uint32_t Kind, MDNode *&TypeMeta);
bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex);
bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind);
};
@ -340,8 +339,7 @@ bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue,
/// Compute the base of the whole preserve_*_access_index chains, i.e., the base
/// pointer of the first preserve_*_access_index call, and construct the access
/// string, which will be the name of a global variable.
Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
std::string &AccessStr,
Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
std::string &AccessKey,
uint32_t Kind,
MDNode *&TypeMeta) {
@ -392,16 +390,16 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2)
return nullptr;
// Construct the type string AccessStr.
// Construct the type string AccessKey.
for (unsigned I = 0; I < AccessIndices.size(); ++I)
AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr;
AccessKey = std::to_string(AccessIndices[I]) + ":" + AccessKey;
if (TypeNameIndex == AccessIndices.size() - 1)
AccessStr = "0:" + AccessStr;
AccessKey = "0:" + AccessKey;
// Access key is the type name + access string, uniquely identifying
// one kernel memory access.
AccessKey = LastTypeName + ":" + AccessStr;
AccessKey = LastTypeName + ":" + AccessKey;
return Base;
}
@ -410,10 +408,10 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
/// transformation to a chain of relocable GEPs.
bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
uint32_t Kind) {
std::string AccessStr, AccessKey;
std::string AccessKey;
MDNode *TypeMeta = nullptr;
Value *Base =
computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta);
computeBaseAndAccessKey(Call, AccessKey, Kind, TypeMeta);
if (!Base)
return false;
@ -432,7 +430,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false,
GlobalVariable::ExternalLinkage, NULL, AccessStr);
GlobalVariable::ExternalLinkage, NULL, AccessKey);
GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
// Set the metadata (debuginfo types) for the global.
if (TypeMeta)

View File

@ -30,6 +30,18 @@ static const char *BTFKindStr[] = {
#include "BTF.def"
};
/// Peel typedef/const/volatile/restrict wrappers off \p Ty and return the
/// first type underneath that is not one of those derived-type tags.
static const DIType * stripQualifiers(const DIType *Ty) {
  for (;;) {
    const auto *Derived = dyn_cast<DIDerivedType>(Ty);
    // Not a derived type at all: nothing left to strip.
    if (!Derived)
      return Ty;

    switch (Derived->getTag()) {
    case dwarf::DW_TAG_typedef:
    case dwarf::DW_TAG_const_type:
    case dwarf::DW_TAG_volatile_type:
    case dwarf::DW_TAG_restrict_type:
      // A wrapper we look through; continue with the wrapped type.
      Ty = Derived->getBaseType();
      break;
    default:
      // Any other derived type is returned as-is.
      return Ty;
    }
  }
}
/// Emit a BTF common type.
void BTFTypeBase::emitType(MCStreamer &OS) {
OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
@ -184,9 +196,9 @@ void BTFTypeEnum::emitType(MCStreamer &OS) {
}
}
BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize,
uint32_t NumElems)
: ElemSize(ElemSize) {
BTFTypeArray::BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId,
uint32_t ElemSize, uint32_t NumElems)
: ElemTyNoQual(Ty), ElemSize(ElemSize) {
Kind = BTF::BTF_KIND_ARRAY;
BTFType.NameOff = 0;
BTFType.Info = Kind << 24;
@ -207,6 +219,9 @@ void BTFTypeArray::completeType(BTFDebug &BDebug) {
// created during initial type traversal. Just
// retrieve that type id.
ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
ElemTypeNoQual = ElemTyNoQual ? BDebug.getTypeId(ElemTyNoQual)
: ArrayInfo.ElemType;
}
void BTFTypeArray::emitType(MCStreamer &OS) {
@ -218,7 +233,7 @@ void BTFTypeArray::emitType(MCStreamer &OS) {
void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset,
uint32_t &ElementTypeId) {
ElementTypeId = ArrayInfo.ElemType;
ElementTypeId = ElemTypeNoQual;
LocOffset = Loc * ElemSize;
}
@ -251,7 +266,9 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
} else {
BTFMember.Offset = DDTy->getOffsetInBits();
}
BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType());
const auto *BaseTy = DDTy->getBaseType();
BTFMember.Type = BDebug.getTypeId(BaseTy);
MemberTypeNoQual.push_back(BDebug.getTypeId(stripQualifiers(BaseTy)));
Members.push_back(BTFMember);
}
}
@ -270,7 +287,7 @@ std::string BTFTypeStruct::getName() { return STy->getName(); }
/// Describe member number \p Loc of this struct.
/// \param Loc           index into Members / MemberTypeNoQual (not range-checked here).
/// \param MemberOffset  [out] set to the recorded offset of the member.
/// \param MemberType    [out] set to the type id of the member.
void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset,
uint32_t &MemberType) {
MemberType = Members[Loc].Type;
// The value the caller actually receives is the MemberTypeNoQual entry; this
// assignment overwrites the one directly above.
MemberType = MemberTypeNoQual[Loc];
// When the struct has bitfields, only the low 24 bits of the stored Offset
// are reported; otherwise the stored Offset is passed through unchanged.
MemberOffset =
HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset;
}
@ -492,10 +509,13 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
uint32_t ElemTypeId, ElemSize;
const DIType *ElemType = CTy->getBaseType();
visitTypeEntry(ElemType, ElemTypeId, false, false);
// Strip qualifiers from element type to get accurate element size.
ElemType = stripQualifiers(ElemType);
ElemSize = ElemType->getSizeInBits() >> 3;
if (!CTy->getSizeInBits()) {
auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemTypeId, 0, 0);
auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemType, ElemTypeId, 0, 0);
ArrayTypes.push_back(TypeEntry.get());
ElemTypeId = addType(std::move(TypeEntry), CTy);
} else {
@ -507,9 +527,11 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
const DISubrange *SR = cast<DISubrange>(Element);
auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
int64_t Count = CI->getSExtValue();
const DIType *ArrayElemTy = (I == 0) ? ElemType : nullptr;
auto TypeEntry =
llvm::make_unique<BTFTypeArray>(ElemTypeId, ElemSize, Count);
llvm::make_unique<BTFTypeArray>(ArrayElemTy, ElemTypeId,
ElemSize, Count);
ArrayTypes.push_back(TypeEntry.get());
if (I == 0)
ElemTypeId = addType(std::move(TypeEntry), CTy);
@ -1006,19 +1028,20 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
unsigned RootId = populateStructType(RootTy);
setTypeFromId(RootId, &PrevStructType, &PrevArrayType);
unsigned RootTySize = PrevStructType->getStructSize();
StringRef IndexPattern = AccessPattern.substr(AccessPattern.find_first_of(':') + 1);
BTFOffsetReloc OffsetReloc;
OffsetReloc.Label = ORSym;
OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back());
OffsetReloc.OffsetNameOff = addString(IndexPattern.drop_back());
OffsetReloc.TypeID = RootId;
uint32_t Start = 0, End = 0, Offset = 0;
bool FirstAccess = true;
for (auto C : AccessPattern) {
for (auto C : IndexPattern) {
if (C != ':') {
End++;
} else {
std::string SubStr = AccessPattern.substr(Start, End - Start);
std::string SubStr = IndexPattern.substr(Start, End - Start);
int Loc = std::stoi(SubStr);
if (FirstAccess) {
@ -1038,12 +1061,15 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
Offset += LocOffset;
PrevArrayType = nullptr;
setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType);
} else {
llvm_unreachable("Internal Error: BTF offset relocation type traversal error");
}
Start = End + 1;
End = Start;
}
}
AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset;
AccessOffsets[AccessPattern.str()] = Offset;
OffsetRelocTable[SecNameOff].push_back(OffsetReloc);
}
@ -1227,7 +1253,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
DIType *Ty = dyn_cast<DIType>(MDN);
std::string TypeName = Ty->getName();
int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()];
int64_t Imm = AccessOffsets[GVar->getName().str()];
// Emit "mov ri, <imm>" for abstract member accesses.
OutMI.setOpcode(BPF::MOV_ri);

View File

@ -104,11 +104,14 @@ public:
/// Handle array type.
class BTFTypeArray : public BTFTypeBase {
const DIType *ElemTyNoQual;
uint32_t ElemSize;
struct BTF::BTFArray ArrayInfo;
uint32_t ElemTypeNoQual;
public:
BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems);
BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId,
uint32_t ElemSize, uint32_t NumElems);
uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
void completeType(BTFDebug &BDebug);
void emitType(MCStreamer &OS);
@ -120,6 +123,7 @@ class BTFTypeStruct : public BTFTypeBase {
const DICompositeType *STy;
bool HasBitField;
std::vector<struct BTF::BTFMember> Members;
std::vector<uint32_t> MemberTypeNoQual;
public:
BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField,

View File

@ -1208,6 +1208,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
Res = V;
} else
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
MCBinaryExpr::Opcode Opcode;
switch (getLexer().getKind()) {
default:
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
case AsmToken::Plus:
Opcode = MCBinaryExpr::Add;
break;
case AsmToken::Minus:
Opcode = MCBinaryExpr::Sub;
break;
}
const MCExpr *Expr;
if (getParser().parseExpression(Expr))
return MatchOperand_ParseFail;
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}

View File

@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
uint64_t FrameSize = MFI.getStackSize();
// Get the alignment.
uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment()
: getStackAlignment();
unsigned StackAlign = getStackAlignment();
if (RI->needsStackRealignment(MF)) {
unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment());
FrameSize += (MaxStackAlign - StackAlign);
StackAlign = MaxStackAlign;
}
// Set Max Call Frame Size
uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign);
MFI.setMaxCallFrameSize(MaxCallSize);
// Make sure the frame is aligned.
FrameSize = alignTo(FrameSize, StackAlign);
@ -101,6 +109,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
const RISCVInstrInfo *TII = STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) {
report_fatal_error(
"RISC-V backend can't currently handle functions that need stack "
"realignment and have variable sized objects");
}
unsigned FPReg = getFPReg(STI);
unsigned SPReg = getSPReg(STI);
@ -158,6 +172,29 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
nullptr, RI->getDwarfRegNum(FPReg, true), 0));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
// Realign Stack
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
if (RI->needsStackRealignment(MF)) {
unsigned MaxAlignment = MFI.getMaxAlignment();
const RISCVInstrInfo *TII = STI.getInstrInfo();
if (isInt<12>(-(int)MaxAlignment)) {
BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
.addReg(SPReg)
.addImm(-(int)MaxAlignment);
} else {
unsigned ShiftAmount = countTrailingZeros(MaxAlignment);
unsigned VR =
MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
.addReg(SPReg)
.addImm(ShiftAmount);
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg)
.addReg(VR)
.addImm(ShiftAmount);
}
}
}
}
@ -257,6 +294,13 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
if (FI >= MinCSFI && FI <= MaxCSFI) {
FrameReg = RISCV::X2;
Offset += MF.getFrameInfo().getStackSize();
} else if (RI->needsStackRealignment(MF)) {
assert(!MFI.hasVarSizedObjects() &&
"Unexpected combination of stack realignment and varsized objects");
// If the stack was realigned, the frame pointer is set in order to allow
// SP to be restored, but we still access stack objects using SP.
FrameReg = RISCV::X2;
Offset += MF.getFrameInfo().getStackSize();
} else {
FrameReg = RI->getFrameRegister(MF);
if (hasFP(MF))

View File

@ -1007,12 +1007,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
// We can materialise `c1 << c2` into an add immediate, so it's "free",
// and the combine should happen, to potentially allow further combines
// later.
if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
if (ShiftedC1Int.getMinSignedBits() <= 64 &&
isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
return true;
// We can materialise `c1` in an add immediate, so it's "free", and the
// combine should be prevented.
if (isLegalAddImmediate(C1Int.getSExtValue()))
if (C1Int.getMinSignedBits() <= 64 &&
isLegalAddImmediate(C1Int.getSExtValue()))
return false;
// Neither constant will fit into an immediate, so find materialisation
@ -2397,6 +2399,25 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
/// Classify an inline-asm constraint string for this target.
///
/// Only single-letter constraints are handled here: 'f' is a register class
/// and 'I', 'J', 'K' are immediates. Every other constraint is passed on to
/// the generic TargetLowering implementation.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
  // Multi-character (or empty) constraints are never target specific here.
  if (Constraint.size() != 1)
    return TargetLowering::getConstraintType(Constraint);

  switch (Constraint[0]) {
  case 'f':
    return C_RegisterClass;
  case 'I':
  case 'J':
  case 'K':
    return C_Immediate;
  default:
    return TargetLowering::getConstraintType(Constraint);
  }
}
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
@ -2407,6 +2428,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
switch (Constraint[0]) {
case 'r':
return std::make_pair(0U, &RISCV::GPRRegClass);
case 'f':
if (Subtarget.hasStdExtF() && VT == MVT::f32)
return std::make_pair(0U, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD() && VT == MVT::f64)
return std::make_pair(0U, &RISCV::FPR64RegClass);
break;
default:
break;
}

View File

@ -92,6 +92,7 @@ public:
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;

View File

@ -3183,7 +3183,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const {
case 'e':
return C_RegisterClass;
case 'I': // SIMM13
return C_Other;
return C_Immediate;
}
}

View File

@ -956,7 +956,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
case 'K': // Signed 16-bit constant
case 'L': // Signed 20-bit displacement (on all targets we support)
case 'M': // 0x7fffffff
return C_Other;
return C_Immediate;
default:
break;

View File

@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b">;
"64-bit with cmpxchg16b",
[FeatureCMPXCHG8B]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",

View File

@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
Complexity += 2;
}
// Heuristic: try harder to form an LEA from ADD if the operands set flags.
// Unlike ADD, LEA does not affect flags, so we will be less likely to require
// duplicating flag-producing instructions later in the pipeline.
if (N.getOpcode() == ISD::ADD) {
auto isMathWithFlags = [](SDValue V) {
switch (V.getOpcode()) {
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::ADC:
case X86ISD::SBB:
/* TODO: These opcodes can be added safely, but we may want to justify
their inclusion for different reasons (better for reg-alloc).
case X86ISD::SMUL:
case X86ISD::UMUL:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
*/
// Value 1 is the flag output of the node - verify it's not dead.
return !SDValue(V.getNode(), 1).use_empty();
default:
return false;
}
};
// TODO: This could be an 'or' rather than 'and' to make the transform more
// likely to happen. We might want to factor in whether there's a
// load folding opportunity for the math op that disappears with LEA.
if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
Complexity++;
}
if (AM.Disp)
Complexity++;
@ -3302,8 +3333,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
SDValue ImplDef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
NBits);
SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
NBits = SDValue(
CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
NBits, SRIdxVal), 0);
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
if (Subtarget->hasBMI2()) {

View File

@ -4069,6 +4069,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
// Save heapallocsite metadata.
if (CLI.CS)
if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
@ -5500,6 +5505,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
Idx == (VT.getVectorNumElements() / 2) &&
Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
Src.getOperand(1).getValueType() == SubVT &&
isNullConstant(Src.getOperand(2))) {
Ops.push_back(Src.getOperand(1));
Ops.push_back(Sub);
@ -34062,25 +34068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
break;
}
case X86ISD::SUBV_BROADCAST: {
// Reduce size of broadcast if we don't need the upper half.
unsigned HalfElts = NumElts / 2;
if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
SDValue Half = Src;
if (SrcVT.getVectorNumElements() != HalfElts) {
MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
}
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
TLO.DAG, SDLoc(Op),
Half.getValueSizeInBits()));
}
break;
}
case X86ISD::VPERMV: {
SDValue Mask = Op.getOperand(0);
APInt MaskUndef, MaskZero;
@ -34134,6 +34121,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
// Subvector broadcast.
case X86ISD::SUBV_BROADCAST: {
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
if (Src.getValueSizeInBits() > ExtSizeInBits)
Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
else if (Src.getValueSizeInBits() < ExtSizeInBits) {
MVT SrcSVT = Src.getSimpleValueType().getScalarType();
MVT SrcVT =
MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
}
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
TLO.DAG, DL, ExtSizeInBits));
}
// Byte shifts by immediate.
case X86ISD::VSHLDQ:
@ -43839,6 +43841,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 &&
isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() &&
Vec.getOperand(1).getValueSizeInBits() == SubVecVT.getSizeInBits() &&
Vec.hasOneUse()) {
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
Vec.getOperand(1), Vec.getOperand(2));
@ -44660,10 +44663,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'G':
case 'L':
case 'M':
return C_Immediate;
case 'C':
case 'e':
case 'Z':

View File

@ -3288,26 +3288,35 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
// Look for an 'and' of two (opposite) logical shifts.
// Pick the single-use shift as XShift.
Value *XShift, *YShift;
Instruction *XShift, *YShift;
if (!match(I.getOperand(0),
m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
m_CombineAnd(m_AnyLogicalShift, m_Instruction(YShift)))))
return nullptr;
// If YShift is a single-use 'lshr', swap the shifts around.
if (match(YShift, m_OneUse(m_AnyLShr)))
// If YShift is a 'lshr', swap the shifts around.
if (match(YShift, m_AnyLShr))
std::swap(XShift, YShift);
// The shifts must be in opposite directions.
Instruction::BinaryOps XShiftOpcode =
cast<BinaryOperator>(XShift)->getOpcode();
if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
auto XShiftOpcode = XShift->getOpcode();
if (XShiftOpcode == YShift->getOpcode())
return nullptr; // Do not care about same-direction shifts here.
Value *X, *XShAmt, *Y, *YShAmt;
match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
// If one of the values being shifted is a constant, then we will end with
// and+icmp, and shift instr will be constant-folded. If they are not,
// however, we will need to ensure that we won't increase instruction count.
if (!isa<Constant>(X) && !isa<Constant>(Y)) {
// At least one of the hands of the 'and' should be one-use shift.
if (!match(I.getOperand(0),
m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
return nullptr;
}
// Can we fold (XShAmt+YShAmt) ?
Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
SQ.getWithInstruction(&I));

View File

@ -23,6 +23,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
using namespace llvm;
#define DEBUG_TYPE "div-rem-pairs"
@ -32,24 +33,44 @@ STATISTIC(NumDecomposed, "Number of instructions decomposed");
DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
"Controls transformations in div-rem-pairs pass");
/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). If they exist in different basic blocks, bring
/// them together by hoisting or replace the common division operation that is
/// implicit in the remainder:
/// X % Y <--> X - ((X / Y) * Y).
///
/// We can largely ignore the normal safety and cost constraints on speculation
/// of these ops when we find a matching pair. This is because we are already
/// guaranteed that any exceptions and most cost are already incurred by the
/// first member of the pair.
///
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
/// SimplifyCFG, but it's split off on its own because it's different enough
/// that it doesn't quite match the stated objectives of those passes.
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
const DominatorTree &DT) {
bool Changed = false;
/// A thin wrapper to store two values that we matched as div-rem pair.
/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
struct DivRemPairWorklistEntry {
/// The actual udiv/sdiv instruction. Source of truth.
AssertingVH<Instruction> DivInst;
/// The instruction that we have matched as a remainder instruction.
/// Should only be used as Value, don't introspect it.
AssertingVH<Instruction> RemInst;
DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
: DivInst(DivInst_), RemInst(RemInst_) {
assert((DivInst->getOpcode() == Instruction::UDiv ||
DivInst->getOpcode() == Instruction::SDiv) &&
"Not a division.");
assert(DivInst->getType() == RemInst->getType() && "Types should match.");
// We can't check anything else about remainder instruction,
// it's not strictly required to be a urem/srem.
}
/// The type for this pair, identical for both the div and rem.
Type *getType() const { return DivInst->getType(); }
/// Is this pair signed or unsigned?
bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
/// In this pair, what are the divident and divisor?
Value *getDividend() const { return DivInst->getOperand(0); }
Value *getDivisor() const { return DivInst->getOperand(1); }
};
using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). Place those pairs into a worklist for further
/// processing. This indirection is needed because we will be doing RAUW, and
/// if one of the rem instructions we change happens to be an input to another
/// div/rem in the maps, we'd have problems. The worklist entries therefore
/// hold the matched instructions in AssertingVH<> rather than as map keys.
static DivRemWorklistTy getWorklist(Function &F) {
// Insert all divide and remainder instructions into maps keyed by their
// operands and opcode (signed or unsigned).
DenseMap<DivRemMapKey, Instruction *> DivMap;
@ -69,6 +90,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
}
}
// We'll accumulate the matching pairs of div-rem instructions here.
DivRemWorklistTy Worklist;
// We can iterate over either map because we are only looking for matched
// pairs. Choose remainders for efficiency because they are usually even more
// rare than division.
@ -78,12 +102,45 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (!DivInst)
continue;
// We have a matching pair of div/rem instructions. If one dominates the
// other, hoist and/or replace one.
// We have a matching pair of div/rem instructions.
NumPairs++;
Instruction *RemInst = RemPair.second;
bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
// Place it in the worklist.
Worklist.emplace_back(DivInst, RemInst);
}
return Worklist;
}
/// Find matching pairs of integer div/rem ops (they have the same numerator,
/// denominator, and signedness). If they exist in different basic blocks, bring
/// them together by hoisting or replace the common division operation that is
/// implicit in the remainder:
/// X % Y <--> X - ((X / Y) * Y).
///
/// We can largely ignore the normal safety and cost constraints on speculation
/// of these ops when we find a matching pair. This is because we are already
/// guaranteed that any exceptions and most cost are already incurred by the
/// first member of the pair.
///
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
/// SimplifyCFG, but it's split off on its own because it's different enough
/// that it doesn't quite match the stated objectives of those passes.
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
const DominatorTree &DT) {
bool Changed = false;
// Get the matching pairs of div-rem instructions. We want this extra
// indirection to avoid dealing with having to RAUW the keys of the maps.
DivRemWorklistTy Worklist = getWorklist(F);
// Process each entry in the worklist.
for (DivRemPairWorklistEntry &E : Worklist) {
bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
auto &DivInst = E.DivInst;
auto &RemInst = E.RemInst;
// If the target supports div+rem and the instructions are in the same block
// already, there's nothing to do. The backend should handle this. If the
@ -110,8 +167,8 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// The target does not have a single div/rem operation. Decompose the
// remainder calculation as:
// X % Y --> X - ((X / Y) * Y).
Value *X = RemInst->getOperand(0);
Value *Y = RemInst->getOperand(1);
Value *X = E.getDividend();
Value *Y = E.getDivisor();
Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
@ -152,8 +209,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// Now kill the explicit remainder. We have replaced it with:
// (sub X, (mul (div X, Y), Y)
RemInst->replaceAllUsesWith(Sub);
RemInst->eraseFromParent();
Sub->setName(RemInst->getName() + ".decomposed");
Instruction *OrigRemInst = RemInst;
// Update AssertingVH<> with new instruction so it doesn't assert.
RemInst = Sub;
// And replace the original instruction with the new one.
OrigRemInst->replaceAllUsesWith(Sub);
OrigRemInst->eraseFromParent();
NumDecomposed++;
}
Changed = true;
@ -188,7 +250,7 @@ struct DivRemPairsLegacyPass : public FunctionPass {
return optimizeDivRem(F, TTI, DT);
}
};
}
} // namespace
char DivRemPairsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",

View File

@ -777,8 +777,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
// speculation if the predecessor is an invoke. This doesn't seem
// fundamental and we should probably be splitting critical edges
// differently.
if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
isa<InvokeInst>(PredBB->getTerminator())) {
const auto *TermInst = PredBB->getTerminator();
if (isa<IndirectBrInst>(TermInst) ||
isa<InvokeInst>(TermInst) ||
isa<CallBrInst>(TermInst)) {
LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
<< PredBB->getName() << "\n");
return false;