Vendor import of llvm release_90 branch r369369:
https://llvm.org/svn/llvm-project/llvm/branches/release_90@369369
This commit is contained in:
parent
e6d1592492
commit
464f838b7b
@ -949,7 +949,7 @@ template <typename DerivedT> class AAResultBase {
|
||||
|
||||
/// A pointer to the AAResults object that this AAResult is
|
||||
/// aggregated within. May be null if not aggregated.
|
||||
AAResults *AAR;
|
||||
AAResults *AAR = nullptr;
|
||||
|
||||
/// Helper to dispatch calls back through the derived type.
|
||||
DerivedT &derived() { return static_cast<DerivedT &>(*this); }
|
||||
|
@ -269,7 +269,13 @@ class SelectionDAG {
|
||||
|
||||
using CallSiteInfo = MachineFunction::CallSiteInfo;
|
||||
using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
|
||||
DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo;
|
||||
|
||||
/// Per-node record stored in SDCallSiteDbgInfo. It bundles the call site
/// info written by addCallSiteInfo() with the metadata written by
/// addHeapAllocSite().
struct CallSiteDbgInfo {
|
||||
/// Call site info recorded for the node.
CallSiteInfo CSInfo;
|
||||
/// Heap allocation site metadata; stays null until addHeapAllocSite()
/// assigns it.
MDNode *HeapAllocSite = nullptr;
|
||||
};
|
||||
|
||||
DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo;
|
||||
|
||||
uint16_t NextPersistentId = 0;
|
||||
|
||||
@ -1664,16 +1670,28 @@ public:
|
||||
}
|
||||
|
||||
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
|
||||
SDCallSiteInfo[CallNode] = std::move(CallInfo);
|
||||
SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo);
|
||||
}
|
||||
|
||||
CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
|
||||
auto I = SDCallSiteInfo.find(CallNode);
|
||||
if (I != SDCallSiteInfo.end())
|
||||
return std::move(I->second);
|
||||
auto I = SDCallSiteDbgInfo.find(CallNode);
|
||||
if (I != SDCallSiteDbgInfo.end())
|
||||
return std::move(I->second).CSInfo;
|
||||
return CallSiteInfo();
|
||||
}
|
||||
|
||||
/// Attach the heap allocation site metadata \p MD to \p Node by storing it in
/// the HeapAllocSite field of the node's SDCallSiteDbgInfo entry.
void addHeapAllocSite(const SDNode *Node, MDNode *MD) {
  auto &Entry = SDCallSiteDbgInfo[Node];
  Entry.HeapAllocSite = MD;
}
|
||||
|
||||
/// Return the HeapAllocSite type associated with the SDNode, if it exists.
|
||||
MDNode *getHeapAllocSite(const SDNode *Node) {
|
||||
auto It = SDCallSiteDbgInfo.find(Node);
|
||||
if (It == SDCallSiteDbgInfo.end())
|
||||
return nullptr;
|
||||
return It->second.HeapAllocSite;
|
||||
}
|
||||
|
||||
private:
|
||||
void InsertNode(SDNode *N);
|
||||
bool RemoveNodeFromCSEMaps(SDNode *N);
|
||||
|
@ -3665,6 +3665,7 @@ public:
|
||||
C_Register, // Constraint represents specific register(s).
|
||||
C_RegisterClass, // Constraint represents any of register(s) in class.
|
||||
C_Memory, // Memory constraint.
|
||||
C_Immediate, // Requires an immediate.
|
||||
C_Other, // Something else.
|
||||
C_Unknown // Unsupported constraint.
|
||||
};
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ExecutionEngine/JITSymbol.h"
|
||||
#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
|
||||
#include <memory>
|
||||
|
||||
namespace llvm {
|
||||
|
@ -112,6 +112,9 @@ namespace llvm {
|
||||
/// number of section symbols with the same name).
|
||||
StringMap<bool, BumpPtrAllocator &> UsedNames;
|
||||
|
||||
/// Keeps track of labels that are used in inline assembly.
|
||||
SymbolTable InlineAsmUsedLabelNames;
|
||||
|
||||
/// The next ID to dole out to an unnamed assembler temporary symbol with
|
||||
/// a given prefix.
|
||||
StringMap<unsigned> NextID;
|
||||
@ -377,6 +380,16 @@ namespace llvm {
|
||||
/// APIs.
|
||||
const SymbolTable &getSymbols() const { return Symbols; }
|
||||
|
||||
/// getInlineAsmLabel - Return the symbol stored under \p Name in
|
||||
/// InlineAsmUsedLabelNames, the table filled by registerInlineAsmLabel.
/// The result is whatever the table lookup yields; presumably null when no
/// label with that name was registered (assumes SymbolTable is a StringMap
/// whose lookup() returns a default value for a missing key -- TODO confirm).
|
||||
MCSymbol *getInlineAsmLabel(StringRef Name) const {
|
||||
return InlineAsmUsedLabelNames.lookup(Name);
|
||||
}
|
||||
|
||||
/// registerInlineAsmLabel - Records that the name is a label referenced in
|
||||
/// inline assembly.
|
||||
void registerInlineAsmLabel(MCSymbol *Sym);
|
||||
|
||||
/// @}
|
||||
|
||||
/// \name Section Management
|
||||
|
@ -50,35 +50,35 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
|
||||
#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
|
||||
#endif
|
||||
// FIXME: This would be nicer were it tablegen
|
||||
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
|
||||
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
|
||||
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
|
||||
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
|
||||
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
|
||||
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
|
||||
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
|
||||
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
|
||||
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
|
||||
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
|
||||
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
|
||||
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
|
||||
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
|
||||
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
|
||||
AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm")
|
||||
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
|
||||
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
|
||||
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
|
||||
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
|
||||
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
|
||||
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
|
||||
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
|
||||
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
|
||||
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
|
||||
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
|
||||
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
|
||||
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
|
||||
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
|
||||
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
|
||||
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
|
||||
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
|
||||
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
|
||||
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
|
||||
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
|
||||
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
|
||||
AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm")
|
||||
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
|
||||
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
|
||||
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
|
||||
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
|
||||
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
|
||||
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
|
||||
#undef AARCH64_ARCH_EXT_NAME
|
||||
|
||||
#ifndef AARCH64_CPU_NAME
|
||||
|
@ -53,7 +53,7 @@ enum ArchExtKind : unsigned {
|
||||
AEK_SVE2AES = 1 << 24,
|
||||
AEK_SVE2SM4 = 1 << 25,
|
||||
AEK_SVE2SHA3 = 1 << 26,
|
||||
AEK_BITPERM = 1 << 27,
|
||||
AEK_SVE2BITPERM = 1 << 27,
|
||||
};
|
||||
|
||||
enum class ArchKind {
|
||||
|
@ -39,19 +39,13 @@ enum ArchExtKind : unsigned {
|
||||
AEK_DSP = 1 << 10,
|
||||
AEK_FP16 = 1 << 11,
|
||||
AEK_RAS = 1 << 12,
|
||||
AEK_SVE = 1 << 13,
|
||||
AEK_DOTPROD = 1 << 14,
|
||||
AEK_SHA2 = 1 << 15,
|
||||
AEK_AES = 1 << 16,
|
||||
AEK_FP16FML = 1 << 17,
|
||||
AEK_SB = 1 << 18,
|
||||
AEK_SVE2 = 1 << 19,
|
||||
AEK_SVE2AES = 1 << 20,
|
||||
AEK_SVE2SM4 = 1 << 21,
|
||||
AEK_SVE2SHA3 = 1 << 22,
|
||||
AEK_BITPERM = 1 << 23,
|
||||
AEK_FP_DP = 1 << 24,
|
||||
AEK_LOB = 1 << 25,
|
||||
AEK_DOTPROD = 1 << 13,
|
||||
AEK_SHA2 = 1 << 14,
|
||||
AEK_AES = 1 << 15,
|
||||
AEK_FP16FML = 1 << 16,
|
||||
AEK_SB = 1 << 17,
|
||||
AEK_FP_DP = 1 << 18,
|
||||
AEK_LOB = 1 << 19,
|
||||
// Unsupported extensions.
|
||||
AEK_OS = 0x8000000,
|
||||
AEK_IWMMXT = 0x10000000,
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseMapInfo.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include <cstdint>
|
||||
|
||||
namespace llvm {
|
||||
@ -28,8 +29,8 @@ class Value;
|
||||
|
||||
struct DivRemMapKey {
|
||||
bool SignedOp;
|
||||
Value *Dividend;
|
||||
Value *Divisor;
|
||||
AssertingVH<Value> Dividend;
|
||||
AssertingVH<Value> Divisor;
|
||||
|
||||
DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
|
||||
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
|
||||
@ -50,8 +51,10 @@ template <> struct DenseMapInfo<DivRemMapKey> {
|
||||
}
|
||||
|
||||
static unsigned getHashValue(const DivRemMapKey &Val) {
|
||||
return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
|
||||
reinterpret_cast<uintptr_t>(Val.Divisor)) ^
|
||||
return (unsigned)(reinterpret_cast<uintptr_t>(
|
||||
static_cast<Value *>(Val.Dividend)) ^
|
||||
reinterpret_cast<uintptr_t>(
|
||||
static_cast<Value *>(Val.Divisor))) ^
|
||||
(unsigned)Val.SignedOp;
|
||||
}
|
||||
};
|
||||
|
@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
|
||||
const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
|
||||
MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
|
||||
Sym->print(OS, AP->MAI);
|
||||
MMI->getContext().registerInlineAsmLabel(Sym);
|
||||
} else if (MI->getOperand(OpNo).isMBB()) {
|
||||
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
|
||||
Sym->print(OS, AP->MAI);
|
||||
|
@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
|
||||
TheUse = InsertedShift;
|
||||
}
|
||||
|
||||
// If we removed all uses, nuke the shift.
|
||||
// If we removed all uses, or there are none, nuke the shift.
|
||||
if (ShiftI->use_empty()) {
|
||||
salvageDebugInfo(*ShiftI);
|
||||
ShiftI->eraseFromParent();
|
||||
MadeChange = true;
|
||||
}
|
||||
|
||||
return MadeChange;
|
||||
|
@ -691,9 +691,17 @@ void LiveDebugValues::insertTransferDebugPair(
|
||||
"No register supplied when handling a restore of a debug value");
|
||||
MachineFunction *MF = MI.getMF();
|
||||
DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
|
||||
|
||||
const DIExpression *NewExpr;
|
||||
if (auto Fragment = DebugInstr->getDebugExpression()->getFragmentInfo())
|
||||
NewExpr = *DIExpression::createFragmentExpression(DIB.createExpression(),
|
||||
Fragment->OffsetInBits, Fragment->SizeInBits);
|
||||
else
|
||||
NewExpr = DIB.createExpression();
|
||||
|
||||
NewDebugInstr =
|
||||
BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
|
||||
NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
|
||||
NewReg, DebugInstr->getDebugVariable(), NewExpr);
|
||||
VarLoc VL(*NewDebugInstr, LS);
|
||||
ProcessVarLoc(VL, NewDebugInstr);
|
||||
LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
|
||||
@ -848,9 +856,14 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
|
||||
<< "\n");
|
||||
}
|
||||
// Check if the register or spill location is the location of a debug value.
|
||||
// FIXME: Don't create a spill transfer if there is a complex expression,
|
||||
// because we currently cannot recover the original expression on restore.
|
||||
for (unsigned ID : OpenRanges.getVarLocs()) {
|
||||
const MachineInstr *DebugInstr = &VarLocIDs[ID].MI;
|
||||
|
||||
if (TKind == TransferKind::TransferSpill &&
|
||||
VarLocIDs[ID].isDescribedByReg() == Reg) {
|
||||
VarLocIDs[ID].isDescribedByReg() == Reg &&
|
||||
!DebugInstr->getDebugExpression()->isComplex()) {
|
||||
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
|
||||
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
|
||||
} else if (TKind == TransferKind::TransferRestore &&
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
@ -66,6 +67,7 @@ namespace {
|
||||
AliasAnalysis *AA;
|
||||
MachineDominatorTree *DT;
|
||||
MachineRegisterInfo *MRI;
|
||||
MachineBlockFrequencyInfo *MBFI;
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification
|
||||
@ -83,6 +85,8 @@ namespace {
|
||||
AU.addPreservedID(MachineLoopInfoID);
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
AU.addPreserved<MachineBlockFrequencyInfo>();
|
||||
}
|
||||
|
||||
void releaseMemory() override {
|
||||
@ -133,6 +137,11 @@ namespace {
|
||||
bool isPRECandidate(MachineInstr *MI);
|
||||
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
|
||||
bool PerformSimplePRE(MachineDominatorTree *DT);
|
||||
/// Heuristics to see if it's beneficial to move common computations of MBB
|
||||
/// and MBB1 to CandidateBB.
|
||||
bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
|
||||
MachineBasicBlock *MBB,
|
||||
MachineBasicBlock *MBB1);
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
|
||||
if (!CMBB->isLegalToHoistInto())
|
||||
continue;
|
||||
|
||||
if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
|
||||
continue;
|
||||
|
||||
// Two instrs are partial redundant if their basic blocks are reachable
|
||||
// from one to another but one doesn't dominate another.
|
||||
if (CMBB != MBB1) {
|
||||
@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// Decide whether hoisting a computation shared by \p MBB and \p MBB1 into
/// \p CandidateBB is worthwhile.
///
/// Functions marked minsize always answer true. Otherwise the hoist is
/// accepted only when the block frequency of \p CandidateBB does not exceed
/// the sum of the block frequencies of \p MBB and \p MBB1. Both blocks are
/// asserted to be dominated by \p CandidateBB.
bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
                                        MachineBasicBlock *MBB,
                                        MachineBasicBlock *MBB1) {
  const auto &ParentFn = CandidateBB->getParent()->getFunction();
  if (ParentFn.hasMinSize())
    return true;

  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
  assert(DT->dominates(CandidateBB, MBB1) &&
         "CandidateBB should dominate MBB1");

  // Compare the frequency of the hoist target with the combined frequency of
  // the two blocks the computation would be removed from.
  const auto HoistFreq = MBFI->getBlockFreq(CandidateBB);
  const auto CombinedFreq = MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
  return HoistFreq <= CombinedFreq;
}
|
||||
|
||||
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
|
||||
MRI = &MF.getRegInfo();
|
||||
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
|
||||
DT = &getAnalysis<MachineDominatorTree>();
|
||||
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
|
||||
LookAheadLimit = TII->getMachineCSELookAheadLimit();
|
||||
bool ChangedPRE, ChangedCSE;
|
||||
ChangedPRE = PerformSimplePRE(DT);
|
||||
|
@ -121,7 +121,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
|
||||
BBCallbacks.back().setMap(this);
|
||||
Entry.Index = BBCallbacks.size() - 1;
|
||||
Entry.Fn = BB->getParent();
|
||||
Entry.Symbols.push_back(Context.createTempSymbol());
|
||||
Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken()));
|
||||
return Entry.Symbols;
|
||||
}
|
||||
|
||||
|
@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
|
||||
// Remember the source order of the inserted instruction.
|
||||
if (HasDbg)
|
||||
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
|
||||
|
||||
if (MDNode *MD = DAG->getHeapAllocSite(N)) {
|
||||
if (NewInsn && NewInsn->isCall())
|
||||
MF.addCodeViewHeapAllocSite(NewInsn, MD);
|
||||
}
|
||||
|
||||
GluedNodes.pop_back();
|
||||
}
|
||||
auto NewInsn =
|
||||
@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
|
||||
if (HasDbg)
|
||||
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
|
||||
NewInsn);
|
||||
if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
|
||||
if (NewInsn && NewInsn->isCall())
|
||||
MF.addCodeViewHeapAllocSite(NewInsn, MD);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert all the dbg_values which have not already been inserted in source
|
||||
|
@ -1084,6 +1084,7 @@ void SelectionDAG::clear() {
|
||||
ExternalSymbols.clear();
|
||||
TargetExternalSymbols.clear();
|
||||
MCSymbols.clear();
|
||||
SDCallSiteDbgInfo.clear();
|
||||
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
|
||||
static_cast<CondCodeSDNode*>(nullptr));
|
||||
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
|
||||
|
@ -8021,6 +8021,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
// Compute the constraint code and ConstraintType to use.
|
||||
TLI.ComputeConstraintToUse(T, SDValue());
|
||||
|
||||
if (T.ConstraintType == TargetLowering::C_Immediate &&
|
||||
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
|
||||
// The diagnostic for immediate constraints such as "n" is delayed until
|
||||
// here because inlining may turn the operand into an integer constant.
|
||||
return emitInlineAsmError(
|
||||
CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
|
||||
"integer constant expression");
|
||||
|
||||
ExtraInfo.update(T);
|
||||
}
|
||||
|
||||
@ -8105,7 +8113,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
switch (OpInfo.Type) {
|
||||
case InlineAsm::isOutput:
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
|
||||
(OpInfo.ConstraintType == TargetLowering::C_Other &&
|
||||
((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
|
||||
OpInfo.isIndirect)) {
|
||||
unsigned ConstraintID =
|
||||
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
|
||||
@ -8119,13 +8128,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
MVT::i32));
|
||||
AsmNodeOperands.push_back(OpInfo.CallOperand);
|
||||
break;
|
||||
} else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
|
||||
} else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
|
||||
!OpInfo.isIndirect) ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Register ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
|
||||
// Otherwise, this outputs to a register (directly for C_Register /
|
||||
// C_RegisterClass, and a target-defined fashion for C_Other). Find a
|
||||
// register that we can use.
|
||||
// C_RegisterClass, and a target-defined fashion for
|
||||
// C_Immediate/C_Other). Find a register that we can use.
|
||||
if (OpInfo.AssignedRegs.Regs.empty()) {
|
||||
emitInlineAsmError(
|
||||
CS, "couldn't allocate output register for constraint '" +
|
||||
@ -8205,15 +8215,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
}
|
||||
|
||||
// Treat indirect 'X' constraint as memory.
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Other &&
|
||||
if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
|
||||
OpInfo.isIndirect)
|
||||
OpInfo.ConstraintType = TargetLowering::C_Memory;
|
||||
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Other) {
|
||||
std::vector<SDValue> Ops;
|
||||
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
|
||||
Ops, DAG);
|
||||
if (Ops.empty()) {
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
|
||||
if (isa<ConstantSDNode>(InOperandVal)) {
|
||||
emitInlineAsmError(CS, "value out of range for constraint '" +
|
||||
Twine(OpInfo.ConstraintCode) + "'");
|
||||
return;
|
||||
}
|
||||
|
||||
emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
|
||||
Twine(OpInfo.ConstraintCode) + "'");
|
||||
return;
|
||||
@ -8250,7 +8269,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
}
|
||||
|
||||
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
|
||||
OpInfo.ConstraintType == TargetLowering::C_Register ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
|
||||
"Unknown constraint type!");
|
||||
|
||||
// TODO: Support this.
|
||||
@ -8356,6 +8376,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
Val = OpInfo.AssignedRegs.getCopyFromRegs(
|
||||
DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
|
||||
break;
|
||||
case TargetLowering::C_Immediate:
|
||||
case TargetLowering::C_Other:
|
||||
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
|
||||
OpInfo, DAG);
|
||||
|
@ -3567,15 +3567,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
if (S == 1) {
|
||||
switch (Constraint[0]) {
|
||||
default: break;
|
||||
case 'r': return C_RegisterClass;
|
||||
case 'r':
|
||||
return C_RegisterClass;
|
||||
case 'm': // memory
|
||||
case 'o': // offsetable
|
||||
case 'V': // not offsetable
|
||||
return C_Memory;
|
||||
case 'i': // Simple Integer or Relocatable Constant
|
||||
case 'n': // Simple Integer
|
||||
case 'E': // Floating Point Constant
|
||||
case 'F': // Floating Point Constant
|
||||
return C_Immediate;
|
||||
case 'i': // Simple Integer or Relocatable Constant
|
||||
case 's': // Relocatable Constant
|
||||
case 'p': // Address.
|
||||
case 'X': // Allow ANY value.
|
||||
@ -3950,6 +3952,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
|
||||
/// Return an integer indicating how general CT is.
|
||||
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
|
||||
switch (CT) {
|
||||
case TargetLowering::C_Immediate:
|
||||
case TargetLowering::C_Other:
|
||||
case TargetLowering::C_Unknown:
|
||||
return 0;
|
||||
@ -4069,11 +4072,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
|
||||
TargetLowering::ConstraintType CType =
|
||||
TLI.getConstraintType(OpInfo.Codes[i]);
|
||||
|
||||
// If this is an 'other' constraint, see if the operand is valid for it.
|
||||
// For example, on X86 we might have an 'rI' constraint. If the operand
|
||||
// is an integer in the range [0..31] we want to use I (saving a load
|
||||
// of a register), otherwise we must use 'r'.
|
||||
if (CType == TargetLowering::C_Other && Op.getNode()) {
|
||||
// If this is an 'other' or 'immediate' constraint, see if the operand is
|
||||
// valid for it. For example, on X86 we might have an 'rI' constraint. If
|
||||
// the operand is an integer in the range [0..31] we want to use I (saving a
|
||||
// load of a register), otherwise we must use 'r'.
|
||||
if ((CType == TargetLowering::C_Other ||
|
||||
CType == TargetLowering::C_Immediate) && Op.getNode()) {
|
||||
assert(OpInfo.Codes[i].size() == 1 &&
|
||||
"Unhandled multi-letter 'other' constraint");
|
||||
std::vector<SDValue> ResultOps;
|
||||
|
@ -61,6 +61,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
|
||||
bool DoAutoReset)
|
||||
: SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi),
|
||||
Symbols(Allocator), UsedNames(Allocator),
|
||||
InlineAsmUsedLabelNames(Allocator),
|
||||
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
|
||||
AutoReset(DoAutoReset) {
|
||||
SecureLogFile = AsSecureLogFileName;
|
||||
@ -90,6 +91,7 @@ void MCContext::reset() {
|
||||
XCOFFAllocator.DestroyAll();
|
||||
|
||||
MCSubtargetAllocator.DestroyAll();
|
||||
InlineAsmUsedLabelNames.clear();
|
||||
UsedNames.clear();
|
||||
Symbols.clear();
|
||||
Allocator.Reset();
|
||||
@ -272,6 +274,10 @@ void MCContext::setSymbolValue(MCStreamer &Streamer,
|
||||
Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this));
|
||||
}
|
||||
|
||||
/// Record \p Sym in InlineAsmUsedLabelNames, keyed by the symbol's own name.
/// Any symbol previously stored under that name is overwritten.
void MCContext::registerInlineAsmLabel(MCSymbol *Sym) {
  const auto LabelName = Sym->getName();
  InlineAsmUsedLabelNames[LabelName] = Sym;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Section Management
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1142,7 +1142,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
|
||||
}
|
||||
}
|
||||
|
||||
MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
|
||||
MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
|
||||
if (!Sym)
|
||||
Sym = getContext().getOrCreateSymbol(SymbolName);
|
||||
|
||||
// If this is an absolute variable reference, substitute it now to preserve
|
||||
// semantics in the face of reassignment.
|
||||
|
@ -90,9 +90,9 @@ static bool supportsBPF(uint64_t Type) {
|
||||
static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) {
|
||||
switch (R.getType()) {
|
||||
case ELF::R_BPF_64_32:
|
||||
return S & 0xFFFFFFFF;
|
||||
return (S + A) & 0xFFFFFFFF;
|
||||
case ELF::R_BPF_64_64:
|
||||
return S;
|
||||
return S + A;
|
||||
default:
|
||||
llvm_unreachable("Invalid relocation type");
|
||||
}
|
||||
|
@ -96,8 +96,8 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
|
||||
Features.push_back("+sve2-sm4");
|
||||
if (Extensions & AEK_SVE2SHA3)
|
||||
Features.push_back("+sve2-sha3");
|
||||
if (Extensions & AEK_BITPERM)
|
||||
Features.push_back("+bitperm");
|
||||
if (Extensions & AEK_SVE2BITPERM)
|
||||
Features.push_back("+sve2-bitperm");
|
||||
if (Extensions & AEK_RCPC)
|
||||
Features.push_back("+rcpc");
|
||||
|
||||
|
@ -1200,7 +1200,7 @@ namespace fs {
|
||||
/// implementation.
|
||||
std::error_code copy_file(const Twine &From, const Twine &To) {
|
||||
uint32_t Flag = COPYFILE_DATA;
|
||||
#if __has_builtin(__builtin_available)
|
||||
#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE)
|
||||
if (__builtin_available(macos 10.12, *)) {
|
||||
bool IsSymlink;
|
||||
if (std::error_code Error = is_symlink_file(From, IsSymlink))
|
||||
|
@ -115,7 +115,7 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
|
||||
def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
|
||||
"Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
|
||||
|
||||
def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
|
||||
def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true",
|
||||
"Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
|
||||
|
||||
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
|
||||
|
@ -606,6 +606,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
|
||||
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
|
||||
|
||||
MaxLoadsPerMemcmpOptSize = 4;
|
||||
MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
|
||||
? MaxLoadsPerMemcmpOptSize : 8;
|
||||
|
||||
setStackPointerRegisterToSaveRestore(AArch64::SP);
|
||||
|
||||
setSchedulingPreference(Sched::Hybrid);
|
||||
@ -5661,8 +5665,6 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
switch (Constraint[0]) {
|
||||
default:
|
||||
break;
|
||||
case 'z':
|
||||
return C_Other;
|
||||
case 'x':
|
||||
case 'w':
|
||||
return C_RegisterClass;
|
||||
@ -5670,6 +5672,16 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
// currently handle addresses it is the same as 'r'.
|
||||
case 'Q':
|
||||
return C_Memory;
|
||||
case 'I':
|
||||
case 'J':
|
||||
case 'K':
|
||||
case 'L':
|
||||
case 'M':
|
||||
case 'N':
|
||||
case 'Y':
|
||||
case 'Z':
|
||||
return C_Immediate;
|
||||
case 'z':
|
||||
case 'S': // A symbolic address
|
||||
return C_Other;
|
||||
}
|
||||
|
@ -116,7 +116,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
|
||||
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
|
||||
AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
|
||||
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
|
||||
AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
|
||||
AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">;
|
||||
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
|
||||
AssemblerPredicate<"FeatureRCPC", "rcpc">;
|
||||
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
|
||||
|
@ -1164,6 +1164,13 @@ let Predicates = [HasSVE2] in {
|
||||
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
|
||||
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
|
||||
|
||||
// SVE2 predicated shifts
|
||||
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
|
||||
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
|
||||
defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
|
||||
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
|
||||
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
|
||||
|
||||
// SVE2 integer add/subtract long
|
||||
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
|
||||
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
|
||||
@ -1199,14 +1206,14 @@ let Predicates = [HasSVE2] in {
|
||||
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
|
||||
|
||||
// SVE2 bitwise shift and insert
|
||||
defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
|
||||
defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;
|
||||
defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">;
|
||||
defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">;
|
||||
|
||||
// SVE2 bitwise shift right and accumulate
|
||||
defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
|
||||
defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
|
||||
defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
|
||||
defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;
|
||||
defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">;
|
||||
defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">;
|
||||
defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">;
|
||||
defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
|
||||
|
||||
// SVE2 complex integer add
|
||||
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
|
||||
@ -1228,41 +1235,47 @@ let Predicates = [HasSVE2] in {
|
||||
defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
|
||||
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
|
||||
|
||||
// SVE2 bitwise shift right narrow
|
||||
defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">;
|
||||
defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">;
|
||||
defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">;
|
||||
defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">;
|
||||
defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">;
|
||||
defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">;
|
||||
defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">;
|
||||
defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">;
|
||||
defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">;
|
||||
defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">;
|
||||
defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">;
|
||||
defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">;
|
||||
defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">;
|
||||
defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">;
|
||||
defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">;
|
||||
defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">;
|
||||
// SVE2 bitwise shift right narrow (bottom)
|
||||
defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
|
||||
defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
|
||||
defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
|
||||
defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
|
||||
defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
|
||||
defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
|
||||
defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
|
||||
defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
|
||||
|
||||
// SVE2 integer add/subtract narrow high part
|
||||
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">;
|
||||
defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">;
|
||||
defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">;
|
||||
defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">;
|
||||
defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">;
|
||||
defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">;
|
||||
defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">;
|
||||
defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">;
|
||||
// SVE2 bitwise shift right narrow (top)
|
||||
defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
|
||||
defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
|
||||
defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
|
||||
defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
|
||||
defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
|
||||
defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
|
||||
defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
|
||||
defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
|
||||
|
||||
// SVE2 saturating extract narrow
|
||||
defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">;
|
||||
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">;
|
||||
defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">;
|
||||
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">;
|
||||
defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
|
||||
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
|
||||
// SVE2 integer add/subtract narrow high part (bottom)
|
||||
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">;
|
||||
defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">;
|
||||
defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">;
|
||||
defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">;
|
||||
|
||||
// SVE2 integer add/subtract narrow high part (top)
|
||||
defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">;
|
||||
defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">;
|
||||
defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">;
|
||||
defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">;
|
||||
|
||||
// SVE2 saturating extract narrow (bottom)
|
||||
defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">;
|
||||
defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">;
|
||||
defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">;
|
||||
|
||||
// SVE2 saturating extract narrow (top)
|
||||
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">;
|
||||
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">;
|
||||
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">;
|
||||
|
||||
// SVE2 character match
|
||||
defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
|
||||
@ -1289,10 +1302,14 @@ let Predicates = [HasSVE2] in {
|
||||
// SVE2 histogram generation (vector)
|
||||
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
|
||||
|
||||
// SVE2 floating-point base 2 logarithm as integer
|
||||
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
|
||||
|
||||
// SVE2 floating-point convert precision
|
||||
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
|
||||
defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">;
|
||||
defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">;
|
||||
def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
|
||||
|
||||
// SVE2 floating-point pairwise operations
|
||||
defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
|
||||
@ -1321,58 +1338,45 @@ let Predicates = [HasSVE2] in {
|
||||
def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
|
||||
def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
|
||||
|
||||
// sve_int_rotate_imm
|
||||
// SVE2 bitwise xor and rotate right by immediate
|
||||
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
|
||||
|
||||
// SVE2 extract vector (immediate offset, constructive)
|
||||
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
|
||||
|
||||
// SVE floating-point convert precision
|
||||
def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
|
||||
// SVE2 non-temporal gather loads
|
||||
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
|
||||
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
|
||||
defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
|
||||
defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
|
||||
defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
|
||||
|
||||
// SVE floating-point convert to integer
|
||||
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
|
||||
|
||||
// Non-temporal contiguous loads (vector + register)
|
||||
defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
|
||||
defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
|
||||
defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
|
||||
defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
|
||||
defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
|
||||
|
||||
defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
|
||||
defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
|
||||
defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
|
||||
defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
|
||||
defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
|
||||
defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
|
||||
defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
|
||||
defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
|
||||
defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
|
||||
defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
|
||||
defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
|
||||
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
|
||||
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
|
||||
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
|
||||
|
||||
// SVE2 vector splice (constructive)
|
||||
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
|
||||
|
||||
// Predicated shifts
|
||||
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
|
||||
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
|
||||
defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
|
||||
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
|
||||
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
|
||||
// SVE2 non-temporal scatter stores
|
||||
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
|
||||
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
|
||||
defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
|
||||
|
||||
// Non-temporal contiguous stores (vector + register)
|
||||
defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
|
||||
defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
|
||||
defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
|
||||
defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
|
||||
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
|
||||
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
|
||||
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
|
||||
|
||||
defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
|
||||
defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
|
||||
defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
|
||||
defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
|
||||
|
||||
// SVE table lookup (three sources)
|
||||
// SVE2 table lookup (three sources)
|
||||
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
|
||||
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
|
||||
|
||||
// SVE integer compare scalar count and limit
|
||||
// SVE2 integer compare scalar count and limit
|
||||
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
|
||||
defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
|
||||
defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
|
||||
@ -1383,7 +1387,7 @@ let Predicates = [HasSVE2] in {
|
||||
defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
|
||||
defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
|
||||
|
||||
// SVE pointer conflict compare
|
||||
// SVE2 pointer conflict compare
|
||||
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
|
||||
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
|
||||
}
|
||||
|
@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
|
||||
}
|
||||
|
||||
AArch64TTIImpl::TTI::MemCmpExpansionOptions
|
||||
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
|
||||
TTI::MemCmpExpansionOptions Options;
|
||||
Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
|
||||
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
|
||||
Options.NumLoadsPerBlock = Options.MaxNumLoads;
|
||||
// TODO: Though vector loads usually perform well on AArch64, in some targets
|
||||
// they may wake up the FP unit, which raises the power consumption. Perhaps
|
||||
// they could be used with no holds barred (-O3).
|
||||
Options.LoadSizes = {8, 4, 2, 1};
|
||||
return Options;
|
||||
}
|
||||
|
||||
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
|
||||
unsigned Alignment, unsigned AddressSpace,
|
||||
const Instruction *I) {
|
||||
|
@ -130,6 +130,9 @@ public:
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
const Instruction *I = nullptr);
|
||||
|
||||
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
|
||||
bool IsZeroCmp) const;
|
||||
|
||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
||||
unsigned AddressSpace, const Instruction *I = nullptr);
|
||||
|
||||
|
@ -2840,7 +2840,7 @@ static const struct Extension {
|
||||
{"sve2-aes", {AArch64::FeatureSVE2AES}},
|
||||
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
|
||||
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
|
||||
{"bitperm", {AArch64::FeatureSVE2BitPerm}},
|
||||
{"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}},
|
||||
// FIXME: Unsupported extensions
|
||||
{"pan", {}},
|
||||
{"lor", {}},
|
||||
|
@ -403,12 +403,12 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm> {
|
||||
}
|
||||
|
||||
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
|
||||
ZPRRegOp zprty>
|
||||
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg),
|
||||
asm, "\t$Zdn, $Pg",
|
||||
ZPRRegOp zprty, PPRRegOp pprty>
|
||||
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm),
|
||||
asm, "\t$Zdn, $Pm",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
bits<4> Pg;
|
||||
bits<4> Pm;
|
||||
bits<5> Zdn;
|
||||
let Inst{31-24} = 0b00100101;
|
||||
let Inst{23-22} = sz8_64;
|
||||
@ -416,7 +416,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
|
||||
let Inst{18-16} = opc{4-2};
|
||||
let Inst{15-11} = 0b10000;
|
||||
let Inst{10-9} = opc{1-0};
|
||||
let Inst{8-5} = Pg;
|
||||
let Inst{8-5} = Pm;
|
||||
let Inst{4-0} = Zdn;
|
||||
|
||||
let Constraints = "$Zdn = $_Zdn";
|
||||
@ -425,9 +425,16 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
|
||||
}
|
||||
|
||||
multiclass sve_int_count_v<bits<5> opc, string asm> {
|
||||
def _H : sve_int_count_v<0b01, opc, asm, ZPR16>;
|
||||
def _S : sve_int_count_v<0b10, opc, asm, ZPR32>;
|
||||
def _D : sve_int_count_v<0b11, opc, asm, ZPR64>;
|
||||
def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>;
|
||||
def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>;
|
||||
def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>;
|
||||
|
||||
def : InstAlias<asm # "\t$Zdn, $Pm",
|
||||
(!cast<Instruction>(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>;
|
||||
def : InstAlias<asm # "\t$Zdn, $Pm",
|
||||
(!cast<Instruction>(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>;
|
||||
def : InstAlias<asm # "\t$Zdn, $Pm",
|
||||
(!cast<Instruction>(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>;
|
||||
}
|
||||
|
||||
class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
|
||||
@ -744,7 +751,7 @@ multiclass sve2_int_perm_tbl<string asm> {
|
||||
}
|
||||
|
||||
class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
|
||||
: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
|
||||
: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm),
|
||||
asm, "\t$Zd, $Zn, $Zm",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
@ -758,6 +765,8 @@ class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
|
||||
let Inst{15-10} = 0b001011;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_int_perm_tbx<string asm> {
|
||||
@ -1489,7 +1498,7 @@ multiclass sve_fp_fcadd<string asm> {
|
||||
|
||||
class sve2_fp_convert_precision<bits<4> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
|
||||
asm, "\t$Zd, $Pg/m, $Zn",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
@ -1504,6 +1513,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
|
||||
let Inst{12-10} = Pg;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_fp_convert_down_narrow<string asm> {
|
||||
@ -2399,21 +2410,40 @@ multiclass sve2_misc_bitwise<bits<4> opc, string asm> {
|
||||
def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
|
||||
}
|
||||
|
||||
multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
|
||||
let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in {
|
||||
def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>;
|
||||
def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>;
|
||||
def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>;
|
||||
def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
|
||||
def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
|
||||
def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
|
||||
def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
|
||||
}
|
||||
|
||||
class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
|
||||
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<5> Zn;
|
||||
bits<5> Zm;
|
||||
let Inst{31-24} = 0b01000101;
|
||||
let Inst{23-22} = sz;
|
||||
let Inst{21} = 0b0;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-11} = 0b10010;
|
||||
let Inst{10} = opc;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
let DestructiveInstType = Destructive;
|
||||
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
|
||||
def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>;
|
||||
def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>;
|
||||
def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>;
|
||||
def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>;
|
||||
}
|
||||
|
||||
class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2,
|
||||
Operand immtype>
|
||||
@ -2451,9 +2481,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
|
||||
// SVE2 Accumulate Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
|
||||
ZPRRegOp zprty, Operand immtype>
|
||||
: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
|
||||
class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
|
||||
ZPRRegOp zprty, Operand immtype>
|
||||
: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm),
|
||||
asm, "\t$Zd, $Zn, $imm",
|
||||
"", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
@ -2468,38 +2498,40 @@ class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
|
||||
let Inst{10} = opc;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {
|
||||
def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
|
||||
def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
|
||||
multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
|
||||
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
|
||||
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
|
||||
def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
|
||||
def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
|
||||
let Inst{22} = imm{5};
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {
|
||||
def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
|
||||
def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
|
||||
multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
|
||||
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
|
||||
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
|
||||
def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
|
||||
def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
|
||||
let Inst{22} = imm{5};
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
}
|
||||
|
||||
class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty, Operand immtype>
|
||||
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty, Operand immtype>
|
||||
: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
|
||||
asm, "\t$Zda, $Zn, $imm",
|
||||
"", []>, Sched<[]> {
|
||||
@ -2521,15 +2553,15 @@ class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm
|
||||
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
|
||||
def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
|
||||
multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
|
||||
def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
|
||||
def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
|
||||
def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
|
||||
let Inst{22} = imm{5};
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
@ -2607,9 +2639,9 @@ multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
|
||||
// SVE2 Narrowing Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
|
||||
string asm, ZPRRegOp zprty1,
|
||||
ZPRRegOp zprty2, Operand immtype>
|
||||
class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
|
||||
string asm, ZPRRegOp zprty1,
|
||||
ZPRRegOp zprty2, Operand immtype>
|
||||
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
|
||||
asm, "\t$Zd, $Zn, $imm",
|
||||
"", []>, Sched<[]> {
|
||||
@ -2622,26 +2654,63 @@ class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
|
||||
let Inst{20-19} = tsz8_64{1-0};
|
||||
let Inst{18-16} = imm{2-0}; // imm3
|
||||
let Inst{15-14} = 0b00;
|
||||
let Inst{13-10} = opc;
|
||||
let Inst{13-11} = opc;
|
||||
let Inst{10} = 0b0;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> {
|
||||
def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16,
|
||||
vecshiftR8>;
|
||||
def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32,
|
||||
vecshiftR16> {
|
||||
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
|
||||
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
|
||||
vecshiftR8>;
|
||||
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
|
||||
vecshiftR16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64,
|
||||
vecshiftR32> {
|
||||
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
|
||||
vecshiftR32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
}
|
||||
|
||||
class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
|
||||
string asm, ZPRRegOp zprty1,
|
||||
ZPRRegOp zprty2, Operand immtype>
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm),
|
||||
asm, "\t$Zd, $Zn, $imm",
|
||||
"", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<5> Zn;
|
||||
bits<5> imm;
|
||||
let Inst{31-23} = 0b010001010;
|
||||
let Inst{22} = tsz8_64{2};
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-19} = tsz8_64{1-0};
|
||||
let Inst{18-16} = imm{2-0}; // imm3
|
||||
let Inst{15-14} = 0b00;
|
||||
let Inst{13-11} = opc;
|
||||
let Inst{10} = 0b1;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
|
||||
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
|
||||
vecshiftR8>;
|
||||
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
|
||||
vecshiftR16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
|
||||
vecshiftR32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
}
|
||||
|
||||
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
|
||||
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
@ -2652,19 +2721,46 @@ class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = 0b011;
|
||||
let Inst{12-10} = opc; // S, R, T
|
||||
let Inst{12-11} = opc; // S, R
|
||||
let Inst{10} = 0b0; // Top
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
}
|
||||
|
||||
multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> {
|
||||
def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>;
|
||||
multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>;
|
||||
}
|
||||
|
||||
class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
|
||||
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<5> Zn;
|
||||
bits<5> Zm;
|
||||
let Inst{31-24} = 0b01000101;
|
||||
let Inst{23-22} = sz;
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = 0b011;
|
||||
let Inst{12-11} = opc; // S, R
|
||||
let Inst{10} = 0b1; // Top
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>;
|
||||
}
|
||||
|
||||
class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
|
||||
asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
@ -2674,15 +2770,41 @@ class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-19} = tsz8_64{1-0};
|
||||
let Inst{18-13} = 0b000010;
|
||||
let Inst{12-10} = opc;
|
||||
let Inst{12-11} = opc;
|
||||
let Inst{10} = 0b0;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
}
|
||||
|
||||
multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> {
|
||||
def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>;
|
||||
multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>;
|
||||
}
|
||||
|
||||
class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn),
|
||||
asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<5> Zn;
|
||||
let Inst{31-23} = 0b010001010;
|
||||
let Inst{22} = tsz8_64{2};
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-19} = tsz8_64{1-0};
|
||||
let Inst{18-13} = 0b000010;
|
||||
let Inst{12-11} = opc;
|
||||
let Inst{10} = 0b1;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3886,9 +4008,9 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
|
||||
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
|
||||
}
|
||||
|
||||
class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
|
||||
RegisterOperand VecList>
|
||||
: I<(outs VecList:$Zt), iops,
|
||||
class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
|
||||
RegisterOperand listty, ZPRRegOp zprty>
|
||||
: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
|
||||
asm, "\t$Zt, $Pg, [$Zn, $Rm]",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
@ -3908,17 +4030,14 @@ class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
|
||||
let mayStore = 1;
|
||||
}
|
||||
|
||||
multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm,
|
||||
multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm,
|
||||
RegisterOperand listty, ZPRRegOp zprty> {
|
||||
def _REAL : sve2_mem_cstnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
|
||||
asm, listty>;
|
||||
def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>;
|
||||
|
||||
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
|
||||
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
|
||||
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
|
||||
}
|
||||
@ -5094,7 +5213,7 @@ multiclass sve_mem_p_fill<string asm> {
|
||||
(!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
|
||||
}
|
||||
|
||||
class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
|
||||
class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
|
||||
RegisterOperand VecList>
|
||||
: I<(outs VecList:$Zt), iops,
|
||||
asm, "\t$Zt, $Pg/z, [$Zn, $Rm]",
|
||||
@ -5119,17 +5238,15 @@ class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
|
||||
let mayLoad = 1;
|
||||
}
|
||||
|
||||
multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm,
|
||||
multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm,
|
||||
RegisterOperand listty, ZPRRegOp zprty> {
|
||||
def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
|
||||
def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
|
||||
asm, listty>;
|
||||
|
||||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
|
||||
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
|
||||
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
|
||||
}
|
||||
|
@ -14369,7 +14369,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
|
||||
/// constraint it is for this target.
|
||||
ARMTargetLowering::ConstraintType
|
||||
ARMTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
if (Constraint.size() == 1) {
|
||||
unsigned S = Constraint.size();
|
||||
if (S == 1) {
|
||||
switch (Constraint[0]) {
|
||||
default: break;
|
||||
case 'l': return C_RegisterClass;
|
||||
@ -14377,12 +14378,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'h': return C_RegisterClass;
|
||||
case 'x': return C_RegisterClass;
|
||||
case 't': return C_RegisterClass;
|
||||
case 'j': return C_Other; // Constant for movw.
|
||||
// An address with a single base register. Due to the way we
|
||||
// currently handle addresses it is the same as an 'r' memory constraint.
|
||||
case 'j': return C_Immediate; // Constant for movw.
|
||||
// An address with a single base register. Due to the way we
|
||||
// currently handle addresses it is the same as an 'r' memory constraint.
|
||||
case 'Q': return C_Memory;
|
||||
}
|
||||
} else if (Constraint.size() == 2) {
|
||||
} else if (S == 2) {
|
||||
switch (Constraint[0]) {
|
||||
default: break;
|
||||
case 'T': return C_RegisterClass;
|
||||
|
@ -592,6 +592,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
[(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
|
||||
Sched<[WriteBrTbl]> {
|
||||
let Size = 2;
|
||||
let isNotDuplicable = 1;
|
||||
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
|
||||
}
|
||||
}
|
||||
@ -1465,7 +1466,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
|
||||
// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
|
||||
// and make use of the same compressed jump table format as Thumb-2.
|
||||
let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
|
||||
isIndirectBranch = 1 in {
|
||||
isIndirectBranch = 1, isNotDuplicable = 1 in {
|
||||
def tTBB_JT : tPseudoInst<(outs),
|
||||
(ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
|
||||
IIC_Br, []>, Sched<[WriteBr]>;
|
||||
|
@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
if (Constraint.size() == 1) {
|
||||
// See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
|
||||
switch (Constraint[0]) {
|
||||
default:
|
||||
break;
|
||||
case 'a': // Simple upper registers
|
||||
case 'b': // Base pointer registers pairs
|
||||
case 'd': // Upper register
|
||||
@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'O': // Integer constant (Range: 8, 16, 24)
|
||||
case 'P': // Integer constant (Range: 1)
|
||||
case 'R': // Integer constant (Range: -6 to 5)x
|
||||
return C_Other;
|
||||
default:
|
||||
break;
|
||||
return C_Immediate;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -116,9 +116,8 @@ private:
|
||||
void replaceWithGEP(std::vector<CallInst *> &CallList,
|
||||
uint32_t NumOfZerosIndex, uint32_t DIIndex);
|
||||
|
||||
Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr,
|
||||
std::string &AccessKey, uint32_t Kind,
|
||||
MDNode *&TypeMeta);
|
||||
Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey,
|
||||
uint32_t Kind, MDNode *&TypeMeta);
|
||||
bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex);
|
||||
bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind);
|
||||
};
|
||||
@ -340,8 +339,7 @@ bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue,
|
||||
/// Compute the base of the whole preserve_*_access_index chains, i.e., the base
|
||||
/// pointer of the first preserve_*_access_index call, and construct the access
|
||||
/// string, which will be the name of a global variable.
|
||||
Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
|
||||
std::string &AccessStr,
|
||||
Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
|
||||
std::string &AccessKey,
|
||||
uint32_t Kind,
|
||||
MDNode *&TypeMeta) {
|
||||
@ -392,16 +390,16 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
|
||||
if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2)
|
||||
return nullptr;
|
||||
|
||||
// Construct the type string AccessStr.
|
||||
// Construct the type string AccessKey.
|
||||
for (unsigned I = 0; I < AccessIndices.size(); ++I)
|
||||
AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr;
|
||||
AccessKey = std::to_string(AccessIndices[I]) + ":" + AccessKey;
|
||||
|
||||
if (TypeNameIndex == AccessIndices.size() - 1)
|
||||
AccessStr = "0:" + AccessStr;
|
||||
AccessKey = "0:" + AccessKey;
|
||||
|
||||
// Access key is the type name + access string, uniquely identifying
|
||||
// one kernel memory access.
|
||||
AccessKey = LastTypeName + ":" + AccessStr;
|
||||
AccessKey = LastTypeName + ":" + AccessKey;
|
||||
|
||||
return Base;
|
||||
}
|
||||
@ -410,10 +408,10 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
|
||||
/// transformation to a chain of relocable GEPs.
|
||||
bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
|
||||
uint32_t Kind) {
|
||||
std::string AccessStr, AccessKey;
|
||||
std::string AccessKey;
|
||||
MDNode *TypeMeta = nullptr;
|
||||
Value *Base =
|
||||
computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta);
|
||||
computeBaseAndAccessKey(Call, AccessKey, Kind, TypeMeta);
|
||||
if (!Base)
|
||||
return false;
|
||||
|
||||
@ -432,7 +430,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
|
||||
|
||||
if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
|
||||
GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false,
|
||||
GlobalVariable::ExternalLinkage, NULL, AccessStr);
|
||||
GlobalVariable::ExternalLinkage, NULL, AccessKey);
|
||||
GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
|
||||
// Set the metadata (debuginfo types) for the global.
|
||||
if (TypeMeta)
|
||||
|
@ -30,6 +30,18 @@ static const char *BTFKindStr[] = {
|
||||
#include "BTF.def"
|
||||
};
|
||||
|
||||
static const DIType * stripQualifiers(const DIType *Ty) {
|
||||
while (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
|
||||
unsigned Tag = DTy->getTag();
|
||||
if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
|
||||
Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type)
|
||||
break;
|
||||
Ty = DTy->getBaseType();
|
||||
}
|
||||
|
||||
return Ty;
|
||||
}
|
||||
|
||||
/// Emit a BTF common type.
|
||||
void BTFTypeBase::emitType(MCStreamer &OS) {
|
||||
OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
|
||||
@ -184,9 +196,9 @@ void BTFTypeEnum::emitType(MCStreamer &OS) {
|
||||
}
|
||||
}
|
||||
|
||||
BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize,
|
||||
uint32_t NumElems)
|
||||
: ElemSize(ElemSize) {
|
||||
BTFTypeArray::BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId,
|
||||
uint32_t ElemSize, uint32_t NumElems)
|
||||
: ElemTyNoQual(Ty), ElemSize(ElemSize) {
|
||||
Kind = BTF::BTF_KIND_ARRAY;
|
||||
BTFType.NameOff = 0;
|
||||
BTFType.Info = Kind << 24;
|
||||
@ -207,6 +219,9 @@ void BTFTypeArray::completeType(BTFDebug &BDebug) {
|
||||
// created during initial type traversal. Just
|
||||
// retrieve that type id.
|
||||
ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
|
||||
|
||||
ElemTypeNoQual = ElemTyNoQual ? BDebug.getTypeId(ElemTyNoQual)
|
||||
: ArrayInfo.ElemType;
|
||||
}
|
||||
|
||||
void BTFTypeArray::emitType(MCStreamer &OS) {
|
||||
@ -218,7 +233,7 @@ void BTFTypeArray::emitType(MCStreamer &OS) {
|
||||
|
||||
void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset,
|
||||
uint32_t &ElementTypeId) {
|
||||
ElementTypeId = ArrayInfo.ElemType;
|
||||
ElementTypeId = ElemTypeNoQual;
|
||||
LocOffset = Loc * ElemSize;
|
||||
}
|
||||
|
||||
@ -251,7 +266,9 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
|
||||
} else {
|
||||
BTFMember.Offset = DDTy->getOffsetInBits();
|
||||
}
|
||||
BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType());
|
||||
const auto *BaseTy = DDTy->getBaseType();
|
||||
BTFMember.Type = BDebug.getTypeId(BaseTy);
|
||||
MemberTypeNoQual.push_back(BDebug.getTypeId(stripQualifiers(BaseTy)));
|
||||
Members.push_back(BTFMember);
|
||||
}
|
||||
}
|
||||
@ -270,7 +287,7 @@ std::string BTFTypeStruct::getName() { return STy->getName(); }
|
||||
|
||||
void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset,
|
||||
uint32_t &MemberType) {
|
||||
MemberType = Members[Loc].Type;
|
||||
MemberType = MemberTypeNoQual[Loc];
|
||||
MemberOffset =
|
||||
HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset;
|
||||
}
|
||||
@ -492,10 +509,13 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
|
||||
uint32_t ElemTypeId, ElemSize;
|
||||
const DIType *ElemType = CTy->getBaseType();
|
||||
visitTypeEntry(ElemType, ElemTypeId, false, false);
|
||||
|
||||
// Strip qualifiers from element type to get accurate element size.
|
||||
ElemType = stripQualifiers(ElemType);
|
||||
ElemSize = ElemType->getSizeInBits() >> 3;
|
||||
|
||||
if (!CTy->getSizeInBits()) {
|
||||
auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemTypeId, 0, 0);
|
||||
auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemType, ElemTypeId, 0, 0);
|
||||
ArrayTypes.push_back(TypeEntry.get());
|
||||
ElemTypeId = addType(std::move(TypeEntry), CTy);
|
||||
} else {
|
||||
@ -507,9 +527,11 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
|
||||
const DISubrange *SR = cast<DISubrange>(Element);
|
||||
auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
|
||||
int64_t Count = CI->getSExtValue();
|
||||
const DIType *ArrayElemTy = (I == 0) ? ElemType : nullptr;
|
||||
|
||||
auto TypeEntry =
|
||||
llvm::make_unique<BTFTypeArray>(ElemTypeId, ElemSize, Count);
|
||||
llvm::make_unique<BTFTypeArray>(ArrayElemTy, ElemTypeId,
|
||||
ElemSize, Count);
|
||||
ArrayTypes.push_back(TypeEntry.get());
|
||||
if (I == 0)
|
||||
ElemTypeId = addType(std::move(TypeEntry), CTy);
|
||||
@ -1006,19 +1028,20 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
|
||||
unsigned RootId = populateStructType(RootTy);
|
||||
setTypeFromId(RootId, &PrevStructType, &PrevArrayType);
|
||||
unsigned RootTySize = PrevStructType->getStructSize();
|
||||
StringRef IndexPattern = AccessPattern.substr(AccessPattern.find_first_of(':') + 1);
|
||||
|
||||
BTFOffsetReloc OffsetReloc;
|
||||
OffsetReloc.Label = ORSym;
|
||||
OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back());
|
||||
OffsetReloc.OffsetNameOff = addString(IndexPattern.drop_back());
|
||||
OffsetReloc.TypeID = RootId;
|
||||
|
||||
uint32_t Start = 0, End = 0, Offset = 0;
|
||||
bool FirstAccess = true;
|
||||
for (auto C : AccessPattern) {
|
||||
for (auto C : IndexPattern) {
|
||||
if (C != ':') {
|
||||
End++;
|
||||
} else {
|
||||
std::string SubStr = AccessPattern.substr(Start, End - Start);
|
||||
std::string SubStr = IndexPattern.substr(Start, End - Start);
|
||||
int Loc = std::stoi(SubStr);
|
||||
|
||||
if (FirstAccess) {
|
||||
@ -1038,12 +1061,15 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
|
||||
Offset += LocOffset;
|
||||
PrevArrayType = nullptr;
|
||||
setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType);
|
||||
} else {
|
||||
llvm_unreachable("Internal Error: BTF offset relocation type traversal error");
|
||||
}
|
||||
|
||||
Start = End + 1;
|
||||
End = Start;
|
||||
}
|
||||
}
|
||||
AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset;
|
||||
AccessOffsets[AccessPattern.str()] = Offset;
|
||||
OffsetRelocTable[SecNameOff].push_back(OffsetReloc);
|
||||
}
|
||||
|
||||
@ -1227,7 +1253,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
|
||||
MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
|
||||
DIType *Ty = dyn_cast<DIType>(MDN);
|
||||
std::string TypeName = Ty->getName();
|
||||
int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()];
|
||||
int64_t Imm = AccessOffsets[GVar->getName().str()];
|
||||
|
||||
// Emit "mov ri, <imm>" for abstract member accesses.
|
||||
OutMI.setOpcode(BPF::MOV_ri);
|
||||
|
@ -104,11 +104,14 @@ public:
|
||||
|
||||
/// Handle array type.
|
||||
class BTFTypeArray : public BTFTypeBase {
|
||||
const DIType *ElemTyNoQual;
|
||||
uint32_t ElemSize;
|
||||
struct BTF::BTFArray ArrayInfo;
|
||||
uint32_t ElemTypeNoQual;
|
||||
|
||||
public:
|
||||
BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems);
|
||||
BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId,
|
||||
uint32_t ElemSize, uint32_t NumElems);
|
||||
uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
|
||||
void completeType(BTFDebug &BDebug);
|
||||
void emitType(MCStreamer &OS);
|
||||
@ -120,6 +123,7 @@ class BTFTypeStruct : public BTFTypeBase {
|
||||
const DICompositeType *STy;
|
||||
bool HasBitField;
|
||||
std::vector<struct BTF::BTFMember> Members;
|
||||
std::vector<uint32_t> MemberTypeNoQual;
|
||||
|
||||
public:
|
||||
BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField,
|
||||
|
@ -1208,6 +1208,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
|
||||
Res = V;
|
||||
} else
|
||||
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
|
||||
|
||||
MCBinaryExpr::Opcode Opcode;
|
||||
switch (getLexer().getKind()) {
|
||||
default:
|
||||
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
|
||||
return MatchOperand_Success;
|
||||
case AsmToken::Plus:
|
||||
Opcode = MCBinaryExpr::Add;
|
||||
break;
|
||||
case AsmToken::Minus:
|
||||
Opcode = MCBinaryExpr::Sub;
|
||||
break;
|
||||
}
|
||||
|
||||
const MCExpr *Expr;
|
||||
if (getParser().parseExpression(Expr))
|
||||
return MatchOperand_ParseFail;
|
||||
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
|
||||
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
|
||||
return MatchOperand_Success;
|
||||
}
|
||||
|
@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
|
||||
uint64_t FrameSize = MFI.getStackSize();
|
||||
|
||||
// Get the alignment.
|
||||
uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment()
|
||||
: getStackAlignment();
|
||||
unsigned StackAlign = getStackAlignment();
|
||||
if (RI->needsStackRealignment(MF)) {
|
||||
unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment());
|
||||
FrameSize += (MaxStackAlign - StackAlign);
|
||||
StackAlign = MaxStackAlign;
|
||||
}
|
||||
|
||||
// Set Max Call Frame Size
|
||||
uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign);
|
||||
MFI.setMaxCallFrameSize(MaxCallSize);
|
||||
|
||||
// Make sure the frame is aligned.
|
||||
FrameSize = alignTo(FrameSize, StackAlign);
|
||||
@ -101,6 +109,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
const RISCVInstrInfo *TII = STI.getInstrInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
|
||||
if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) {
|
||||
report_fatal_error(
|
||||
"RISC-V backend can't currently handle functions that need stack "
|
||||
"realignment and have variable sized objects");
|
||||
}
|
||||
|
||||
unsigned FPReg = getFPReg(STI);
|
||||
unsigned SPReg = getSPReg(STI);
|
||||
|
||||
@ -158,6 +172,29 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
nullptr, RI->getDwarfRegNum(FPReg, true), 0));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
|
||||
// Realign Stack
|
||||
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
|
||||
if (RI->needsStackRealignment(MF)) {
|
||||
unsigned MaxAlignment = MFI.getMaxAlignment();
|
||||
|
||||
const RISCVInstrInfo *TII = STI.getInstrInfo();
|
||||
if (isInt<12>(-(int)MaxAlignment)) {
|
||||
BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(-(int)MaxAlignment);
|
||||
} else {
|
||||
unsigned ShiftAmount = countTrailingZeros(MaxAlignment);
|
||||
unsigned VR =
|
||||
MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
|
||||
.addReg(SPReg)
|
||||
.addImm(ShiftAmount);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg)
|
||||
.addReg(VR)
|
||||
.addImm(ShiftAmount);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -257,6 +294,13 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
|
||||
if (FI >= MinCSFI && FI <= MaxCSFI) {
|
||||
FrameReg = RISCV::X2;
|
||||
Offset += MF.getFrameInfo().getStackSize();
|
||||
} else if (RI->needsStackRealignment(MF)) {
|
||||
assert(!MFI.hasVarSizedObjects() &&
|
||||
"Unexpected combination of stack realignment and varsized objects");
|
||||
// If the stack was realigned, the frame pointer is set in order to allow
|
||||
// SP to be restored, but we still access stack objects using SP.
|
||||
FrameReg = RISCV::X2;
|
||||
Offset += MF.getFrameInfo().getStackSize();
|
||||
} else {
|
||||
FrameReg = RI->getFrameRegister(MF);
|
||||
if (hasFP(MF))
|
||||
|
@ -1007,12 +1007,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
|
||||
// We can materialise `c1 << c2` into an add immediate, so it's "free",
|
||||
// and the combine should happen, to potentially allow further combines
|
||||
// later.
|
||||
if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
|
||||
if (ShiftedC1Int.getMinSignedBits() <= 64 &&
|
||||
isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
|
||||
return true;
|
||||
|
||||
// We can materialise `c1` in an add immediate, so it's "free", and the
|
||||
// combine should be prevented.
|
||||
if (isLegalAddImmediate(C1Int.getSExtValue()))
|
||||
if (C1Int.getMinSignedBits() <= 64 &&
|
||||
isLegalAddImmediate(C1Int.getSExtValue()))
|
||||
return false;
|
||||
|
||||
// Neither constant will fit into an immediate, so find materialisation
|
||||
@ -2397,6 +2399,25 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// getConstraintType - Given a constraint letter, return the type of
|
||||
/// constraint it is for this target.
|
||||
RISCVTargetLowering::ConstraintType
|
||||
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
if (Constraint.size() == 1) {
|
||||
switch (Constraint[0]) {
|
||||
default:
|
||||
break;
|
||||
case 'f':
|
||||
return C_RegisterClass;
|
||||
case 'I':
|
||||
case 'J':
|
||||
case 'K':
|
||||
return C_Immediate;
|
||||
}
|
||||
}
|
||||
return TargetLowering::getConstraintType(Constraint);
|
||||
}
|
||||
|
||||
std::pair<unsigned, const TargetRegisterClass *>
|
||||
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
StringRef Constraint,
|
||||
@ -2407,6 +2428,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
switch (Constraint[0]) {
|
||||
case 'r':
|
||||
return std::make_pair(0U, &RISCV::GPRRegClass);
|
||||
case 'f':
|
||||
if (Subtarget.hasStdExtF() && VT == MVT::f32)
|
||||
return std::make_pair(0U, &RISCV::FPR32RegClass);
|
||||
if (Subtarget.hasStdExtD() && VT == MVT::f64)
|
||||
return std::make_pair(0U, &RISCV::FPR64RegClass);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -92,6 +92,7 @@ public:
|
||||
// This method returns the name of a target specific DAG node.
|
||||
const char *getTargetNodeName(unsigned Opcode) const override;
|
||||
|
||||
ConstraintType getConstraintType(StringRef Constraint) const override;
|
||||
std::pair<unsigned, const TargetRegisterClass *>
|
||||
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
StringRef Constraint, MVT VT) const override;
|
||||
|
@ -3183,7 +3183,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'e':
|
||||
return C_RegisterClass;
|
||||
case 'I': // SIMM13
|
||||
return C_Other;
|
||||
return C_Immediate;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -956,7 +956,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'K': // Signed 16-bit constant
|
||||
case 'L': // Signed 20-bit displacement (on all targets we support)
|
||||
case 'M': // 0x7fffffff
|
||||
return C_Other;
|
||||
return C_Immediate;
|
||||
|
||||
default:
|
||||
break;
|
||||
|
@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
|
||||
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
|
||||
"Support 64-bit instructions">;
|
||||
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
|
||||
"64-bit with cmpxchg16b">;
|
||||
"64-bit with cmpxchg16b",
|
||||
[FeatureCMPXCHG8B]>;
|
||||
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
|
||||
"SHLD instruction is slow">;
|
||||
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
|
||||
|
@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
|
||||
Complexity += 2;
|
||||
}
|
||||
|
||||
// Heuristic: try harder to form an LEA from ADD if the operands set flags.
|
||||
// Unlike ADD, LEA does not affect flags, so we will be less likely to require
|
||||
// duplicating flag-producing instructions later in the pipeline.
|
||||
if (N.getOpcode() == ISD::ADD) {
|
||||
auto isMathWithFlags = [](SDValue V) {
|
||||
switch (V.getOpcode()) {
|
||||
case X86ISD::ADD:
|
||||
case X86ISD::SUB:
|
||||
case X86ISD::ADC:
|
||||
case X86ISD::SBB:
|
||||
/* TODO: These opcodes can be added safely, but we may want to justify
|
||||
their inclusion for different reasons (better for reg-alloc).
|
||||
case X86ISD::SMUL:
|
||||
case X86ISD::UMUL:
|
||||
case X86ISD::OR:
|
||||
case X86ISD::XOR:
|
||||
case X86ISD::AND:
|
||||
*/
|
||||
// Value 1 is the flag output of the node - verify it's not dead.
|
||||
return !SDValue(V.getNode(), 1).use_empty();
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
};
|
||||
// TODO: This could be an 'or' rather than 'and' to make the transform more
|
||||
// likely to happen. We might want to factor in whether there's a
|
||||
// load folding opportunity for the math op that disappears with LEA.
|
||||
if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
|
||||
Complexity++;
|
||||
}
|
||||
|
||||
if (AM.Disp)
|
||||
Complexity++;
|
||||
|
||||
@ -3302,8 +3333,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
|
||||
SDValue ImplDef = SDValue(
|
||||
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
|
||||
insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
|
||||
NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
|
||||
NBits);
|
||||
|
||||
SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
|
||||
insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
|
||||
NBits = SDValue(
|
||||
CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
|
||||
NBits, SRIdxVal), 0);
|
||||
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
|
||||
|
||||
if (Subtarget->hasBMI2()) {
|
||||
|
@ -4069,6 +4069,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
InFlag = Chain.getValue(1);
|
||||
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
|
||||
|
||||
// Save heapallocsite metadata.
|
||||
if (CLI.CS)
|
||||
if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
|
||||
DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
|
||||
|
||||
// Create the CALLSEQ_END node.
|
||||
unsigned NumBytesForCalleeToPop;
|
||||
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
|
||||
@ -5500,6 +5505,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
|
||||
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
|
||||
Idx == (VT.getVectorNumElements() / 2) &&
|
||||
Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
|
||||
Src.getOperand(1).getValueType() == SubVT &&
|
||||
isNullConstant(Src.getOperand(2))) {
|
||||
Ops.push_back(Src.getOperand(1));
|
||||
Ops.push_back(Sub);
|
||||
@ -34062,25 +34068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
case X86ISD::SUBV_BROADCAST: {
|
||||
// Reduce size of broadcast if we don't need the upper half.
|
||||
unsigned HalfElts = NumElts / 2;
|
||||
if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
|
||||
SDValue Src = Op.getOperand(0);
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
|
||||
SDValue Half = Src;
|
||||
if (SrcVT.getVectorNumElements() != HalfElts) {
|
||||
MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
|
||||
Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
|
||||
}
|
||||
|
||||
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
|
||||
TLO.DAG, SDLoc(Op),
|
||||
Half.getValueSizeInBits()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case X86ISD::VPERMV: {
|
||||
SDValue Mask = Op.getOperand(0);
|
||||
APInt MaskUndef, MaskZero;
|
||||
@ -34134,6 +34121,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
||||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
}
|
||||
// Subvector broadcast.
|
||||
case X86ISD::SUBV_BROADCAST: {
|
||||
SDLoc DL(Op);
|
||||
SDValue Src = Op.getOperand(0);
|
||||
if (Src.getValueSizeInBits() > ExtSizeInBits)
|
||||
Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
else if (Src.getValueSizeInBits() < ExtSizeInBits) {
|
||||
MVT SrcSVT = Src.getSimpleValueType().getScalarType();
|
||||
MVT SrcVT =
|
||||
MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
|
||||
Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
|
||||
}
|
||||
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
|
||||
TLO.DAG, DL, ExtSizeInBits));
|
||||
}
|
||||
// Byte shifts by immediate.
|
||||
case X86ISD::VSHLDQ:
|
||||
@ -43839,6 +43841,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
|
||||
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
|
||||
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 &&
|
||||
isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() &&
|
||||
Vec.getOperand(1).getValueSizeInBits() == SubVecVT.getSizeInBits() &&
|
||||
Vec.hasOneUse()) {
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
|
||||
Vec.getOperand(1), Vec.getOperand(2));
|
||||
@ -44660,10 +44663,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'I':
|
||||
case 'J':
|
||||
case 'K':
|
||||
case 'L':
|
||||
case 'M':
|
||||
case 'N':
|
||||
case 'G':
|
||||
case 'L':
|
||||
case 'M':
|
||||
return C_Immediate;
|
||||
case 'C':
|
||||
case 'e':
|
||||
case 'Z':
|
||||
|
@ -3288,26 +3288,35 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
|
||||
|
||||
// Look for an 'and' of two (opposite) logical shifts.
|
||||
// Pick the single-use shift as XShift.
|
||||
Value *XShift, *YShift;
|
||||
Instruction *XShift, *YShift;
|
||||
if (!match(I.getOperand(0),
|
||||
m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
|
||||
m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
|
||||
m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
|
||||
m_CombineAnd(m_AnyLogicalShift, m_Instruction(YShift)))))
|
||||
return nullptr;
|
||||
|
||||
// If YShift is a single-use 'lshr', swap the shifts around.
|
||||
if (match(YShift, m_OneUse(m_AnyLShr)))
|
||||
// If YShift is a 'lshr', swap the shifts around.
|
||||
if (match(YShift, m_AnyLShr))
|
||||
std::swap(XShift, YShift);
|
||||
|
||||
// The shifts must be in opposite directions.
|
||||
Instruction::BinaryOps XShiftOpcode =
|
||||
cast<BinaryOperator>(XShift)->getOpcode();
|
||||
if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
|
||||
auto XShiftOpcode = XShift->getOpcode();
|
||||
if (XShiftOpcode == YShift->getOpcode())
|
||||
return nullptr; // Do not care about same-direction shifts here.
|
||||
|
||||
Value *X, *XShAmt, *Y, *YShAmt;
|
||||
match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
|
||||
match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
|
||||
|
||||
// If one of the values being shifted is a constant, then we will end with
|
||||
// and+icmp, and shift instr will be constant-folded. If they are not,
|
||||
// however, we will need to ensure that we won't increase instruction count.
|
||||
if (!isa<Constant>(X) && !isa<Constant>(Y)) {
|
||||
// At least one of the hands of the 'and' should be one-use shift.
|
||||
if (!match(I.getOperand(0),
|
||||
m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Can we fold (XShAmt+YShAmt) ?
|
||||
Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
|
||||
SQ.getWithInstruction(&I));
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "llvm/Support/DebugCounter.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "div-rem-pairs"
|
||||
@ -32,24 +33,44 @@ STATISTIC(NumDecomposed, "Number of instructions decomposed");
|
||||
DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
|
||||
"Controls transformations in div-rem-pairs pass");
|
||||
|
||||
/// Find matching pairs of integer div/rem ops (they have the same numerator,
|
||||
/// denominator, and signedness). If they exist in different basic blocks, bring
|
||||
/// them together by hoisting or replace the common division operation that is
|
||||
/// implicit in the remainder:
|
||||
/// X % Y <--> X - ((X / Y) * Y).
|
||||
///
|
||||
/// We can largely ignore the normal safety and cost constraints on speculation
|
||||
/// of these ops when we find a matching pair. This is because we are already
|
||||
/// guaranteed that any exceptions and most cost are already incurred by the
|
||||
/// first member of the pair.
|
||||
///
|
||||
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
|
||||
/// SimplifyCFG, but it's split off on its own because it's different enough
|
||||
/// that it doesn't quite match the stated objectives of those passes.
|
||||
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
const DominatorTree &DT) {
|
||||
bool Changed = false;
|
||||
/// A thin wrapper to store two values that we matched as a div-rem pair.
|
||||
/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
|
||||
struct DivRemPairWorklistEntry {
|
||||
/// The actual udiv/sdiv instruction. Source of truth: the pair's type,
/// signedness, dividend and divisor are all read from this instruction.
|
||||
AssertingVH<Instruction> DivInst;
|
||||
|
||||
/// The instruction that we have matched as a remainder instruction.
|
||||
/// Should only be used as a Value; don't introspect it.
|
||||
AssertingVH<Instruction> RemInst;
|
||||
|
||||
/// Store the matched pair. Asserts that \p DivInst_ is a udiv or sdiv and
/// that both instructions have the same type.
DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
|
||||
: DivInst(DivInst_), RemInst(RemInst_) {
|
||||
assert((DivInst->getOpcode() == Instruction::UDiv ||
|
||||
DivInst->getOpcode() == Instruction::SDiv) &&
|
||||
"Not a division.");
|
||||
assert(DivInst->getType() == RemInst->getType() && "Types should match.");
|
||||
// We can't check anything else about the remainder instruction;
|
||||
// it's not strictly required to be a urem/srem.
|
||||
}
|
||||
|
||||
/// The type for this pair, identical for both the div and rem.
|
||||
Type *getType() const { return DivInst->getType(); }
|
||||
|
||||
/// Is this pair signed or unsigned? True iff the division is an sdiv.
|
||||
bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
|
||||
|
||||
/// In this pair, what are the dividend and divisor? Both are taken from the
/// division instruction: operand 0 is the dividend, operand 1 the divisor.
|
||||
Value *getDividend() const { return DivInst->getOperand(0); }
|
||||
Value *getDivisor() const { return DivInst->getOperand(1); }
|
||||
};
|
||||
/// Worklist of matched div/rem pairs, one DivRemPairWorklistEntry per pair.
using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
|
||||
|
||||
/// Find matching pairs of integer div/rem ops (they have the same numerator,
|
||||
/// denominator, and signedness). Place those pairs into a worklist for further
|
||||
/// processing. This indirection is needed because we have to use AssertingVH<>
|
||||
/// because we will be doing RAUW, and if one of the rem instructions we change
|
||||
/// happens to be an input to another div/rem in the maps, we'd have problems.
|
||||
static DivRemWorklistTy getWorklist(Function &F) {
|
||||
// Insert all divide and remainder instructions into maps keyed by their
|
||||
// operands and opcode (signed or unsigned).
|
||||
DenseMap<DivRemMapKey, Instruction *> DivMap;
|
||||
@ -69,6 +90,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
}
|
||||
}
|
||||
|
||||
// We'll accumulate the matching pairs of div-rem instructions here.
|
||||
DivRemWorklistTy Worklist;
|
||||
|
||||
// We can iterate over either map because we are only looking for matched
|
||||
// pairs. Choose remainders for efficiency because they are usually even more
|
||||
// rare than division.
|
||||
@ -78,12 +102,45 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
if (!DivInst)
|
||||
continue;
|
||||
|
||||
// We have a matching pair of div/rem instructions. If one dominates the
|
||||
// other, hoist and/or replace one.
|
||||
// We have a matching pair of div/rem instructions.
|
||||
NumPairs++;
|
||||
Instruction *RemInst = RemPair.second;
|
||||
bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
|
||||
bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
|
||||
|
||||
// Place it in the worklist.
|
||||
Worklist.emplace_back(DivInst, RemInst);
|
||||
}
|
||||
|
||||
return Worklist;
|
||||
}
|
||||
|
||||
/// Find matching pairs of integer div/rem ops (they have the same numerator,
|
||||
/// denominator, and signedness). If they exist in different basic blocks, bring
|
||||
/// them together by hoisting or replace the common division operation that is
|
||||
/// implicit in the remainder:
|
||||
/// X % Y <--> X - ((X / Y) * Y).
|
||||
///
|
||||
/// We can largely ignore the normal safety and cost constraints on speculation
|
||||
/// of these ops when we find a matching pair. This is because we are already
|
||||
/// guaranteed that any exceptions and most cost are already incurred by the
|
||||
/// first member of the pair.
|
||||
///
|
||||
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
|
||||
/// SimplifyCFG, but it's split off on its own because it's different enough
|
||||
/// that it doesn't quite match the stated objectives of those passes.
|
||||
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
const DominatorTree &DT) {
|
||||
bool Changed = false;
|
||||
|
||||
// Get the matching pairs of div-rem instructions. We want this extra
|
||||
// indirection to avoid dealing with having to RAUW the keys of the maps.
|
||||
DivRemWorklistTy Worklist = getWorklist(F);
|
||||
|
||||
// Process each entry in the worklist.
|
||||
for (DivRemPairWorklistEntry &E : Worklist) {
|
||||
bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
|
||||
|
||||
auto &DivInst = E.DivInst;
|
||||
auto &RemInst = E.RemInst;
|
||||
|
||||
// If the target supports div+rem and the instructions are in the same block
|
||||
// already, there's nothing to do. The backend should handle this. If the
|
||||
@ -110,8 +167,8 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
// The target does not have a single div/rem operation. Decompose the
|
||||
// remainder calculation as:
|
||||
// X % Y --> X - ((X / Y) * Y).
|
||||
Value *X = RemInst->getOperand(0);
|
||||
Value *Y = RemInst->getOperand(1);
|
||||
Value *X = E.getDividend();
|
||||
Value *Y = E.getDivisor();
|
||||
Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
|
||||
Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
|
||||
|
||||
@ -152,8 +209,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
|
||||
// Now kill the explicit remainder. We have replaced it with:
|
||||
// (sub X, (mul (div X, Y), Y))
|
||||
RemInst->replaceAllUsesWith(Sub);
|
||||
RemInst->eraseFromParent();
|
||||
Sub->setName(RemInst->getName() + ".decomposed");
|
||||
Instruction *OrigRemInst = RemInst;
|
||||
// Update AssertingVH<> with new instruction so it doesn't assert.
|
||||
RemInst = Sub;
|
||||
// And replace the original instruction with the new one.
|
||||
OrigRemInst->replaceAllUsesWith(Sub);
|
||||
OrigRemInst->eraseFromParent();
|
||||
NumDecomposed++;
|
||||
}
|
||||
Changed = true;
|
||||
@ -188,7 +250,7 @@ struct DivRemPairsLegacyPass : public FunctionPass {
|
||||
return optimizeDivRem(F, TTI, DT);
|
||||
}
|
||||
};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
char DivRemPairsLegacyPass::ID = 0;
|
||||
INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
|
||||
|
@ -777,8 +777,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
|
||||
// speculation if the predecessor is an invoke. This doesn't seem
|
||||
// fundamental and we should probably be splitting critical edges
|
||||
// differently.
|
||||
if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
|
||||
isa<InvokeInst>(PredBB->getTerminator())) {
|
||||
const auto *TermInst = PredBB->getTerminator();
|
||||
if (isa<IndirectBrInst>(TermInst) ||
|
||||
isa<InvokeInst>(TermInst) ||
|
||||
isa<CallBrInst>(TermInst)) {
|
||||
LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
|
||||
<< PredBB->getName() << "\n");
|
||||
return false;
|
||||
|
Loading…
x
Reference in New Issue
Block a user