Vendor import of llvm release_60 branch r321788:
https://llvm.org/svn/llvm-project/llvm/branches/release_60@321788
This commit is contained in:
parent
b8a2042aa9
commit
d215fd3b74
@ -27,7 +27,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
|
||||
set(LLVM_VERSION_PATCH 0)
|
||||
endif()
|
||||
if(NOT DEFINED LLVM_VERSION_SUFFIX)
|
||||
set(LLVM_VERSION_SUFFIX svn)
|
||||
set(LLVM_VERSION_SUFFIX "")
|
||||
endif()
|
||||
|
||||
if (NOT PACKAGE_VERSION)
|
||||
|
@ -640,7 +640,8 @@ endif()
|
||||
string(REPLACE " " ";" LLVM_BINDINGS_LIST "${LLVM_BINDINGS}")
|
||||
|
||||
function(find_python_module module)
|
||||
string(TOUPPER ${module} module_upper)
|
||||
string(REPLACE "." "_" module_name ${module})
|
||||
string(TOUPPER ${module_name} module_upper)
|
||||
set(FOUND_VAR PY_${module_upper}_FOUND)
|
||||
|
||||
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import ${module}"
|
||||
@ -658,13 +659,16 @@ endfunction()
|
||||
|
||||
set (PYTHON_MODULES
|
||||
pygments
|
||||
# Some systems still don't have pygments.lexers.c_cpp which was introduced in
|
||||
# version 2.0 in 2014...
|
||||
pygments.lexers.c_cpp
|
||||
yaml
|
||||
)
|
||||
foreach(module ${PYTHON_MODULES})
|
||||
find_python_module(${module})
|
||||
endforeach()
|
||||
|
||||
if(PY_PYGMENTS_FOUND AND PY_YAML_FOUND)
|
||||
if(PY_PYGMENTS_FOUND AND PY_PYGMENTS_LEXERS_C_CPP_FOUND AND PY_YAML_FOUND)
|
||||
set (LLVM_HAVE_OPT_VIEWER_MODULES 1)
|
||||
else()
|
||||
set (LLVM_HAVE_OPT_VIEWER_MODULES 0)
|
||||
|
@ -325,9 +325,9 @@ class TargetPassConfig : public ImmutablePass {
|
||||
virtual bool isGlobalISelEnabled() const;
|
||||
|
||||
/// Check whether or not GlobalISel should abort on error.
|
||||
/// When this is disable, GlobalISel will fall back on SDISel instead of
|
||||
/// When this is disabled, GlobalISel will fall back on SDISel instead of
|
||||
/// erroring out.
|
||||
virtual bool isGlobalISelAbortEnabled() const;
|
||||
bool isGlobalISelAbortEnabled() const;
|
||||
|
||||
/// Check whether or not a diagnostic should be emitted when GlobalISel
|
||||
/// uses the fallback path. In other words, it will emit a diagnostic
|
||||
|
@ -218,6 +218,7 @@ class Function : public GlobalObject, public ilist_node<Function> {
|
||||
Attribute::get(getContext(), Kind, Val));
|
||||
}
|
||||
|
||||
/// @brief Add function attributes to this function.
|
||||
void addFnAttr(Attribute Attr) {
|
||||
addAttribute(AttributeList::FunctionIndex, Attr);
|
||||
}
|
||||
@ -268,6 +269,8 @@ class Function : public GlobalObject, public ilist_node<Function> {
|
||||
bool hasFnAttribute(Attribute::AttrKind Kind) const {
|
||||
return AttributeSets.hasFnAttribute(Kind);
|
||||
}
|
||||
|
||||
/// @brief Return true if the function has the attribute.
|
||||
bool hasFnAttribute(StringRef Kind) const {
|
||||
return AttributeSets.hasFnAttribute(Kind);
|
||||
}
|
||||
@ -276,6 +279,8 @@ class Function : public GlobalObject, public ilist_node<Function> {
|
||||
Attribute getFnAttribute(Attribute::AttrKind Kind) const {
|
||||
return getAttribute(AttributeList::FunctionIndex, Kind);
|
||||
}
|
||||
|
||||
/// @brief Return the attribute for the given attribute kind.
|
||||
Attribute getFnAttribute(StringRef Kind) const {
|
||||
return getAttribute(AttributeList::FunctionIndex, Kind);
|
||||
}
|
||||
@ -342,10 +347,12 @@ class Function : public GlobalObject, public ilist_node<Function> {
|
||||
return getAttributes().hasParamAttribute(ArgNo, Kind);
|
||||
}
|
||||
|
||||
/// @brief gets the attribute from the list of attributes.
|
||||
Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
|
||||
return AttributeSets.getAttribute(i, Kind);
|
||||
}
|
||||
|
||||
/// @brief gets the attribute from the list of attributes.
|
||||
Attribute getAttribute(unsigned i, StringRef Kind) const {
|
||||
return AttributeSets.getAttribute(i, Kind);
|
||||
}
|
||||
|
@ -304,7 +304,8 @@ class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic <
|
||||
llvm_i1_ty, // slc(imm)
|
||||
llvm_i1_ty, // lwe(imm)
|
||||
llvm_i1_ty], // da(imm)
|
||||
!if(NoMem, [IntrNoMem], [IntrReadMem])>;
|
||||
!if(NoMem, [IntrNoMem], [IntrReadMem]), "",
|
||||
!if(NoMem, [], [SDNPMemOperand])>;
|
||||
|
||||
def int_amdgcn_image_load : AMDGPUImageLoad;
|
||||
def int_amdgcn_image_load_mip : AMDGPUImageLoad;
|
||||
@ -320,7 +321,7 @@ class AMDGPUImageStore : Intrinsic <
|
||||
llvm_i1_ty, // slc(imm)
|
||||
llvm_i1_ty, // lwe(imm)
|
||||
llvm_i1_ty], // da(imm)
|
||||
[]>;
|
||||
[IntrWriteMem], "", [SDNPMemOperand]>;
|
||||
|
||||
def int_amdgcn_image_store : AMDGPUImageStore;
|
||||
def int_amdgcn_image_store_mip : AMDGPUImageStore;
|
||||
@ -336,7 +337,8 @@ class AMDGPUImageSample<bit NoMem = 0> : Intrinsic <
|
||||
llvm_i1_ty, // slc(imm)
|
||||
llvm_i1_ty, // lwe(imm)
|
||||
llvm_i1_ty], // da(imm)
|
||||
!if(NoMem, [IntrNoMem], [IntrReadMem])>;
|
||||
!if(NoMem, [IntrNoMem], [IntrReadMem]), "",
|
||||
!if(NoMem, [], [SDNPMemOperand])>;
|
||||
|
||||
// Basic sample
|
||||
def int_amdgcn_image_sample : AMDGPUImageSample;
|
||||
@ -428,7 +430,7 @@ class AMDGPUImageAtomic : Intrinsic <
|
||||
llvm_i1_ty, // r128(imm)
|
||||
llvm_i1_ty, // da(imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[]>;
|
||||
[], "", [SDNPMemOperand]>;
|
||||
|
||||
def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
|
||||
def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
|
||||
@ -451,7 +453,7 @@ def int_amdgcn_image_atomic_cmpswap : Intrinsic <
|
||||
llvm_i1_ty, // r128(imm)
|
||||
llvm_i1_ty, // da(imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[]>;
|
||||
[], "", [SDNPMemOperand]>;
|
||||
|
||||
class AMDGPUBufferLoad : Intrinsic <
|
||||
[llvm_anyfloat_ty],
|
||||
@ -460,7 +462,7 @@ class AMDGPUBufferLoad : Intrinsic <
|
||||
llvm_i32_ty, // offset(SGPR/VGPR/imm)
|
||||
llvm_i1_ty, // glc(imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[IntrReadMem]>;
|
||||
[IntrReadMem], "", [SDNPMemOperand]>;
|
||||
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
|
||||
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
|
||||
|
||||
@ -472,7 +474,7 @@ class AMDGPUBufferStore : Intrinsic <
|
||||
llvm_i32_ty, // offset(SGPR/VGPR/imm)
|
||||
llvm_i1_ty, // glc(imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[IntrWriteMem]>;
|
||||
[IntrWriteMem], "", [SDNPMemOperand]>;
|
||||
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
|
||||
def int_amdgcn_buffer_store : AMDGPUBufferStore;
|
||||
|
||||
@ -487,7 +489,7 @@ def int_amdgcn_tbuffer_load : Intrinsic <
|
||||
llvm_i32_ty, // nfmt(imm)
|
||||
llvm_i1_ty, // glc(imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[]>;
|
||||
[IntrReadMem], "", [SDNPMemOperand]>;
|
||||
|
||||
def int_amdgcn_tbuffer_store : Intrinsic <
|
||||
[],
|
||||
@ -501,7 +503,7 @@ def int_amdgcn_tbuffer_store : Intrinsic <
|
||||
llvm_i32_ty, // nfmt(imm)
|
||||
llvm_i1_ty, // glc(imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[]>;
|
||||
[IntrWriteMem], "", [SDNPMemOperand]>;
|
||||
|
||||
class AMDGPUBufferAtomic : Intrinsic <
|
||||
[llvm_i32_ty],
|
||||
@ -510,7 +512,7 @@ class AMDGPUBufferAtomic : Intrinsic <
|
||||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // offset(SGPR/VGPR/imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[]>;
|
||||
[], "", [SDNPMemOperand]>;
|
||||
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
|
||||
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
|
||||
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
|
||||
@ -529,7 +531,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
|
||||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // offset(SGPR/VGPR/imm)
|
||||
llvm_i1_ty], // slc(imm)
|
||||
[]>;
|
||||
[], "", [SDNPMemOperand]>;
|
||||
|
||||
// Uses that do not set the done bit should set IntrWriteMem on the
|
||||
// call site.
|
||||
|
@ -1862,6 +1862,33 @@ using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver,
|
||||
SmallVectorImpl<const char *> &NewArgv,
|
||||
bool MarkEOLs);
|
||||
|
||||
/// Tokenizes content of configuration file.
|
||||
///
|
||||
/// \param [in] Source The string representing content of config file.
|
||||
/// \param [in] Saver Delegates back to the caller for saving parsed strings.
|
||||
/// \param [out] NewArgv All parsed strings are appended to NewArgv.
|
||||
/// \param [in] MarkEOLs Added for compatibility with TokenizerCallback.
|
||||
///
|
||||
/// It works like TokenizeGNUCommandLine with ability to skip comment lines.
|
||||
///
|
||||
void tokenizeConfigFile(StringRef Source, StringSaver &Saver,
|
||||
SmallVectorImpl<const char *> &NewArgv,
|
||||
bool MarkEOLs = false);
|
||||
|
||||
/// Reads command line options from the given configuration file.
|
||||
///
|
||||
/// \param [in] CfgFileName Path to configuration file.
|
||||
/// \param [in] Saver Objects that saves allocated strings.
|
||||
/// \param [out] Argv Array to which the read options are added.
|
||||
/// \return true if the file was successfully read.
|
||||
///
|
||||
/// It reads content of the specified file, tokenizes it and expands "@file"
|
||||
/// commands resolving file names in them relative to the directory where
|
||||
/// CfgFilename resides.
|
||||
///
|
||||
bool readConfigFile(StringRef CfgFileName, StringSaver &Saver,
|
||||
SmallVectorImpl<const char *> &Argv);
|
||||
|
||||
/// \brief Expand response files on a command line recursively using the given
|
||||
/// StringSaver and tokenization strategy. Argv should contain the command line
|
||||
/// before expansion and will be modified in place. If requested, Argv will
|
||||
|
@ -123,8 +123,8 @@ class Target {
|
||||
using AsmPrinterCtorTy = AsmPrinter *(*)(
|
||||
TargetMachine &TM, std::unique_ptr<MCStreamer> &&Streamer);
|
||||
using MCAsmBackendCtorTy = MCAsmBackend *(*)(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options);
|
||||
using MCAsmParserCtorTy = MCTargetAsmParser *(*)(
|
||||
const MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII,
|
||||
@ -381,15 +381,12 @@ class Target {
|
||||
}
|
||||
|
||||
/// createMCAsmBackend - Create a target specific assembly parser.
|
||||
///
|
||||
/// \param TheTriple The target triple string.
|
||||
MCAsmBackend *createMCAsmBackend(const MCRegisterInfo &MRI,
|
||||
StringRef TheTriple, StringRef CPU,
|
||||
const MCTargetOptions &Options)
|
||||
const {
|
||||
MCAsmBackend *createMCAsmBackend(const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options) const {
|
||||
if (!MCAsmBackendCtorFn)
|
||||
return nullptr;
|
||||
return MCAsmBackendCtorFn(*this, MRI, Triple(TheTriple), CPU, Options);
|
||||
return MCAsmBackendCtorFn(*this, STI, MRI, Options);
|
||||
}
|
||||
|
||||
/// createMCAsmParser - Create a target specific assembly parser.
|
||||
@ -1106,10 +1103,10 @@ template <class MCAsmBackendImpl> struct RegisterMCAsmBackend {
|
||||
}
|
||||
|
||||
private:
|
||||
static MCAsmBackend *Allocator(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TheTriple, StringRef CPU,
|
||||
static MCAsmBackend *Allocator(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options) {
|
||||
return new MCAsmBackendImpl(T, MRI, TheTriple, CPU);
|
||||
return new MCAsmBackendImpl(T, STI, MRI);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -264,7 +264,8 @@ template <typename LoopPassT>
|
||||
class FunctionToLoopPassAdaptor
|
||||
: public PassInfoMixin<FunctionToLoopPassAdaptor<LoopPassT>> {
|
||||
public:
|
||||
explicit FunctionToLoopPassAdaptor(LoopPassT Pass) : Pass(std::move(Pass)) {
|
||||
explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false)
|
||||
: Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging) {
|
||||
LoopCanonicalizationFPM.addPass(LoopSimplifyPass());
|
||||
LoopCanonicalizationFPM.addPass(LCSSAPass());
|
||||
}
|
||||
@ -384,8 +385,8 @@ class FunctionToLoopPassAdaptor
|
||||
/// adaptor.
|
||||
template <typename LoopPassT>
|
||||
FunctionToLoopPassAdaptor<LoopPassT>
|
||||
createFunctionToLoopPassAdaptor(LoopPassT Pass) {
|
||||
return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass));
|
||||
createFunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) {
|
||||
return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass), DebugLogging);
|
||||
}
|
||||
|
||||
/// \brief Pass for printing a loop's contents as textual IR.
|
||||
|
@ -826,7 +826,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
|
||||
MaxRecurse))
|
||||
return V;
|
||||
|
||||
// Mul distributes over Add. Try some generic simplifications based on this.
|
||||
// Mul distributes over Add. Try some generic simplifications based on this.
|
||||
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
|
||||
Q, MaxRecurse))
|
||||
return V;
|
||||
@ -3838,12 +3838,13 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
|
||||
// Fold into undef if index is out of bounds.
|
||||
if (auto *CI = dyn_cast<ConstantInt>(Idx)) {
|
||||
uint64_t NumElements = cast<VectorType>(Vec->getType())->getNumElements();
|
||||
|
||||
if (CI->uge(NumElements))
|
||||
return UndefValue::get(Vec->getType());
|
||||
}
|
||||
|
||||
// TODO: We should also fold if index is iteslf an undef.
|
||||
// If index is undef, it might be out of bounds (see above case)
|
||||
if (isa<UndefValue>(Idx))
|
||||
return UndefValue::get(Vec->getType());
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
@ -3896,10 +3897,13 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
|
||||
|
||||
// If extracting a specified index from the vector, see if we can recursively
|
||||
// find a previously computed scalar that was inserted into the vector.
|
||||
if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
|
||||
if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements()))
|
||||
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
|
||||
return Elt;
|
||||
if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
|
||||
if (IdxC->getValue().uge(Vec->getType()->getVectorNumElements()))
|
||||
// definitely out of bounds, thus undefined result
|
||||
return UndefValue::get(Vec->getType()->getVectorElementType());
|
||||
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
|
||||
return Elt;
|
||||
}
|
||||
|
||||
// An undef extract index can be arbitrarily chosen to be an out-of-range
|
||||
// index value, which would result in the instruction being undef.
|
||||
@ -4489,28 +4493,55 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
|
||||
}
|
||||
}
|
||||
|
||||
Value *IIOperand = *ArgBegin;
|
||||
Value *X;
|
||||
switch (IID) {
|
||||
case Intrinsic::fabs: {
|
||||
if (SignBitMustBeZero(*ArgBegin, Q.TLI))
|
||||
return *ArgBegin;
|
||||
if (SignBitMustBeZero(IIOperand, Q.TLI))
|
||||
return IIOperand;
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::bswap: {
|
||||
Value *IIOperand = *ArgBegin;
|
||||
Value *X = nullptr;
|
||||
// bswap(bswap(x)) -> x
|
||||
if (match(IIOperand, m_BSwap(m_Value(X))))
|
||||
return X;
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::bitreverse: {
|
||||
Value *IIOperand = *ArgBegin;
|
||||
Value *X = nullptr;
|
||||
// bitreverse(bitreverse(x)) -> x
|
||||
if (match(IIOperand, m_BitReverse(m_Value(X))))
|
||||
return X;
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::exp: {
|
||||
// exp(log(x)) -> x
|
||||
if (Q.CxtI->isFast() &&
|
||||
match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X))))
|
||||
return X;
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::exp2: {
|
||||
// exp2(log2(x)) -> x
|
||||
if (Q.CxtI->isFast() &&
|
||||
match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
|
||||
return X;
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::log: {
|
||||
// log(exp(x)) -> x
|
||||
if (Q.CxtI->isFast() &&
|
||||
match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
|
||||
return X;
|
||||
return nullptr;
|
||||
}
|
||||
case Intrinsic::log2: {
|
||||
// log2(exp2(x)) -> x
|
||||
if (Q.CxtI->isFast() &&
|
||||
match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) {
|
||||
return X;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -2358,7 +2358,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
|
||||
FoundMatch = true;
|
||||
}
|
||||
if (FoundMatch)
|
||||
return getAddExpr(Ops, Flags);
|
||||
return getAddExpr(Ops, Flags, Depth + 1);
|
||||
|
||||
// Check for truncates. If all the operands are truncated from the same
|
||||
// type, see if factoring out the truncate would permit the result to be
|
||||
@ -6402,9 +6402,8 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
|
||||
BasicBlock *Header = L->getHeader();
|
||||
|
||||
// Push all Loop-header PHIs onto the Worklist stack.
|
||||
for (BasicBlock::iterator I = Header->begin();
|
||||
PHINode *PN = dyn_cast<PHINode>(I); ++I)
|
||||
Worklist.push_back(PN);
|
||||
for (PHINode &PN : Header->phis())
|
||||
Worklist.push_back(&PN);
|
||||
}
|
||||
|
||||
const ScalarEvolution::BackedgeTakenInfo &
|
||||
@ -7638,12 +7637,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
|
||||
if (!Latch)
|
||||
return nullptr;
|
||||
|
||||
for (auto &I : *Header) {
|
||||
PHINode *PHI = dyn_cast<PHINode>(&I);
|
||||
if (!PHI) break;
|
||||
auto *StartCST = getOtherIncomingValue(PHI, Latch);
|
||||
if (!StartCST) continue;
|
||||
CurrentIterVals[PHI] = StartCST;
|
||||
for (PHINode &PHI : Header->phis()) {
|
||||
if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
|
||||
CurrentIterVals[&PHI] = StartCST;
|
||||
}
|
||||
if (!CurrentIterVals.count(PN))
|
||||
return RetVal = nullptr;
|
||||
@ -7720,13 +7716,9 @@ const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
|
||||
BasicBlock *Latch = L->getLoopLatch();
|
||||
assert(Latch && "Should follow from NumIncomingValues == 2!");
|
||||
|
||||
for (auto &I : *Header) {
|
||||
PHINode *PHI = dyn_cast<PHINode>(&I);
|
||||
if (!PHI)
|
||||
break;
|
||||
auto *StartCST = getOtherIncomingValue(PHI, Latch);
|
||||
if (!StartCST) continue;
|
||||
CurrentIterVals[PHI] = StartCST;
|
||||
for (PHINode &PHI : Header->phis()) {
|
||||
if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
|
||||
CurrentIterVals[&PHI] = StartCST;
|
||||
}
|
||||
if (!CurrentIterVals.count(PN))
|
||||
return getCouldNotCompute();
|
||||
|
@ -1154,16 +1154,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
|
||||
IVIncInsertLoop &&
|
||||
SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
|
||||
|
||||
for (auto &I : *L->getHeader()) {
|
||||
auto *PN = dyn_cast<PHINode>(&I);
|
||||
// Found first non-phi, the rest of instructions are also not Phis.
|
||||
if (!PN)
|
||||
break;
|
||||
|
||||
if (!SE.isSCEVable(PN->getType()))
|
||||
for (PHINode &PN : L->getHeader()->phis()) {
|
||||
if (!SE.isSCEVable(PN.getType()))
|
||||
continue;
|
||||
|
||||
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
|
||||
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
|
||||
if (!PhiSCEV)
|
||||
continue;
|
||||
|
||||
@ -1175,16 +1170,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
|
||||
continue;
|
||||
|
||||
Instruction *TempIncV =
|
||||
cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
|
||||
cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
|
||||
|
||||
// Check whether we can reuse this PHI node.
|
||||
if (LSRMode) {
|
||||
if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
|
||||
if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
|
||||
continue;
|
||||
if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
|
||||
continue;
|
||||
} else {
|
||||
if (!isNormalAddRecExprPHI(PN, TempIncV, L))
|
||||
if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1193,7 +1188,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
|
||||
IncV = TempIncV;
|
||||
TruncTy = nullptr;
|
||||
InvertStep = false;
|
||||
AddRecPhiMatch = PN;
|
||||
AddRecPhiMatch = &PN;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1203,7 +1198,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
|
||||
canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
|
||||
// Record the phi node. But don't stop we might find an exact match
|
||||
// later.
|
||||
AddRecPhiMatch = PN;
|
||||
AddRecPhiMatch = &PN;
|
||||
IncV = TempIncV;
|
||||
TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
|
||||
}
|
||||
@ -1863,12 +1858,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
|
||||
const TargetTransformInfo *TTI) {
|
||||
// Find integer phis in order of increasing width.
|
||||
SmallVector<PHINode*, 8> Phis;
|
||||
for (auto &I : *L->getHeader()) {
|
||||
if (auto *PN = dyn_cast<PHINode>(&I))
|
||||
Phis.push_back(PN);
|
||||
else
|
||||
break;
|
||||
}
|
||||
for (PHINode &PN : L->getHeader()->phis())
|
||||
Phis.push_back(&PN);
|
||||
|
||||
if (TTI)
|
||||
std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
|
||||
|
@ -2264,9 +2264,9 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
|
||||
// ashr X, C -> adds C sign bits. Vectors too.
|
||||
const APInt *ShAmt;
|
||||
if (match(U->getOperand(1), m_APInt(ShAmt))) {
|
||||
unsigned ShAmtLimited = ShAmt->getZExtValue();
|
||||
if (ShAmtLimited >= TyBits)
|
||||
if (ShAmt->uge(TyBits))
|
||||
break; // Bad shift.
|
||||
unsigned ShAmtLimited = ShAmt->getZExtValue();
|
||||
Tmp += ShAmtLimited;
|
||||
if (Tmp > TyBits) Tmp = TyBits;
|
||||
}
|
||||
@ -2277,9 +2277,9 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
|
||||
if (match(U->getOperand(1), m_APInt(ShAmt))) {
|
||||
// shl destroys sign bits.
|
||||
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
|
||||
if (ShAmt->uge(TyBits) || // Bad shift.
|
||||
ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
|
||||
Tmp2 = ShAmt->getZExtValue();
|
||||
if (Tmp2 >= TyBits || // Bad shift.
|
||||
Tmp2 >= Tmp) break; // Shifted all sign bits out.
|
||||
return Tmp - Tmp2;
|
||||
}
|
||||
break;
|
||||
@ -4161,6 +4161,81 @@ static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
}
|
||||
|
||||
/// Recognize variations of:
|
||||
/// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
|
||||
static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
|
||||
Value *CmpLHS, Value *CmpRHS,
|
||||
Value *TrueVal, Value *FalseVal) {
|
||||
// TODO: Allow FP min/max with nnan/nsz.
|
||||
assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
|
||||
|
||||
Value *A, *B;
|
||||
SelectPatternResult L = matchSelectPattern(TrueVal, A, B);
|
||||
if (!SelectPatternResult::isMinOrMax(L.Flavor))
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
|
||||
Value *C, *D;
|
||||
SelectPatternResult R = matchSelectPattern(FalseVal, C, D);
|
||||
if (L.Flavor != R.Flavor)
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
|
||||
// Match the compare to the min/max operations of the select operands.
|
||||
switch (L.Flavor) {
|
||||
case SPF_SMIN:
|
||||
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
|
||||
Pred = ICmpInst::getSwappedPredicate(Pred);
|
||||
std::swap(CmpLHS, CmpRHS);
|
||||
}
|
||||
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
|
||||
break;
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
case SPF_SMAX:
|
||||
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
|
||||
Pred = ICmpInst::getSwappedPredicate(Pred);
|
||||
std::swap(CmpLHS, CmpRHS);
|
||||
}
|
||||
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
|
||||
break;
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
case SPF_UMIN:
|
||||
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
|
||||
Pred = ICmpInst::getSwappedPredicate(Pred);
|
||||
std::swap(CmpLHS, CmpRHS);
|
||||
}
|
||||
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
|
||||
break;
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
case SPF_UMAX:
|
||||
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
|
||||
Pred = ICmpInst::getSwappedPredicate(Pred);
|
||||
std::swap(CmpLHS, CmpRHS);
|
||||
}
|
||||
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
|
||||
break;
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
default:
|
||||
llvm_unreachable("Bad flavor while matching min/max");
|
||||
}
|
||||
|
||||
// a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
|
||||
if (CmpLHS == A && CmpRHS == C && D == B)
|
||||
return {L.Flavor, SPNB_NA, false};
|
||||
|
||||
// a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
|
||||
if (CmpLHS == A && CmpRHS == D && C == B)
|
||||
return {L.Flavor, SPNB_NA, false};
|
||||
|
||||
// b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
|
||||
if (CmpLHS == B && CmpRHS == C && D == A)
|
||||
return {L.Flavor, SPNB_NA, false};
|
||||
|
||||
// b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
|
||||
if (CmpLHS == B && CmpRHS == D && C == A)
|
||||
return {L.Flavor, SPNB_NA, false};
|
||||
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
}
|
||||
|
||||
/// Match non-obvious integer minimum and maximum sequences.
|
||||
static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
|
||||
Value *CmpLHS, Value *CmpRHS,
|
||||
@ -4174,6 +4249,10 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
|
||||
if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
|
||||
return SPR;
|
||||
|
||||
SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
|
||||
if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
|
||||
return SPR;
|
||||
|
||||
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
|
||||
return {SPF_UNKNOWN, SPNB_NA, false};
|
||||
|
||||
|
@ -633,16 +633,10 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
|
||||
if (DestBBPred == BB)
|
||||
continue;
|
||||
|
||||
bool HasAllSameValue = true;
|
||||
BasicBlock::const_iterator DestBBI = DestBB->begin();
|
||||
while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
|
||||
if (DestPN->getIncomingValueForBlock(BB) !=
|
||||
DestPN->getIncomingValueForBlock(DestBBPred)) {
|
||||
HasAllSameValue = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (HasAllSameValue)
|
||||
if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
|
||||
return DestPN.getIncomingValueForBlock(BB) ==
|
||||
DestPN.getIncomingValueForBlock(DestBBPred);
|
||||
}))
|
||||
SameIncomingValueBBs.insert(DestBBPred);
|
||||
}
|
||||
|
||||
@ -672,9 +666,8 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
|
||||
// We only want to eliminate blocks whose phi nodes are used by phi nodes in
|
||||
// the successor. If there are more complex condition (e.g. preheaders),
|
||||
// don't mess around with them.
|
||||
BasicBlock::const_iterator BBI = BB->begin();
|
||||
while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
|
||||
for (const User *U : PN->users()) {
|
||||
for (const PHINode &PN : BB->phis()) {
|
||||
for (const User *U : PN.users()) {
|
||||
const Instruction *UI = cast<Instruction>(U);
|
||||
if (UI->getParent() != DestBB || !isa<PHINode>(UI))
|
||||
return false;
|
||||
@ -713,10 +706,9 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
|
||||
for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
|
||||
BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
|
||||
if (BBPreds.count(Pred)) { // Common predecessor?
|
||||
BBI = DestBB->begin();
|
||||
while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
|
||||
const Value *V1 = PN->getIncomingValueForBlock(Pred);
|
||||
const Value *V2 = PN->getIncomingValueForBlock(BB);
|
||||
for (const PHINode &PN : DestBB->phis()) {
|
||||
const Value *V1 = PN.getIncomingValueForBlock(Pred);
|
||||
const Value *V2 = PN.getIncomingValueForBlock(BB);
|
||||
|
||||
// If V2 is a phi node in BB, look up what the mapped value will be.
|
||||
if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
|
||||
@ -759,11 +751,9 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
|
||||
|
||||
// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
|
||||
// to handle the new incoming edges it is about to have.
|
||||
PHINode *PN;
|
||||
for (BasicBlock::iterator BBI = DestBB->begin();
|
||||
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
|
||||
for (PHINode &PN : DestBB->phis()) {
|
||||
// Remove the incoming value for BB, and remember it.
|
||||
Value *InVal = PN->removeIncomingValue(BB, false);
|
||||
Value *InVal = PN.removeIncomingValue(BB, false);
|
||||
|
||||
// Two options: either the InVal is a phi node defined in BB or it is some
|
||||
// value that dominates BB.
|
||||
@ -771,17 +761,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
|
||||
if (InValPhi && InValPhi->getParent() == BB) {
|
||||
// Add all of the input values of the input PHI as inputs of this phi.
|
||||
for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
|
||||
PN->addIncoming(InValPhi->getIncomingValue(i),
|
||||
InValPhi->getIncomingBlock(i));
|
||||
PN.addIncoming(InValPhi->getIncomingValue(i),
|
||||
InValPhi->getIncomingBlock(i));
|
||||
} else {
|
||||
// Otherwise, add one instance of the dominating value for each edge that
|
||||
// we will be adding.
|
||||
if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
|
||||
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
|
||||
PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
|
||||
PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
|
||||
} else {
|
||||
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
|
||||
PN->addIncoming(InVal, *PI);
|
||||
PN.addIncoming(InVal, *PI);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -6497,22 +6487,16 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
|
||||
std::swap(TBB, FBB);
|
||||
|
||||
// Replace the old BB with the new BB.
|
||||
for (auto &I : *TBB) {
|
||||
PHINode *PN = dyn_cast<PHINode>(&I);
|
||||
if (!PN)
|
||||
break;
|
||||
for (PHINode &PN : TBB->phis()) {
|
||||
int i;
|
||||
while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
|
||||
PN->setIncomingBlock(i, TmpBB);
|
||||
while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
|
||||
PN.setIncomingBlock(i, TmpBB);
|
||||
}
|
||||
|
||||
// Add another incoming edge form the new BB.
|
||||
for (auto &I : *FBB) {
|
||||
PHINode *PN = dyn_cast<PHINode>(&I);
|
||||
if (!PN)
|
||||
break;
|
||||
auto *Val = PN->getIncomingValueForBlock(&BB);
|
||||
PN->addIncoming(Val, TmpBB);
|
||||
for (PHINode &PN : FBB->phis()) {
|
||||
auto *Val = PN.getIncomingValueForBlock(&BB);
|
||||
PN.addIncoming(Val, TmpBB);
|
||||
}
|
||||
|
||||
// Update the branch weights (from SelectionDAGBuilder::
|
||||
|
@ -815,7 +815,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
|
||||
if (CI.isInlineAsm())
|
||||
return translateInlineAsm(CI, MIRBuilder);
|
||||
|
||||
if (!F || !F->isIntrinsic()) {
|
||||
Intrinsic::ID ID = Intrinsic::not_intrinsic;
|
||||
if (F && F->isIntrinsic()) {
|
||||
ID = F->getIntrinsicID();
|
||||
if (TII && ID == Intrinsic::not_intrinsic)
|
||||
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
|
||||
}
|
||||
|
||||
if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
|
||||
unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
|
||||
SmallVector<unsigned, 8> Args;
|
||||
for (auto &Arg: CI.arg_operands())
|
||||
@ -827,10 +834,6 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
|
||||
});
|
||||
}
|
||||
|
||||
Intrinsic::ID ID = F->getIntrinsicID();
|
||||
if (TII && ID == Intrinsic::not_intrinsic)
|
||||
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
|
||||
|
||||
assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
|
||||
|
||||
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
|
||||
|
@ -813,7 +813,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
|
||||
|
||||
unsigned Zero = MRI.createGenericVirtualRegister(Ty);
|
||||
MIRBuilder.buildConstant(Zero, 0);
|
||||
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
|
||||
|
||||
// For *signed* multiply, overflow is detected by checking:
|
||||
// (hi != (lo >> bitwidth-1))
|
||||
if (Opcode == TargetOpcode::G_SMULH) {
|
||||
unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
|
||||
unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
|
||||
MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
|
||||
MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
|
||||
.addDef(Shifted)
|
||||
.addUse(Res)
|
||||
.addUse(ShiftAmt);
|
||||
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
|
||||
} else {
|
||||
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
|
||||
}
|
||||
MI.eraseFromParent();
|
||||
return Legalized;
|
||||
}
|
||||
|
@ -136,8 +136,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
|
||||
MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
|
||||
|
||||
MCAsmBackend *MAB =
|
||||
getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
|
||||
Options.MCOptions);
|
||||
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
|
||||
auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
|
||||
MCStreamer *S = getTarget().createAsmStreamer(
|
||||
Context, std::move(FOut), Options.MCOptions.AsmVerbose,
|
||||
@ -151,8 +150,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
|
||||
// emission fails.
|
||||
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
|
||||
MCAsmBackend *MAB =
|
||||
getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
|
||||
Options.MCOptions);
|
||||
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
|
||||
if (!MCE || !MAB)
|
||||
return true;
|
||||
|
||||
@ -225,17 +223,16 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
|
||||
|
||||
// Create the code emitter for the target if it exists. If not, .o file
|
||||
// emission fails.
|
||||
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
|
||||
const MCRegisterInfo &MRI = *getMCRegisterInfo();
|
||||
MCCodeEmitter *MCE =
|
||||
getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
|
||||
MCAsmBackend *MAB =
|
||||
getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
|
||||
Options.MCOptions);
|
||||
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
|
||||
if (!MCE || !MAB)
|
||||
return true;
|
||||
|
||||
const Triple &T = getTargetTriple();
|
||||
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
|
||||
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
|
||||
T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out,
|
||||
std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
|
||||
|
@ -242,8 +242,11 @@ class UserValue {
|
||||
// We are storing a MachineOperand outside a MachineInstr.
|
||||
locations.back().clearParent();
|
||||
// Don't store def operands.
|
||||
if (locations.back().isReg())
|
||||
if (locations.back().isReg()) {
|
||||
if (locations.back().isDef())
|
||||
locations.back().setIsDead(false);
|
||||
locations.back().setIsUse();
|
||||
}
|
||||
return locations.size() - 1;
|
||||
}
|
||||
|
||||
|
@ -3850,7 +3850,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
|
||||
return false;
|
||||
}
|
||||
case ISD::ZERO_EXTEND:
|
||||
case ISD::ANY_EXTEND:
|
||||
case ISD::AssertZext: {
|
||||
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
|
||||
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
|
||||
@ -13783,30 +13782,30 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
// Deal with elidable overlapping chained stores.
|
||||
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain))
|
||||
if (OptLevel != CodeGenOpt::None && ST->isUnindexed() &&
|
||||
ST1->isUnindexed() && !ST1->isVolatile() && ST1->hasOneUse() &&
|
||||
!ST1->getBasePtr().isUndef() && !ST->isVolatile()) {
|
||||
BaseIndexOffset STBasePtr = BaseIndexOffset::match(ST->getBasePtr(), DAG);
|
||||
BaseIndexOffset ST1BasePtr =
|
||||
BaseIndexOffset::match(ST1->getBasePtr(), DAG);
|
||||
unsigned STBytes = ST->getMemoryVT().getStoreSize();
|
||||
unsigned ST1Bytes = ST1->getMemoryVT().getStoreSize();
|
||||
int64_t PtrDiff;
|
||||
// If this is a store who's preceeding store to a subset of the same
|
||||
// memory and no one other node is chained to that store we can
|
||||
// effectively drop the store. Do not remove stores to undef as they may
|
||||
// be used as data sinks.
|
||||
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
|
||||
if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
|
||||
!ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
|
||||
ST->getMemoryVT() == ST1->getMemoryVT()) {
|
||||
// If this is a store followed by a store with the same value to the same
|
||||
// location, then the store is dead/noop.
|
||||
if (ST1->getValue() == Value) {
|
||||
// The store is dead, remove it.
|
||||
return Chain;
|
||||
}
|
||||
|
||||
if (((ST->getBasePtr() == ST1->getBasePtr()) &&
|
||||
(ST->getValue() == ST1->getValue())) ||
|
||||
(STBasePtr.equalBaseIndex(ST1BasePtr, DAG, PtrDiff) &&
|
||||
(0 <= PtrDiff) && (PtrDiff + ST1Bytes <= STBytes))) {
|
||||
// If this is a store who's preceeding store to the same location
|
||||
// and no one other node is chained to that store we can effectively
|
||||
// drop the store. Do not remove stores to undef as they may be used as
|
||||
// data sinks.
|
||||
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
|
||||
!ST1->getBasePtr().isUndef()) {
|
||||
// ST1 is fully overwritten and can be elided. Combine with it's chain
|
||||
// value.
|
||||
CombineTo(ST1, ST1->getChain());
|
||||
return SDValue(N, 0);
|
||||
return SDValue();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
|
||||
// truncating store. We can do this even if this is already a truncstore.
|
||||
|
@ -2051,11 +2051,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
|
||||
// At this point we know that there is a 1-1 correspondence between LLVM PHI
|
||||
// nodes and Machine PHI nodes, but the incoming operands have not been
|
||||
// emitted yet.
|
||||
for (BasicBlock::const_iterator I = SuccBB->begin();
|
||||
const auto *PN = dyn_cast<PHINode>(I); ++I) {
|
||||
|
||||
for (const PHINode &PN : SuccBB->phis()) {
|
||||
// Ignore dead phi's.
|
||||
if (PN->use_empty())
|
||||
if (PN.use_empty())
|
||||
continue;
|
||||
|
||||
// Only handle legal types. Two interesting things to note here. First,
|
||||
@ -2064,7 +2062,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
|
||||
// own moves. Second, this check is necessary because FastISel doesn't
|
||||
// use CreateRegs to create registers, so it always creates
|
||||
// exactly one register for each non-void instruction.
|
||||
EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true);
|
||||
EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true);
|
||||
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
|
||||
// Handle integer promotions, though, because they're common and easy.
|
||||
if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
|
||||
@ -2073,11 +2071,11 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
|
||||
}
|
||||
}
|
||||
|
||||
const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
|
||||
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
|
||||
|
||||
// Set the DebugLoc for the copy. Prefer the location of the operand
|
||||
// if there is one; use the location of the PHI otherwise.
|
||||
DbgLoc = PN->getDebugLoc();
|
||||
DbgLoc = PN.getDebugLoc();
|
||||
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
|
||||
DbgLoc = Inst->getDebugLoc();
|
||||
|
||||
|
@ -257,20 +257,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
|
||||
|
||||
// Create Machine PHI nodes for LLVM PHI nodes, lowering them as
|
||||
// appropriate.
|
||||
for (BasicBlock::const_iterator I = BB.begin();
|
||||
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
|
||||
if (PN->use_empty()) continue;
|
||||
|
||||
// Skip empty types
|
||||
if (PN->getType()->isEmptyTy())
|
||||
for (const PHINode &PN : BB.phis()) {
|
||||
if (PN.use_empty())
|
||||
continue;
|
||||
|
||||
DebugLoc DL = PN->getDebugLoc();
|
||||
unsigned PHIReg = ValueMap[PN];
|
||||
// Skip empty types
|
||||
if (PN.getType()->isEmptyTy())
|
||||
continue;
|
||||
|
||||
DebugLoc DL = PN.getDebugLoc();
|
||||
unsigned PHIReg = ValueMap[&PN];
|
||||
assert(PHIReg && "PHI node does not have an assigned virtual register!");
|
||||
|
||||
SmallVector<EVT, 4> ValueVTs;
|
||||
ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs);
|
||||
ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs);
|
||||
for (EVT VT : ValueVTs) {
|
||||
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
|
||||
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
|
||||
|
@ -139,14 +139,14 @@ class VectorLegalizer {
|
||||
|
||||
/// \brief Implements [SU]INT_TO_FP vector promotion.
|
||||
///
|
||||
/// This is a [zs]ext of the input operand to the next size up.
|
||||
/// This is a [zs]ext of the input operand to a larger integer type.
|
||||
SDValue PromoteINT_TO_FP(SDValue Op);
|
||||
|
||||
/// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
|
||||
///
|
||||
/// It is promoted to the next size up integer type. The result is then
|
||||
/// It is promoted to a larger integer type. The result is then
|
||||
/// truncated back to the original type.
|
||||
SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
|
||||
SDValue PromoteFP_TO_INT(SDValue Op);
|
||||
|
||||
public:
|
||||
VectorLegalizer(SelectionDAG& dag) :
|
||||
@ -431,7 +431,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
|
||||
case ISD::FP_TO_UINT:
|
||||
case ISD::FP_TO_SINT:
|
||||
// Promote the operation by extending the operand.
|
||||
return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
|
||||
return PromoteFP_TO_INT(Op);
|
||||
}
|
||||
|
||||
// There are currently two cases of vector promotion:
|
||||
@ -472,20 +472,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
|
||||
SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
|
||||
// INT_TO_FP operations may require the input operand be promoted even
|
||||
// when the type is otherwise legal.
|
||||
EVT VT = Op.getOperand(0).getValueType();
|
||||
assert(Op.getNode()->getNumValues() == 1 &&
|
||||
"Can't promote a vector with multiple results!");
|
||||
MVT VT = Op.getOperand(0).getSimpleValueType();
|
||||
MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
|
||||
assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
|
||||
"Vectors have different number of elements!");
|
||||
|
||||
// Normal getTypeToPromoteTo() doesn't work here, as that will promote
|
||||
// by widening the vector w/ the same element width and twice the number
|
||||
// of elements. We want the other way around, the same number of elements,
|
||||
// each twice the width.
|
||||
//
|
||||
// Increase the bitwidth of the element to the next pow-of-two
|
||||
// (which is greater than 8 bits).
|
||||
|
||||
EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
|
||||
assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
|
||||
SDLoc dl(Op);
|
||||
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
|
||||
|
||||
@ -505,35 +496,28 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
|
||||
// elements and then truncate the result. This is different from the default
|
||||
// PromoteVector which uses bitcast to promote thus assumning that the
|
||||
// promoted vector type has the same overall size.
|
||||
SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
|
||||
assert(Op.getNode()->getNumValues() == 1 &&
|
||||
"Can't promote a vector with multiple results!");
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
|
||||
assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
|
||||
"Vectors have different number of elements!");
|
||||
|
||||
EVT NewVT = VT;
|
||||
unsigned NewOpc;
|
||||
while (true) {
|
||||
NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
|
||||
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
|
||||
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
|
||||
NewOpc = ISD::FP_TO_SINT;
|
||||
break;
|
||||
}
|
||||
if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
|
||||
NewOpc = ISD::FP_TO_UINT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
unsigned NewOpc = Op->getOpcode();
|
||||
// Change FP_TO_UINT to FP_TO_SINT if possible.
|
||||
// TODO: Should we only do this if FP_TO_UINT itself isn't legal?
|
||||
if (NewOpc == ISD::FP_TO_UINT &&
|
||||
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
|
||||
NewOpc = ISD::FP_TO_SINT;
|
||||
|
||||
SDLoc dl(Op);
|
||||
SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0));
|
||||
SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
|
||||
|
||||
// Assert that the converted value fits in the original type. If it doesn't
|
||||
// (eg: because the value being converted is too big), then the result of the
|
||||
// original operation was undefined anyway, so the assert is still correct.
|
||||
Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
|
||||
: ISD::AssertSext,
|
||||
dl, NewVT, Promoted,
|
||||
dl, NVT, Promoted,
|
||||
DAG.getValueType(VT.getScalarType()));
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
|
||||
}
|
||||
|
@ -3374,11 +3374,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
SDValue InOp = N->getOperand(0);
|
||||
// If some legalization strategy other than widening is used on the operand,
|
||||
// we can't safely assume that just extending the low lanes is the correct
|
||||
// transformation.
|
||||
if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
|
||||
return WidenVecOp_Convert(N);
|
||||
assert(getTypeAction(InOp.getValueType()) ==
|
||||
TargetLowering::TypeWidenVector &&
|
||||
"Unexpected type action");
|
||||
InOp = GetWidenedVector(InOp);
|
||||
assert(VT.getVectorNumElements() <
|
||||
InOp.getValueType().getVectorNumElements() &&
|
||||
@ -3440,20 +3438,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
|
||||
// Since the result is legal and the input is illegal, it is unlikely that we
|
||||
// can fix the input to a legal type so unroll the convert into some scalar
|
||||
// code and create a nasty build vector.
|
||||
// Since the result is legal and the input is illegal.
|
||||
EVT VT = N->getValueType(0);
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
SDLoc dl(N);
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
SDValue InOp = N->getOperand(0);
|
||||
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
|
||||
InOp = GetWidenedVector(InOp);
|
||||
assert(getTypeAction(InOp.getValueType()) ==
|
||||
TargetLowering::TypeWidenVector &&
|
||||
"Unexpected type action");
|
||||
InOp = GetWidenedVector(InOp);
|
||||
EVT InVT = InOp.getValueType();
|
||||
unsigned Opcode = N->getOpcode();
|
||||
|
||||
// See if a widened result type would be legal, if so widen the node.
|
||||
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
|
||||
InVT.getVectorNumElements());
|
||||
if (TLI.isTypeLegal(WideVT)) {
|
||||
SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
||||
EVT InEltVT = InVT.getVectorElementType();
|
||||
|
||||
unsigned Opcode = N->getOpcode();
|
||||
// Unroll the convert into some scalar code and create a nasty build vector.
|
||||
SmallVector<SDValue, 16> Ops(NumElts);
|
||||
for (unsigned i=0; i < NumElts; ++i)
|
||||
Ops[i] = DAG.getNode(
|
||||
@ -3506,8 +3515,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
|
||||
unsigned NumOperands = N->getNumOperands();
|
||||
for (unsigned i=0; i < NumOperands; ++i) {
|
||||
SDValue InOp = N->getOperand(i);
|
||||
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
|
||||
InOp = GetWidenedVector(InOp);
|
||||
assert(getTypeAction(InOp.getValueType()) ==
|
||||
TargetLowering::TypeWidenVector &&
|
||||
"Unexpected type action");
|
||||
InOp = GetWidenedVector(InOp);
|
||||
for (unsigned j=0; j < NumInElts; ++j)
|
||||
Ops[Idx++] = DAG.getNode(
|
||||
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
|
||||
|
@ -8940,17 +8940,17 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
|
||||
// At this point we know that there is a 1-1 correspondence between LLVM PHI
|
||||
// nodes and Machine PHI nodes, but the incoming operands have not been
|
||||
// emitted yet.
|
||||
for (BasicBlock::const_iterator I = SuccBB->begin();
|
||||
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
|
||||
for (const PHINode &PN : SuccBB->phis()) {
|
||||
// Ignore dead phi's.
|
||||
if (PN->use_empty()) continue;
|
||||
if (PN.use_empty())
|
||||
continue;
|
||||
|
||||
// Skip empty types
|
||||
if (PN->getType()->isEmptyTy())
|
||||
if (PN.getType()->isEmptyTy())
|
||||
continue;
|
||||
|
||||
unsigned Reg;
|
||||
const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
|
||||
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
|
||||
|
||||
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
|
||||
unsigned &RegOut = ConstantsOut[C];
|
||||
@ -8977,7 +8977,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
|
||||
// the input for this MBB.
|
||||
SmallVector<EVT, 4> ValueVTs;
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs);
|
||||
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
|
||||
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
|
||||
EVT VT = ValueVTs[vti];
|
||||
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
|
||||
|
@ -1445,13 +1445,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
|
||||
}
|
||||
|
||||
if (AllPredsVisited) {
|
||||
for (BasicBlock::const_iterator I = LLVMBB->begin();
|
||||
const PHINode *PN = dyn_cast<PHINode>(I); ++I)
|
||||
FuncInfo->ComputePHILiveOutRegInfo(PN);
|
||||
for (const PHINode &PN : LLVMBB->phis())
|
||||
FuncInfo->ComputePHILiveOutRegInfo(&PN);
|
||||
} else {
|
||||
for (BasicBlock::const_iterator I = LLVMBB->begin();
|
||||
const PHINode *PN = dyn_cast<PHINode>(I); ++I)
|
||||
FuncInfo->InvalidatePHILiveOutRegInfo(PN);
|
||||
for (const PHINode &PN : LLVMBB->phis())
|
||||
FuncInfo->InvalidatePHILiveOutRegInfo(&PN);
|
||||
}
|
||||
|
||||
FuncInfo->VisitedBBs.insert(LLVMBB);
|
||||
|
@ -712,8 +712,11 @@ bool TargetPassConfig::addCoreISelPasses() {
|
||||
|
||||
// Ask the target for an isel.
|
||||
// Enable GlobalISel if the target wants to, but allow that to be overriden.
|
||||
// Explicitly enabling fast-isel should override implicitly enabled
|
||||
// global-isel.
|
||||
if (EnableGlobalISel == cl::BOU_TRUE ||
|
||||
(EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) {
|
||||
(EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled() &&
|
||||
EnableFastISelOption != cl::BOU_TRUE)) {
|
||||
if (addIRTranslator())
|
||||
return true;
|
||||
|
||||
@ -1133,7 +1136,12 @@ bool TargetPassConfig::isGlobalISelEnabled() const {
|
||||
}
|
||||
|
||||
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
|
||||
return EnableGlobalISelAbort == 1;
|
||||
if (EnableGlobalISelAbort.getNumOccurrences() > 0)
|
||||
return EnableGlobalISelAbort == 1;
|
||||
|
||||
// When no abort behaviour is specified, we don't abort if the target says
|
||||
// that GISel is enabled.
|
||||
return !isGlobalISelEnabled();
|
||||
}
|
||||
|
||||
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
|
||||
|
@ -838,17 +838,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
|
||||
for (auto &BBMapping : Orig2Clone) {
|
||||
BasicBlock *OldBlock = BBMapping.first;
|
||||
BasicBlock *NewBlock = BBMapping.second;
|
||||
for (Instruction &OldI : *OldBlock) {
|
||||
auto *OldPN = dyn_cast<PHINode>(&OldI);
|
||||
if (!OldPN)
|
||||
break;
|
||||
UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true);
|
||||
for (PHINode &OldPN : OldBlock->phis()) {
|
||||
UpdatePHIOnClonedBlock(&OldPN, /*IsForOldBlock=*/true);
|
||||
}
|
||||
for (Instruction &NewI : *NewBlock) {
|
||||
auto *NewPN = dyn_cast<PHINode>(&NewI);
|
||||
if (!NewPN)
|
||||
break;
|
||||
UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false);
|
||||
for (PHINode &NewPN : NewBlock->phis()) {
|
||||
UpdatePHIOnClonedBlock(&NewPN, /*IsForOldBlock=*/false);
|
||||
}
|
||||
}
|
||||
|
||||
@ -858,17 +852,13 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
|
||||
BasicBlock *OldBlock = BBMapping.first;
|
||||
BasicBlock *NewBlock = BBMapping.second;
|
||||
for (BasicBlock *SuccBB : successors(NewBlock)) {
|
||||
for (Instruction &SuccI : *SuccBB) {
|
||||
auto *SuccPN = dyn_cast<PHINode>(&SuccI);
|
||||
if (!SuccPN)
|
||||
break;
|
||||
|
||||
for (PHINode &SuccPN : SuccBB->phis()) {
|
||||
// Ok, we have a PHI node. Figure out what the incoming value was for
|
||||
// the OldBlock.
|
||||
int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock);
|
||||
int OldBlockIdx = SuccPN.getBasicBlockIndex(OldBlock);
|
||||
if (OldBlockIdx == -1)
|
||||
break;
|
||||
Value *IV = SuccPN->getIncomingValue(OldBlockIdx);
|
||||
Value *IV = SuccPN.getIncomingValue(OldBlockIdx);
|
||||
|
||||
// Remap the value if necessary.
|
||||
if (auto *Inst = dyn_cast<Instruction>(IV)) {
|
||||
@ -877,7 +867,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
|
||||
IV = I->second;
|
||||
}
|
||||
|
||||
SuccPN->addIncoming(IV, NewBlock);
|
||||
SuccPN.addIncoming(IV, NewBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -264,7 +264,8 @@ const BasicBlock *BasicBlock::getUniqueSuccessor() const {
|
||||
}
|
||||
|
||||
iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
|
||||
return make_range<phi_iterator>(dyn_cast<PHINode>(&front()), nullptr);
|
||||
PHINode *P = empty() ? nullptr : dyn_cast<PHINode>(&*begin());
|
||||
return make_range<phi_iterator>(P, nullptr);
|
||||
}
|
||||
|
||||
/// This method is used to notify a BasicBlock that the
|
||||
|
@ -2210,24 +2210,23 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
std::sort(Preds.begin(), Preds.end());
PHINode *PN;
for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
for (const PHINode &PN : BB.phis()) {
// Ensure that PHI nodes have at least one entry!
Assert(PN->getNumIncomingValues() != 0,
Assert(PN.getNumIncomingValues() != 0,
"PHI nodes must have at least one entry. If the block is dead, "
"the PHI should be removed!",
PN);
Assert(PN->getNumIncomingValues() == Preds.size(),
&PN);
Assert(PN.getNumIncomingValues() == Preds.size(),
"PHINode should have one entry for each predecessor of its "
"parent basic block!",
PN);
&PN);

// Get and sort all incoming values in the PHI node...
Values.clear();
Values.reserve(PN->getNumIncomingValues());
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
Values.push_back(std::make_pair(PN->getIncomingBlock(i),
PN->getIncomingValue(i)));
Values.reserve(PN.getNumIncomingValues());
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
Values.push_back(
std::make_pair(PN.getIncomingBlock(i), PN.getIncomingValue(i)));
std::sort(Values.begin(), Values.end());

for (unsigned i = 0, e = Values.size(); i != e; ++i) {
@ -2239,12 +2238,12 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
Values[i].second == Values[i - 1].second,
"PHI node has multiple entries for the same basic block with "
"different incoming values!",
PN, Values[i].first, Values[i].second, Values[i - 1].second);
&PN, Values[i].first, Values[i].second, Values[i - 1].second);

// Check to make sure that the predecessors and PHI node entries are
// matched up.
Assert(Values[i].first == Preds[i],
"PHI node entries do not match predecessors!", PN,
"PHI node entries do not match predecessors!", &PN,
Values[i].first, Preds[i]);
}
}
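Editor's note: the invariant the verifier enforces above (exactly one PHI entry per predecessor, with matching blocks) is what well-formed IR construction produces. A hedged sketch using IRBuilder (llvm/IR/IRBuilder.h), assuming blocks ThenBB, ElseBB, MergeBB and values ThenV, ElseV already exist; all names are illustrative, not from this commit:

// Create a PHI in MergeBB with one incoming value per predecessor,
// which is exactly what Verifier::visitBasicBlock checks for.
llvm::IRBuilder<> B(MergeBB, MergeBB->begin());
llvm::PHINode *PN =
    B.CreatePHI(B.getInt32Ty(), /*NumReservedValues=*/2, "merge");
PN->addIncoming(ThenV, ThenBB);   // value produced on the ThenBB path
PN->addIncoming(ElseV, ElseBB);   // value produced on the ElseBB path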
@ -447,7 +447,7 @@ bool ELFAsmParser::parseMetadataSym(MCSymbolELF *&Associated) {
|
||||
Lex();
|
||||
StringRef Name;
|
||||
if (getParser().parseIdentifier(Name))
|
||||
return true;
|
||||
return TokError("invalid metadata symbol");
|
||||
Associated = dyn_cast_or_null<MCSymbolELF>(getContext().lookupSymbol(Name));
|
||||
if (!Associated || !Associated->isInSection())
|
||||
return TokError("symbol is not in a section: " + Name);
|
||||
|
@ -412,10 +412,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
|
||||
// We provide the opt remark emitter pass for LICM to use. We only need to do
|
||||
// this once as it is immutable.
|
||||
FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1)));
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging));
|
||||
FPM.addPass(SimplifyCFGPass());
|
||||
FPM.addPass(InstCombinePass());
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2)));
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging));
|
||||
|
||||
// Eliminate redundancies.
|
||||
if (Level != O1) {
|
||||
@ -450,7 +450,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
|
||||
FPM.addPass(JumpThreadingPass());
|
||||
FPM.addPass(CorrelatedValuePropagationPass());
|
||||
FPM.addPass(DSEPass());
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
|
||||
|
||||
for (auto &C : ScalarOptimizerLateEPCallbacks)
|
||||
C(FPM, Level);
|
||||
@ -510,7 +510,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
|
||||
MPM.addPass(PGOInstrumentationGen());
|
||||
|
||||
FunctionPassManager FPM;
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
|
||||
FPM.addPass(
|
||||
createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
|
||||
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
|
||||
|
||||
// Add the profile lowering pass.
|
||||
@ -730,7 +731,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
|
||||
C(OptimizePM, Level);
|
||||
|
||||
// First rotate loops that may have been un-rotated by prior passes.
|
||||
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
|
||||
OptimizePM.addPass(
|
||||
createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
|
||||
|
||||
// Distribute loops to allow partial vectorization. I.e. isolate dependences
|
||||
// into separate loop that would otherwise inhibit vectorization. This is
|
||||
@ -777,7 +779,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
|
||||
OptimizePM.addPass(LoopUnrollPass(Level));
|
||||
OptimizePM.addPass(InstCombinePass());
|
||||
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
|
||||
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
|
||||
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
|
||||
|
||||
// Now that we've vectorized and unrolled loops, we may have more refined
|
||||
// alignment information, try to re-derive it here.
|
||||
@ -1533,7 +1535,8 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
|
||||
DebugLogging))
|
||||
return false;
|
||||
// Add the nested pass manager with the appropriate adaptor.
|
||||
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
|
||||
FPM.addPass(
|
||||
createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
|
||||
return true;
|
||||
}
|
||||
if (auto Count = parseRepeatPassName(Name)) {
|
||||
|
@ -873,6 +873,45 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
|
||||
NewArgv.push_back(nullptr);
|
||||
}
|
||||
|
||||
void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver,
|
||||
SmallVectorImpl<const char *> &NewArgv,
|
||||
bool MarkEOLs) {
|
||||
for (const char *Cur = Source.begin(); Cur != Source.end();) {
|
||||
SmallString<128> Line;
|
||||
// Check for comment line.
|
||||
if (isWhitespace(*Cur)) {
|
||||
while (Cur != Source.end() && isWhitespace(*Cur))
|
||||
++Cur;
|
||||
continue;
|
||||
}
|
||||
if (*Cur == '#') {
|
||||
while (Cur != Source.end() && *Cur != '\n')
|
||||
++Cur;
|
||||
continue;
|
||||
}
|
||||
// Find end of the current line.
|
||||
const char *Start = Cur;
|
||||
for (const char *End = Source.end(); Cur != End; ++Cur) {
|
||||
if (*Cur == '\\') {
|
||||
if (Cur + 1 != End) {
|
||||
++Cur;
|
||||
if (*Cur == '\n' ||
|
||||
(*Cur == '\r' && (Cur + 1 != End) && Cur[1] == '\n')) {
|
||||
Line.append(Start, Cur - 1);
|
||||
if (*Cur == '\r')
|
||||
++Cur;
|
||||
Start = Cur + 1;
|
||||
}
|
||||
}
|
||||
} else if (*Cur == '\n')
|
||||
break;
|
||||
}
|
||||
// Tokenize line.
|
||||
Line.append(Start, Cur);
|
||||
cl::TokenizeGNUCommandLine(Line, Saver, NewArgv, MarkEOLs);
|
||||
}
|
||||
}
|
||||
|
||||
// It is called byte order marker but the UTF-8 BOM is actually not affected
|
||||
// by the host system's endianness.
|
||||
static bool hasUTF8ByteOrderMark(ArrayRef<char> S) {
|
||||
@ -977,6 +1016,15 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
return AllExpanded;
}

bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver,
SmallVectorImpl<const char *> &Argv) {
if (!ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv,
/*MarkEOLs*/ false, /*RelativeNames*/ true))
return false;
return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv,
/*MarkEOLs*/ false, /*RelativeNames*/ true);
}

/// ParseEnvironmentOptions - An alternative entry point to the
/// CommandLine library, which allows you to read the program's name
/// from the caller (as PROGNAME) and its command-line arguments from
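Editor's note: readConfigFile first expands the configuration file itself and then any nested response files, both with the tokenizer added above (lines starting with '#' are skipped, a trailing backslash continues a line). A minimal usage sketch, assuming a config-file path in CfgPath; the surrounding setup is illustrative and not part of this commit:

// Expand a "--config"-style file into an argv vector before option parsing.
llvm::BumpPtrAllocator Alloc;
llvm::StringSaver Saver(Alloc);
llvm::SmallVector<const char *, 32> Argv;
Argv.push_back("tool");   // argv[0] placeholder
if (llvm::cl::readConfigFile(CfgPath, Saver, Argv))
  llvm::cl::ParseCommandLineOptions(static_cast<int>(Argv.size()),
                                    Argv.data());
else
  llvm::errs() << "failed to read config file\n";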
@ -632,16 +632,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,

// AArch64 doesn't have direct vector ->f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
// i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
// -> v8f16 conversions.
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
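Editor's note: the switch above from a bare Promote action to setOperationPromotedToType also records which type to promote to, so the legalizer does not have to derive one. As a hedged sketch, under the assumption that the helper simply folds the two classic TargetLoweringBase calls together, the first promoted line corresponds roughly to:

// Inside the AArch64TargetLowering constructor:
// mark the action as Promote and pin the promoted result type.
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
AddPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);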
@ -756,27 +756,31 @@ class ZPRRegOp <string Suffix, AsmOperandClass C,
|
||||
|
||||
//******************************************************************************
|
||||
|
||||
// SVE predicate register class.
|
||||
def PPR : RegisterClass<"AArch64",
|
||||
[nxv16i1, nxv8i1, nxv4i1, nxv2i1],
|
||||
16, (sequence "P%u", 0, 15)> {
|
||||
// SVE predicate register classes.
|
||||
class PPRClass<int lastreg> : RegisterClass<
|
||||
"AArch64",
|
||||
[ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16,
|
||||
(sequence "P%u", 0, lastreg)> {
|
||||
let Size = 16;
|
||||
}
|
||||
|
||||
class PPRAsmOperand <string name, int Width>: AsmOperandClass {
|
||||
def PPR : PPRClass<15>;
|
||||
def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class.
|
||||
|
||||
class PPRAsmOperand <string name, string RegClass, int Width>: AsmOperandClass {
|
||||
let Name = "SVE" # name # "Reg";
|
||||
let PredicateMethod = "isSVEVectorRegOfWidth<"
|
||||
# Width # ", AArch64::PPRRegClassID>";
|
||||
# Width # ", " # "AArch64::" # RegClass # "RegClassID>";
|
||||
let DiagnosticType = "InvalidSVE" # name # "Reg";
|
||||
let RenderMethod = "addRegOperands";
|
||||
let ParserMethod = "tryParseSVEPredicateVector";
|
||||
}
|
||||
|
||||
def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", -1>;
|
||||
def PPRAsmOp8 : PPRAsmOperand<"PredicateB", 8>;
|
||||
def PPRAsmOp16 : PPRAsmOperand<"PredicateH", 16>;
|
||||
def PPRAsmOp32 : PPRAsmOperand<"PredicateS", 32>;
|
||||
def PPRAsmOp64 : PPRAsmOperand<"PredicateD", 64>;
|
||||
def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", -1>;
|
||||
def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>;
|
||||
def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
|
||||
def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
|
||||
def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
|
||||
|
||||
def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>;
|
||||
def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>;
|
||||
@ -784,6 +788,18 @@ def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>;
|
||||
def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>;
|
||||
def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>;
|
||||
|
||||
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", -1>;
|
||||
def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
|
||||
def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
|
||||
def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
|
||||
def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
|
||||
|
||||
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>;
|
||||
def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>;
|
||||
def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>;
|
||||
def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>;
|
||||
def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>;
|
||||
|
||||
//******************************************************************************
|
||||
|
||||
// SVE vector register class
|
||||
|
@ -136,7 +136,7 @@ static cl::opt<bool>
|
||||
static cl::opt<int> EnableGlobalISelAtO(
|
||||
"aarch64-enable-global-isel-at-O", cl::Hidden,
|
||||
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
|
||||
cl::init(-1));
|
||||
cl::init(0));
|
||||
|
||||
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
|
||||
cl::init(true), cl::Hidden);
|
||||
|
@ -819,6 +819,10 @@ class AArch64Operand : public MCParsedAsmOperand {
|
||||
}
|
||||
|
||||
bool isReg() const override {
|
||||
return Kind == k_Register;
|
||||
}
|
||||
|
||||
bool isScalarReg() const {
|
||||
return Kind == k_Register && Reg.Kind == RegKind::Scalar;
|
||||
}
|
||||
|
||||
@ -839,6 +843,7 @@ class AArch64Operand : public MCParsedAsmOperand {
|
||||
RK = RegKind::SVEDataVector;
|
||||
break;
|
||||
case AArch64::PPRRegClassID:
|
||||
case AArch64::PPR_3bRegClassID:
|
||||
RK = RegKind::SVEPredicateVector;
|
||||
break;
|
||||
default:
|
||||
@ -3148,7 +3153,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
|
||||
return true;
|
||||
|
||||
if (Operands.size() < 2 ||
|
||||
!static_cast<AArch64Operand &>(*Operands[1]).isReg())
|
||||
!static_cast<AArch64Operand &>(*Operands[1]).isScalarReg())
|
||||
return Error(Loc, "Only valid when first operand is register");
|
||||
|
||||
bool IsXReg =
|
||||
@ -3648,6 +3653,12 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
|
||||
case Match_InvalidSVEPredicateSReg:
|
||||
case Match_InvalidSVEPredicateDReg:
|
||||
return Error(Loc, "invalid predicate register.");
|
||||
case Match_InvalidSVEPredicate3bAnyReg:
|
||||
case Match_InvalidSVEPredicate3bBReg:
|
||||
case Match_InvalidSVEPredicate3bHReg:
|
||||
case Match_InvalidSVEPredicate3bSReg:
|
||||
case Match_InvalidSVEPredicate3bDReg:
|
||||
return Error(Loc, "restricted predicate has range [0, 7].");
|
||||
default:
|
||||
llvm_unreachable("unexpected error code!");
|
||||
}
|
||||
@ -3670,7 +3681,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
if (NumOperands == 4 && Tok == "lsl") {
|
||||
AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
|
||||
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
|
||||
if (Op2.isReg() && Op3.isImm()) {
|
||||
if (Op2.isScalarReg() && Op3.isImm()) {
|
||||
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
|
||||
if (Op3CE) {
|
||||
uint64_t Op3Val = Op3CE->getValue();
|
||||
@ -3702,7 +3713,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
AArch64Operand LSBOp = static_cast<AArch64Operand &>(*Operands[2]);
|
||||
AArch64Operand WidthOp = static_cast<AArch64Operand &>(*Operands[3]);
|
||||
|
||||
if (Op1.isReg() && LSBOp.isImm() && WidthOp.isImm()) {
|
||||
if (Op1.isScalarReg() && LSBOp.isImm() && WidthOp.isImm()) {
|
||||
const MCConstantExpr *LSBCE = dyn_cast<MCConstantExpr>(LSBOp.getImm());
|
||||
const MCConstantExpr *WidthCE = dyn_cast<MCConstantExpr>(WidthOp.getImm());
|
||||
|
||||
@ -3758,7 +3769,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
|
||||
AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
|
||||
|
||||
if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
|
||||
if (Op1.isScalarReg() && Op3.isImm() && Op4.isImm()) {
|
||||
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
|
||||
const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
|
||||
|
||||
@ -3822,7 +3833,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
|
||||
AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
|
||||
|
||||
if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
|
||||
if (Op1.isScalarReg() && Op3.isImm() && Op4.isImm()) {
|
||||
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
|
||||
const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
|
||||
|
||||
@ -3901,7 +3912,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
// The source register can be Wn here, but the matcher expects a
|
||||
// GPR64. Twiddle it here if necessary.
|
||||
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
|
||||
if (Op.isReg()) {
|
||||
if (Op.isScalarReg()) {
|
||||
unsigned Reg = getXRegFromWReg(Op.getReg());
|
||||
Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
|
||||
Op.getStartLoc(), Op.getEndLoc(),
|
||||
@ -3911,13 +3922,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
// FIXME: Likewise for sxt[bh] with a Xd dst operand
|
||||
else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) {
|
||||
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
|
||||
if (Op.isReg() &&
|
||||
if (Op.isScalarReg() &&
|
||||
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
|
||||
Op.getReg())) {
|
||||
// The source register can be Wn here, but the matcher expects a
|
||||
// GPR64. Twiddle it here if necessary.
|
||||
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
|
||||
if (Op.isReg()) {
|
||||
if (Op.isScalarReg()) {
|
||||
unsigned Reg = getXRegFromWReg(Op.getReg());
|
||||
Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
|
||||
Op.getStartLoc(),
|
||||
@ -3928,13 +3939,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
// FIXME: Likewise for uxt[bh] with a Xd dst operand
|
||||
else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) {
|
||||
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
|
||||
if (Op.isReg() &&
|
||||
if (Op.isScalarReg() &&
|
||||
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
|
||||
Op.getReg())) {
|
||||
// The source register can be Wn here, but the matcher expects a
|
||||
// GPR32. Twiddle it here if necessary.
|
||||
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
|
||||
if (Op.isReg()) {
|
||||
if (Op.isScalarReg()) {
|
||||
unsigned Reg = getWRegFromXReg(Op.getReg());
|
||||
Operands[1] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
|
||||
Op.getStartLoc(),
|
||||
@ -4077,6 +4088,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
|
||||
case Match_InvalidSVEPredicateHReg:
|
||||
case Match_InvalidSVEPredicateSReg:
|
||||
case Match_InvalidSVEPredicateDReg:
|
||||
case Match_InvalidSVEPredicate3bAnyReg:
|
||||
case Match_InvalidSVEPredicate3bBReg:
|
||||
case Match_InvalidSVEPredicate3bHReg:
|
||||
case Match_InvalidSVEPredicate3bSReg:
|
||||
case Match_InvalidSVEPredicate3bDReg:
|
||||
case Match_MSR:
|
||||
case Match_MRS: {
|
||||
if (ErrorInfo >= Operands.size())
|
||||
|
@ -91,6 +91,9 @@ static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
|
||||
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
|
||||
uint64_t Address,
|
||||
const void *Decode);
|
||||
LLVM_ATTRIBUTE_UNUSED static DecodeStatus
|
||||
DecodePPR_3bRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address,
|
||||
const void *Decode);
|
||||
|
||||
static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
|
||||
uint64_t Address,
|
||||
@ -481,6 +484,16 @@ static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
|
||||
return Success;
|
||||
}
|
||||
|
||||
static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
|
||||
uint64_t Addr,
|
||||
const void* Decoder) {
|
||||
if (RegNo > 7)
|
||||
return Fail;
|
||||
|
||||
// Just reuse the PPR decode table
|
||||
return DecodePPRRegisterClass(Inst, RegNo, Addr, Decoder);
|
||||
}
|
||||
|
||||
static const unsigned VectorDecoderTable[] = {
|
||||
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
|
||||
AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,
|
||||
|
@ -605,10 +605,10 @@ class COFFAArch64AsmBackend : public AArch64AsmBackend {
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TheTriple,
|
||||
StringRef CPU,
|
||||
const MCTargetOptions &Options) {
|
||||
const Triple &TheTriple = STI.getTargetTriple();
|
||||
if (TheTriple.isOSBinFormatMachO())
|
||||
return new DarwinAArch64AsmBackend(T, TheTriple, MRI);
|
||||
|
||||
@ -624,10 +624,10 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TheTriple,
|
||||
StringRef CPU,
|
||||
const MCTargetOptions &Options) {
|
||||
const Triple &TheTriple = STI.getTargetTriple();
|
||||
assert(TheTriple.isOSBinFormatELF() &&
|
||||
"Big endian is only supported for ELF targets!");
|
||||
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
|
||||
|
@ -45,12 +45,12 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCRegisterInfo &MRI,
|
||||
MCContext &Ctx);
|
||||
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options);
|
||||
MCAsmBackend *createAArch64beAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options);
|
||||
|
||||
std::unique_ptr<MCObjectWriter>
|
||||
|
@ -815,6 +815,10 @@ class KernelScopeInfo {
|
||||
class AMDGPUAsmParser : public MCTargetAsmParser {
|
||||
MCAsmParser &Parser;
|
||||
|
||||
// Number of extra operands parsed after the first optional operand.
|
||||
// This may be necessary to skip hardcoded mandatory operands.
|
||||
static const unsigned MAX_OPR_LOOKAHEAD = 1;
|
||||
|
||||
unsigned ForcedEncodingSize = 0;
|
||||
bool ForcedDPP = false;
|
||||
bool ForcedSDWA = false;
|
||||
@ -1037,6 +1041,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
|
||||
|
||||
public:
|
||||
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
|
||||
OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
|
||||
|
||||
OperandMatchResultTy parseExpTgt(OperandVector &Operands);
|
||||
OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
|
||||
@ -3859,7 +3864,7 @@ AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
|
||||
} else {
|
||||
// Swizzle "offset" operand is optional.
|
||||
// If it is omitted, try parsing other optional operands.
|
||||
return parseOptionalOperand(Operands);
|
||||
return parseOptionalOpr(Operands);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4179,6 +4184,39 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
|
||||
};
|
||||
|
||||
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
|
||||
unsigned size = Operands.size();
|
||||
assert(size > 0);
|
||||
|
||||
OperandMatchResultTy res = parseOptionalOpr(Operands);
|
||||
|
||||
// This is a hack to enable hardcoded mandatory operands which follow
|
||||
// optional operands.
|
||||
//
|
||||
// Current design assumes that all operands after the first optional operand
|
||||
// are also optional. However implementation of some instructions violates
|
||||
// this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
|
||||
//
|
||||
// To alleviate this problem, we have to (implicitly) parse extra operands
|
||||
// to make sure autogenerated parser of custom operands never hit hardcoded
|
||||
// mandatory operands.
|
||||
|
||||
if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
|
||||
|
||||
// We have parsed the first optional operand.
|
||||
// Parse as many operands as necessary to skip all mandatory operands.
|
||||
|
||||
for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
|
||||
if (res != MatchOperand_Success ||
|
||||
getLexer().is(AsmToken::EndOfStatement)) break;
|
||||
if (getLexer().is(AsmToken::Comma)) Parser.Lex();
|
||||
res = parseOptionalOpr(Operands);
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
|
||||
OperandMatchResultTy res;
|
||||
for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
|
||||
// try to parse any optional operand here
|
||||
|
@ -198,9 +198,9 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
|
||||
} // end anonymous namespace
|
||||
|
||||
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options) {
|
||||
// Use 64-bit ELF for amdgcn
|
||||
return new ELFAMDGPUAsmBackend(T, TT);
|
||||
return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple());
|
||||
}
|
||||
|
@ -45,8 +45,9 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCRegisterInfo &MRI,
|
||||
MCContext &Ctx);
|
||||
|
||||
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
MCAsmBackend *createAMDGPUAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options);
|
||||
|
||||
std::unique_ptr<MCObjectWriter>
|
||||
|
@ -71,9 +71,9 @@ class MIMG_Store_Helper <bits<7> op, string asm,
|
||||
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
|
||||
asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> {
|
||||
let ssamp = 0;
|
||||
let mayLoad = 1; // TableGen requires this for matching with the intrinsics
|
||||
let mayLoad = 0;
|
||||
let mayStore = 1;
|
||||
let hasSideEffects = 1;
|
||||
let hasSideEffects = 0;
|
||||
let hasPostISelHook = 0;
|
||||
let DisableWQM = 1;
|
||||
}
|
||||
@ -103,10 +103,10 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
|
||||
(ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
|
||||
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
|
||||
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
|
||||
asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
|
||||
> {
|
||||
asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"> {
|
||||
let mayLoad = 1;
|
||||
let mayStore = 1;
|
||||
let hasSideEffects = 1;
|
||||
let hasSideEffects = 1; // FIXME: Remove this
|
||||
let hasPostISelHook = 0;
|
||||
let DisableWQM = 1;
|
||||
let Constraints = "$vdst = $vdata";
|
||||
|
@ -575,6 +575,221 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Image load.
|
||||
case Intrinsic::amdgcn_image_load:
|
||||
case Intrinsic::amdgcn_image_load_mip:
|
||||
|
||||
// Sample.
|
||||
case Intrinsic::amdgcn_image_sample:
|
||||
case Intrinsic::amdgcn_image_sample_cl:
|
||||
case Intrinsic::amdgcn_image_sample_d:
|
||||
case Intrinsic::amdgcn_image_sample_d_cl:
|
||||
case Intrinsic::amdgcn_image_sample_l:
|
||||
case Intrinsic::amdgcn_image_sample_b:
|
||||
case Intrinsic::amdgcn_image_sample_b_cl:
|
||||
case Intrinsic::amdgcn_image_sample_lz:
|
||||
case Intrinsic::amdgcn_image_sample_cd:
|
||||
case Intrinsic::amdgcn_image_sample_cd_cl:
|
||||
|
||||
// Sample with comparison.
|
||||
case Intrinsic::amdgcn_image_sample_c:
|
||||
case Intrinsic::amdgcn_image_sample_c_cl:
|
||||
case Intrinsic::amdgcn_image_sample_c_d:
|
||||
case Intrinsic::amdgcn_image_sample_c_d_cl:
|
||||
case Intrinsic::amdgcn_image_sample_c_l:
|
||||
case Intrinsic::amdgcn_image_sample_c_b:
|
||||
case Intrinsic::amdgcn_image_sample_c_b_cl:
|
||||
case Intrinsic::amdgcn_image_sample_c_lz:
|
||||
case Intrinsic::amdgcn_image_sample_c_cd:
|
||||
case Intrinsic::amdgcn_image_sample_c_cd_cl:
|
||||
|
||||
// Sample with offsets.
|
||||
case Intrinsic::amdgcn_image_sample_o:
|
||||
case Intrinsic::amdgcn_image_sample_cl_o:
|
||||
case Intrinsic::amdgcn_image_sample_d_o:
|
||||
case Intrinsic::amdgcn_image_sample_d_cl_o:
|
||||
case Intrinsic::amdgcn_image_sample_l_o:
|
||||
case Intrinsic::amdgcn_image_sample_b_o:
|
||||
case Intrinsic::amdgcn_image_sample_b_cl_o:
|
||||
case Intrinsic::amdgcn_image_sample_lz_o:
|
||||
case Intrinsic::amdgcn_image_sample_cd_o:
|
||||
case Intrinsic::amdgcn_image_sample_cd_cl_o:
|
||||
|
||||
// Sample with comparison and offsets.
|
||||
case Intrinsic::amdgcn_image_sample_c_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_cl_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_d_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_d_cl_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_l_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_b_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_b_cl_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_lz_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_cd_o:
|
||||
case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
|
||||
|
||||
// Basic gather4
|
||||
case Intrinsic::amdgcn_image_gather4:
|
||||
case Intrinsic::amdgcn_image_gather4_cl:
|
||||
case Intrinsic::amdgcn_image_gather4_l:
|
||||
case Intrinsic::amdgcn_image_gather4_b:
|
||||
case Intrinsic::amdgcn_image_gather4_b_cl:
|
||||
case Intrinsic::amdgcn_image_gather4_lz:
|
||||
|
||||
// Gather4 with comparison
|
||||
case Intrinsic::amdgcn_image_gather4_c:
|
||||
case Intrinsic::amdgcn_image_gather4_c_cl:
|
||||
case Intrinsic::amdgcn_image_gather4_c_l:
|
||||
case Intrinsic::amdgcn_image_gather4_c_b:
|
||||
case Intrinsic::amdgcn_image_gather4_c_b_cl:
|
||||
case Intrinsic::amdgcn_image_gather4_c_lz:
|
||||
|
||||
// Gather4 with offsets
|
||||
case Intrinsic::amdgcn_image_gather4_o:
|
||||
case Intrinsic::amdgcn_image_gather4_cl_o:
|
||||
case Intrinsic::amdgcn_image_gather4_l_o:
|
||||
case Intrinsic::amdgcn_image_gather4_b_o:
|
||||
case Intrinsic::amdgcn_image_gather4_b_cl_o:
|
||||
case Intrinsic::amdgcn_image_gather4_lz_o:
|
||||
|
||||
// Gather4 with comparison and offsets
|
||||
case Intrinsic::amdgcn_image_gather4_c_o:
|
||||
case Intrinsic::amdgcn_image_gather4_c_cl_o:
|
||||
case Intrinsic::amdgcn_image_gather4_c_l_o:
|
||||
case Intrinsic::amdgcn_image_gather4_c_b_o:
|
||||
case Intrinsic::amdgcn_image_gather4_c_b_cl_o:
|
||||
case Intrinsic::amdgcn_image_gather4_c_lz_o: {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.ptrVal = MFI->getImagePSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(1));
|
||||
Info.align = 0;
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_image_store:
|
||||
case Intrinsic::amdgcn_image_store_mip: {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Info.opc = ISD::INTRINSIC_VOID;
|
||||
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
|
||||
Info.ptrVal = MFI->getImagePSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(2));
|
||||
Info.flags = MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
Info.align = 0;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_image_atomic_swap:
|
||||
case Intrinsic::amdgcn_image_atomic_add:
|
||||
case Intrinsic::amdgcn_image_atomic_sub:
|
||||
case Intrinsic::amdgcn_image_atomic_smin:
|
||||
case Intrinsic::amdgcn_image_atomic_umin:
|
||||
case Intrinsic::amdgcn_image_atomic_smax:
|
||||
case Intrinsic::amdgcn_image_atomic_umax:
|
||||
case Intrinsic::amdgcn_image_atomic_and:
|
||||
case Intrinsic::amdgcn_image_atomic_or:
|
||||
case Intrinsic::amdgcn_image_atomic_xor:
|
||||
case Intrinsic::amdgcn_image_atomic_inc:
|
||||
case Intrinsic::amdgcn_image_atomic_dec: {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.ptrVal = MFI->getImagePSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(2));
|
||||
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
|
||||
// XXX - Should this be volatile without known ordering?
|
||||
Info.flags |= MachineMemOperand::MOVolatile;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_image_atomic_cmpswap: {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.ptrVal = MFI->getImagePSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(3));
|
||||
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
|
||||
// XXX - Should this be volatile without known ordering?
|
||||
Info.flags |= MachineMemOperand::MOVolatile;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_tbuffer_load:
|
||||
case Intrinsic::amdgcn_buffer_load:
|
||||
case Intrinsic::amdgcn_buffer_load_format: {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.ptrVal = MFI->getBufferPSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(0));
|
||||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
|
||||
// There is a constant offset component, but there are additional register
|
||||
// offsets which could break AA if we set the offset to anything non-0.
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_tbuffer_store:
|
||||
case Intrinsic::amdgcn_buffer_store:
|
||||
case Intrinsic::amdgcn_buffer_store_format: {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Info.opc = ISD::INTRINSIC_VOID;
|
||||
Info.ptrVal = MFI->getBufferPSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(1));
|
||||
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
|
||||
Info.flags = MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_buffer_atomic_swap:
|
||||
case Intrinsic::amdgcn_buffer_atomic_add:
|
||||
case Intrinsic::amdgcn_buffer_atomic_sub:
|
||||
case Intrinsic::amdgcn_buffer_atomic_smin:
|
||||
case Intrinsic::amdgcn_buffer_atomic_umin:
|
||||
case Intrinsic::amdgcn_buffer_atomic_smax:
|
||||
case Intrinsic::amdgcn_buffer_atomic_umax:
|
||||
case Intrinsic::amdgcn_buffer_atomic_and:
|
||||
case Intrinsic::amdgcn_buffer_atomic_or:
|
||||
case Intrinsic::amdgcn_buffer_atomic_xor: {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.ptrVal = MFI->getBufferPSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(1));
|
||||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOVolatile;
|
||||
return true;
|
||||
}
|
||||
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
Info.opc = ISD::INTRINSIC_W_CHAIN;
|
||||
Info.ptrVal = MFI->getBufferPSV(
|
||||
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
|
||||
CI.getArgOperand(2));
|
||||
Info.memVT = MVT::getVT(CI.getType());
|
||||
Info.flags = MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOVolatile;
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@ -2946,24 +3161,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
|
||||
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
if (TII->isMIMG(MI)) {
|
||||
if (!MI.memoperands_empty())
|
||||
return BB;
|
||||
if (MI.memoperands_empty() && MI.mayLoadOrStore()) {
|
||||
report_fatal_error("missing mem operand from MIMG instruction");
|
||||
}
|
||||
// Add a memoperand for mimg instructions so that they aren't assumed to
|
||||
// be ordered memory instructions.
|
||||
|
||||
MachinePointerInfo PtrInfo(MFI->getImagePSV());
|
||||
MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable;
|
||||
if (MI.mayStore())
|
||||
Flags |= MachineMemOperand::MOStore;
|
||||
|
||||
if (MI.mayLoad())
|
||||
Flags |= MachineMemOperand::MOLoad;
|
||||
|
||||
if (Flags != MachineMemOperand::MODereferenceable) {
|
||||
auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0);
|
||||
MI.addMemOperand(*MF, MMO);
|
||||
}
|
||||
|
||||
return BB;
|
||||
}
|
||||
|
||||
@ -4257,7 +4460,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
SDLoc DL(Op);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_atomic_inc:
|
||||
@ -4284,21 +4486,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
Op.getOperand(5), // glc
|
||||
Op.getOperand(6) // slc
|
||||
};
|
||||
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
|
||||
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
|
||||
EVT VT = Op.getValueType();
|
||||
EVT IntVT = VT.changeTypeToInteger();
|
||||
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(MFI->getBufferPSV()),
|
||||
MachineMemOperand::MOLoad,
|
||||
VT.getStoreSize(), VT.getStoreSize());
|
||||
|
||||
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
|
||||
auto *M = cast<MemSDNode>(Op);
|
||||
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
|
||||
M->getMemOperand());
|
||||
}
|
||||
case Intrinsic::amdgcn_tbuffer_load: {
|
||||
MemSDNode *M = cast<MemSDNode>(Op);
|
||||
SDValue Ops[] = {
|
||||
Op.getOperand(0), // Chain
|
||||
Op.getOperand(2), // rsrc
|
||||
@ -4312,14 +4511,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
Op.getOperand(10) // slc
|
||||
};
|
||||
|
||||
EVT VT = Op.getOperand(2).getValueType();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(),
|
||||
MachineMemOperand::MOLoad,
|
||||
VT.getStoreSize(), VT.getStoreSize());
|
||||
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
|
||||
Op->getVTList(), Ops, VT, MMO);
|
||||
Op->getVTList(), Ops, VT, M->getMemOperand());
|
||||
}
|
||||
case Intrinsic::amdgcn_buffer_atomic_swap:
|
||||
case Intrinsic::amdgcn_buffer_atomic_add:
|
||||
@ -4339,14 +4534,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
Op.getOperand(5), // offset
|
||||
Op.getOperand(6) // slc
|
||||
};
|
||||
EVT VT = Op.getOperand(3).getValueType();
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(),
|
||||
MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOVolatile,
|
||||
VT.getStoreSize(), 4);
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
auto *M = cast<MemSDNode>(Op);
|
||||
unsigned Opcode = 0;
|
||||
|
||||
switch (IntrID) {
|
||||
@ -4384,7 +4574,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
llvm_unreachable("unhandled atomic opcode");
|
||||
}
|
||||
|
||||
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO);
|
||||
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
|
||||
M->getMemOperand());
|
||||
}
|
||||
|
||||
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
|
||||
@ -4397,17 +4588,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
Op.getOperand(6), // offset
|
||||
Op.getOperand(7) // slc
|
||||
};
|
||||
EVT VT = Op.getOperand(4).getValueType();
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
MachinePointerInfo(),
|
||||
MachineMemOperand::MOLoad |
|
||||
MachineMemOperand::MOStore |
|
||||
MachineMemOperand::MODereferenceable |
|
||||
MachineMemOperand::MOVolatile,
|
||||
VT.getStoreSize(), 4);
|
||||
EVT VT = Op.getValueType();
|
||||
auto *M = cast<MemSDNode>(Op);
|
||||
|
||||
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
|
||||
Op->getVTList(), Ops, VT, MMO);
|
||||
Op->getVTList(), Ops, VT, M->getMemOperand());
|
||||
}
|
||||
|
||||
// Basic sample.
|
||||
|
@ -28,8 +28,6 @@ using namespace llvm;
|
||||
|
||||
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
||||
: AMDGPUMachineFunction(MF),
|
||||
BufferPSV(*(MF.getSubtarget().getInstrInfo())),
|
||||
ImagePSV(*(MF.getSubtarget().getInstrInfo())),
|
||||
PrivateSegmentBuffer(false),
|
||||
DispatchPtr(false),
|
||||
QueuePtr(false),
|
||||
|
@ -34,12 +34,14 @@ namespace llvm {
|
||||
|
||||
class MachineFrameInfo;
|
||||
class MachineFunction;
|
||||
class SIInstrInfo;
|
||||
class TargetRegisterClass;
|
||||
|
||||
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
|
||||
public:
|
||||
// TODO: Is the img rsrc useful?
|
||||
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
|
||||
PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
|
||||
PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
|
||||
|
||||
bool isConstant(const MachineFrameInfo *) const override {
|
||||
// This should probably be true for most images, but we will start by being
|
||||
@ -135,8 +137,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
|
||||
// Stack object indices for work item IDs.
|
||||
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
|
||||
|
||||
AMDGPUBufferPseudoSourceValue BufferPSV;
|
||||
AMDGPUImagePseudoSourceValue ImagePSV;
|
||||
DenseMap<const Value *,
|
||||
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
|
||||
DenseMap<const Value *,
|
||||
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
|
||||
|
||||
private:
|
||||
unsigned LDSWaveSpillSize = 0;
|
||||
@ -629,12 +633,22 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
return LDSWaveSpillSize;
}

const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
return &BufferPSV;
const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
const Value *BufferRsrc) {
assert(BufferRsrc);
auto PSV = BufferPSVs.try_emplace(
BufferRsrc,
llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
return PSV.first->second.get();
}

const AMDGPUImagePseudoSourceValue *getImagePSV() const {
return &ImagePSV;
const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
const Value *ImgRsrc) {
assert(ImgRsrc);
auto PSV = ImagePSVs.try_emplace(
ImgRsrc,
llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
return PSV.first->second.get();
}
};
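Editor's note: getBufferPSV/getImagePSV now keep one lazily created pseudo source value per resource descriptor instead of a single shared member. The same DenseMap::try_emplace caching idiom in isolation, as a hedged sketch with illustrative types ("Widget" stands in for the pseudo-source-value classes; needs llvm/ADT/DenseMap.h and llvm/ADT/STLExtras.h for llvm::make_unique):

struct Widget {};

struct WidgetCache {
  llvm::DenseMap<const llvm::Value *, std::unique_ptr<Widget>> Cache;

  const Widget *get(const llvm::Value *Key) {
    // try_emplace keeps the freshly built Widget only if Key was not already
    // present; otherwise the temporary is discarded and the cached entry wins.
    auto P = Cache.try_emplace(Key, llvm::make_unique<Widget>());
    return P.first->second.get();
  }
};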
@ -172,8 +172,8 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
|
||||
}
|
||||
|
||||
unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
|
||||
bool HasThumb2 = STI->getFeatureBits()[ARM::FeatureThumb2];
|
||||
bool HasV8MBaselineOps = STI->getFeatureBits()[ARM::HasV8MBaselineOps];
|
||||
bool HasThumb2 = STI.getFeatureBits()[ARM::FeatureThumb2];
|
||||
bool HasV8MBaselineOps = STI.getFeatureBits()[ARM::HasV8MBaselineOps];
|
||||
|
||||
switch (Op) {
|
||||
default:
|
||||
@ -389,7 +389,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
|
||||
case FK_SecRel_4:
|
||||
return Value;
|
||||
case ARM::fixup_arm_movt_hi16:
|
||||
if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
|
||||
if (IsResolved || !STI.getTargetTriple().isOSBinFormatELF())
|
||||
Value >>= 16;
|
||||
LLVM_FALLTHROUGH;
|
||||
case ARM::fixup_arm_movw_lo16: {
|
||||
@ -401,7 +401,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
|
||||
return Value;
|
||||
}
|
||||
case ARM::fixup_t2_movt_hi16:
|
||||
if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
|
||||
if (IsResolved || !STI.getTargetTriple().isOSBinFormatELF())
|
||||
Value >>= 16;
|
||||
LLVM_FALLTHROUGH;
|
||||
case ARM::fixup_t2_movw_lo16: {
|
||||
@ -591,7 +591,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
|
||||
case ARM::fixup_arm_thumb_cp:
|
||||
// On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
|
||||
// could have an error on our hands.
|
||||
if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
|
||||
if (!STI.getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
|
||||
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
|
||||
if (FixupDiagnostic) {
|
||||
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
|
||||
@ -615,8 +615,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
|
||||
}
|
||||
case ARM::fixup_arm_thumb_br:
|
||||
// Offset by 4 and don't encode the lower bit, which is always 0.
|
||||
if (!STI->getFeatureBits()[ARM::FeatureThumb2] &&
|
||||
!STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
|
||||
if (!STI.getFeatureBits()[ARM::FeatureThumb2] &&
|
||||
!STI.getFeatureBits()[ARM::HasV8MBaselineOps]) {
|
||||
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
|
||||
if (FixupDiagnostic) {
|
||||
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
|
||||
@ -626,7 +626,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
|
||||
return ((Value - 4) >> 1) & 0x7ff;
|
||||
case ARM::fixup_arm_thumb_bcc:
|
||||
// Offset by 4 and don't encode the lower bit, which is always 0.
|
||||
if (!STI->getFeatureBits()[ARM::FeatureThumb2]) {
|
||||
if (!STI.getFeatureBits()[ARM::FeatureThumb2]) {
|
||||
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
|
||||
if (FixupDiagnostic) {
|
||||
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
|
||||
@ -1154,51 +1154,52 @@ static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TheTriple, StringRef CPU,
|
||||
const MCTargetOptions &Options,
|
||||
bool isLittle) {
|
||||
const Triple &TheTriple = STI.getTargetTriple();
|
||||
switch (TheTriple.getObjectFormat()) {
|
||||
default:
|
||||
llvm_unreachable("unsupported object format");
|
||||
case Triple::MachO: {
|
||||
MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName());
|
||||
return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS);
|
||||
return new ARMAsmBackendDarwin(T, STI, MRI, CS);
|
||||
}
|
||||
case Triple::COFF:
|
||||
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
|
||||
return new ARMAsmBackendWinCOFF(T, TheTriple);
|
||||
return new ARMAsmBackendWinCOFF(T, STI);
|
||||
case Triple::ELF:
|
||||
assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
|
||||
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
|
||||
return new ARMAsmBackendELF(T, TheTriple, OSABI, isLittle);
|
||||
return new ARMAsmBackendELF(T, STI, OSABI, isLittle);
|
||||
}
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options) {
|
||||
return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
|
||||
return createARMAsmBackend(T, STI, MRI, Options, true);
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options) {
|
||||
return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
|
||||
return createARMAsmBackend(T, STI, MRI, Options, false);
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options) {
|
||||
return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
|
||||
return createARMAsmBackend(T, STI, MRI, Options, true);
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options) {
|
||||
return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
|
||||
return createARMAsmBackend(T, STI, MRI, Options, false);
|
||||
}
|
||||
|
@ -19,22 +19,20 @@
|
||||
namespace llvm {
|
||||
|
||||
class ARMAsmBackend : public MCAsmBackend {
|
||||
const MCSubtargetInfo *STI;
|
||||
const MCSubtargetInfo &STI;
|
||||
bool isThumbMode; // Currently emitting Thumb code.
|
||||
bool IsLittleEndian; // Big or little endian.
|
||||
public:
|
||||
ARMAsmBackend(const Target &T, const Triple &TT, bool IsLittle)
|
||||
: MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
|
||||
isThumbMode(TT.getArchName().startswith("thumb")),
|
||||
ARMAsmBackend(const Target &T, const MCSubtargetInfo &STI, bool IsLittle)
|
||||
: MCAsmBackend(), STI(STI),
|
||||
isThumbMode(STI.getTargetTriple().isThumb()),
|
||||
IsLittleEndian(IsLittle) {}
|
||||
|
||||
~ARMAsmBackend() override { delete STI; }
|
||||
|
||||
unsigned getNumFixupKinds() const override {
|
||||
return ARM::NumTargetFixupKinds;
|
||||
}
|
||||
|
||||
bool hasNOP() const { return STI->getFeatureBits()[ARM::HasV6T2Ops]; }
|
||||
bool hasNOP() const { return STI.getFeatureBits()[ARM::HasV6T2Ops]; }
|
||||
|
||||
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
|
||||
|
||||
|
@ -19,10 +19,10 @@ class ARMAsmBackendDarwin : public ARMAsmBackend {
|
||||
const MCRegisterInfo &MRI;
|
||||
public:
|
||||
const MachO::CPUSubTypeARM Subtype;
|
||||
ARMAsmBackendDarwin(const Target &T, const Triple &TT,
|
||||
ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
|
||||
: ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
|
||||
}
|
||||
: ARMAsmBackend(T, STI, /* IsLittleEndian */ true), MRI(MRI),
|
||||
Subtype(st) {}
|
||||
|
||||
std::unique_ptr<MCObjectWriter>
|
||||
createObjectWriter(raw_pwrite_stream &OS) const override {
|
||||
|
@ -20,9 +20,9 @@ namespace {
|
||||
class ARMAsmBackendELF : public ARMAsmBackend {
|
||||
public:
|
||||
uint8_t OSABI;
|
||||
ARMAsmBackendELF(const Target &T, const Triple &TT, uint8_t OSABI,
|
||||
ARMAsmBackendELF(const Target &T, const MCSubtargetInfo &STI, uint8_t OSABI,
|
||||
bool IsLittle)
|
||||
: ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
|
||||
: ARMAsmBackend(T, STI, IsLittle), OSABI(OSABI) {}
|
||||
|
||||
std::unique_ptr<MCObjectWriter>
|
||||
createObjectWriter(raw_pwrite_stream &OS) const override {
|
||||
|
@ -17,8 +17,8 @@ using namespace llvm;
|
||||
namespace {
|
||||
class ARMAsmBackendWinCOFF : public ARMAsmBackend {
|
||||
public:
|
||||
ARMAsmBackendWinCOFF(const Target &T, const Triple &TheTriple)
|
||||
: ARMAsmBackend(T, TheTriple, true) {}
|
||||
ARMAsmBackendWinCOFF(const Target &T, const MCSubtargetInfo &STI)
|
||||
: ARMAsmBackend(T, STI, true) {}
|
||||
std::unique_ptr<MCObjectWriter>
|
||||
createObjectWriter(raw_pwrite_stream &OS) const override {
|
||||
return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
|
||||
|
@ -68,27 +68,27 @@ MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCRegisterInfo &MRI,
|
||||
MCContext &Ctx);
|
||||
|
||||
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
MCAsmBackend *createARMAsmBackend(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options,
|
||||
bool IsLittleEndian);
|
||||
|
||||
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options);
|
||||
|
||||
MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options);
|
||||
|
||||
MCAsmBackend *createThumbLEAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options);
|
||||
|
||||
MCAsmBackend *createThumbBEAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions &Options);
|
||||
|
||||
// Construct a PE/COFF machine code streamer which will generate a PE/COFF
|
||||
|
@ -476,10 +476,10 @@ bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
|
||||
}
|
||||
}
|
||||
|
||||
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const llvm::MCTargetOptions &TO) {
|
||||
return new AVRAsmBackend(TT.getOS());
|
||||
return new AVRAsmBackend(STI.getTargetTriple().getOS());
|
||||
}
|
||||
|
||||
} // end of namespace llvm
|
||||
|
@ -26,6 +26,7 @@ class MCContext;
|
||||
class MCInstrInfo;
|
||||
class MCObjectWriter;
|
||||
class MCRegisterInfo;
|
||||
class MCSubtargetInfo;
|
||||
class MCTargetOptions;
|
||||
class StringRef;
|
||||
class Target;
|
||||
@ -42,8 +43,8 @@ MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
|
||||
MCContext &Ctx);
|
||||
|
||||
/// Creates an assembly backend for AVR.
|
||||
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const llvm::MCTargetOptions &TO);
|
||||
|
||||
/// Creates an ELF object writer for AVR.
|
||||
|
@ -104,15 +104,15 @@ BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions&) {
|
||||
const MCTargetOptions &) {
|
||||
return new BPFAsmBackend(/*IsLittleEndian=*/true);
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T,
|
||||
const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
const MCTargetOptions&) {
|
||||
const MCTargetOptions &) {
|
||||
return new BPFAsmBackend(/*IsLittleEndian=*/false);
|
||||
}
|
||||
|
@ -45,11 +45,11 @@ MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCRegisterInfo &MRI,
|
||||
MCContext &Ctx);
|
||||
|
||||
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options);
|
||||
MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI,
|
||||
const Triple &TT, StringRef CPU,
|
||||
MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCSubtargetInfo &STI,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCTargetOptions &Options);
|
||||
|
||||
std::unique_ptr<MCObjectWriter> createBPFELFObjectWriter(raw_pwrite_stream &OS,
|
||||
|
@ -1050,14 +1050,11 @@ bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
|
||||
// Check if the exit values have types that are no wider than the type
|
||||
// that we want to promote to.
|
||||
unsigned DestBW = DestTy->getBitWidth();
|
||||
for (Instruction &In : *ExitB) {
|
||||
PHINode *P = dyn_cast<PHINode>(&In);
|
||||
if (!P)
|
||||
break;
|
||||
if (P->getNumIncomingValues() != 1)
|
||||
for (PHINode &P : ExitB->phis()) {
|
||||
if (P.getNumIncomingValues() != 1)
|
||||
return false;
|
||||
assert(P->getIncomingBlock(0) == LoopB);
|
||||
IntegerType *T = dyn_cast<IntegerType>(P->getType());
|
||||
assert(P.getIncomingBlock(0) == LoopB);
|
||||
IntegerType *T = dyn_cast<IntegerType>(P.getType());
|
||||
if (!T || T->getBitWidth() > DestBW)
|
||||
return false;
|
||||
}
|
||||
|
@ -2925,6 +2925,23 @@ let Predicates = [UseHVX] in {
def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
def vzero: PatFrag<(ops), (HexagonVZERO)>;

def VSxtb: OutPatFrag<(ops node:$Vs),
(V6_vshuffvdd (HiVec (V6_vsb $Vs)),
(LoVec (V6_vsb $Vs)),
(A2_tfrsi -2))>;
def VSxth: OutPatFrag<(ops node:$Vs),
(V6_vshuffvdd (HiVec (V6_vsh $Vs)),
(LoVec (V6_vsh $Vs)),
(A2_tfrsi -4))>;
def VZxtb: OutPatFrag<(ops node:$Vs),
(V6_vshuffvdd (HiVec (V6_vzb $Vs)),
(LoVec (V6_vzb $Vs)),
(A2_tfrsi -2))>;
def VZxth: OutPatFrag<(ops node:$Vs),
(V6_vshuffvdd (HiVec (V6_vzh $Vs)),
(LoVec (V6_vzh $Vs)),
(A2_tfrsi -4))>;

let Predicates = [UseHVX] in {
def: Pat<(VecI8 vzero), (V6_vd0)>;
def: Pat<(VecI16 vzero), (V6_vd0)>;
@ -2970,25 +2987,18 @@ let Predicates = [UseHVX] in {
def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;

def: Pat<(VecPI16 (sext HVI8:$Vs)), (V6_vsb HvxVR:$Vs)>;
def: Pat<(VecPI32 (sext HVI16:$Vs)), (V6_vsh HvxVR:$Vs)>;
def: Pat<(VecPI16 (zext HVI8:$Vs)), (V6_vzb HvxVR:$Vs)>;
def: Pat<(VecPI32 (zext HVI16:$Vs)), (V6_vzh HvxVR:$Vs)>;
def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>;
def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>;

def: Pat<(sext_inreg HVI32:$Vs, v16i16),
(V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
(HiVec (V6_vsh HvxVR:$Vs)))>;
def: Pat<(sext_inreg HVI32:$Vs, v32i16),
(V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
(HiVec (V6_vsh HvxVR:$Vs)))>;

def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (V6_vsb HvxVR:$Vs))>;
def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (V6_vsh HvxVR:$Vs))>;
def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
(LoVec (V6_vsh (LoVec (V6_vsb HvxVR:$Vs))))>;
(LoVec (VSxth (LoVec (VSxtb $Vs))))>;

def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (V6_vzb HvxVR:$Vs))>;
def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (V6_vzh HvxVR:$Vs))>;
def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
(LoVec (V6_vzh (LoVec (V6_vzb HvxVR:$Vs))))>;
(LoVec (VZxth (LoVec (VZxtb $Vs))))>;
}
@ -765,11 +765,12 @@ class HexagonAsmBackend : public MCAsmBackend {

// MCAsmBackend
MCAsmBackend *llvm::createHexagonAsmBackend(Target const &T,
MCRegisterInfo const & /*MRI*/,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
const MCSubtargetInfo &STI,
MCRegisterInfo const & /*MRI*/,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());

StringRef CPUString = Hexagon_MC::selectHexagonCPU(CPU);
StringRef CPUString = Hexagon_MC::selectHexagonCPU(STI.getCPU());
return new HexagonAsmBackend(T, TT, OSABI, CPUString);
}

@ -61,8 +61,8 @@ MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &MCT);

MCAsmBackend *createHexagonAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);

std::unique_ptr<MCObjectWriter>
@ -165,9 +165,10 @@ LanaiAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
} // namespace

MCAsmBackend *llvm::createLanaiAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo & /*MRI*/,
const Triple &TT, StringRef /*CPU*/,
const MCTargetOptions & /*Options*/) {
const Triple &TT = STI.getTargetTriple();
if (!TT.isOSBinFormatELF())
llvm_unreachable("OS not supported");

@ -38,8 +38,8 @@ MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);

MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);

std::unique_ptr<MCObjectWriter>
@ -476,8 +476,9 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
}

MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
return new MipsAsmBackend(T, MRI, TT, CPU, Options.ABIName == "n32");
return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(),
Options.ABIName == "n32");
}

@ -45,8 +45,8 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);

MCAsmBackend *createMipsAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createMipsAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);

std::unique_ptr<MCObjectWriter>

@ -3863,13 +3863,17 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'c': // register suitable for indirect jump
if (VT == MVT::i32)
return std::make_pair((unsigned)Mips::T9, &Mips::GPR32RegClass);
assert(VT == MVT::i64 && "Unexpected type.");
return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
case 'l': // register suitable for indirect jump
if (VT == MVT::i64)
return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
case 'l': // use the `lo` register to store values
// that are no bigger than a word
if (VT == MVT::i32)
return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass);
return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass);
case 'x': // register suitable for indirect jump
case 'x': // use the concatenated `hi` and `lo` registers
// to store doubleword values
// Fixme: Not triggering the use of both hi and low
// This will generate an error message
return std::make_pair(0U, nullptr);
@ -18,6 +18,7 @@
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
@ -231,9 +232,10 @@ namespace {
} // end anonymous namespace

MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSDarwin())
return new DarwinPPCAsmBackend(T);

@ -29,6 +29,7 @@ class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
class Target;
class Triple;
@ -43,8 +44,8 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);

MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);

/// Construct an PPC ELF object writer.
@ -4397,13 +4397,18 @@ hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
CallingConv::ID CalleeCC) {
// Tail or Sibling call optimization (TCO/SCO) needs callee and caller to
// have the same calling convention.
if (CallerCC != CalleeCC)
// Tail calls are possible with fastcc and ccc.
auto isTailCallableCC = [] (CallingConv::ID CC){
return CC == CallingConv::C || CC == CallingConv::Fast;
};
if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
return false;

// Tail or Sibling calls can be done with fastcc/ccc.
return (CallerCC == CallingConv::Fast || CallerCC == CallingConv::C);
// We can safely tail call both fastcc and ccc callees from a c calling
// convention caller. If the caller is fastcc, we may have less stack space
// than a non-fastcc caller with the same signature so disable tail-calls in
// that case.
return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}

bool
@ -4434,10 +4439,28 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// Callee contains any byval parameter is not supported, too.
// Note: This is a quick work around, because in some cases, e.g.
// caller's stack size > callee's stack size, we are still able to apply
// sibling call optimization. See: https://reviews.llvm.org/D23441#513574
// sibling call optimization. For example, gcc is able to do SCO for caller1
// in the following example, but not for caller2.
// struct test {
// long int a;
// char ary[56];
// } gTest;
// __attribute__((noinline)) int callee(struct test v, struct test *b) {
// b->a = v.a;
// return 0;
// }
// void caller1(struct test a, struct test c, struct test *b) {
// callee(gTest, b); }
// void caller2(struct test *b) { callee(gTest, b); }
if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
return false;

// If callee and caller use different calling conventions, we cannot pass
// parameters on stack since offsets for the parameter area may be different.
if (Caller.getCallingConv() != CalleeCC &&
needStackSlotPassParameters(Subtarget, Outs))
return false;

// No TCO/SCO on indirect call because Caller have to restore its TOC
if (!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee))
@ -2433,7 +2433,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
int64_t MB = MI.getOperand(3).getImm();
APInt InVal(Opc == PPC::RLDICL ? 64 : 32, SExtImm, true);
InVal = InVal.rotl(SH);
uint64_t Mask = (1LU << (63 - MB + 1)) - 1;
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
@ -2457,8 +2457,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
int64_t ME = MI.getOperand(4).getImm();
APInt InVal(32, SExtImm, true);
InVal = InVal.rotl(SH);
// Set the bits ( MB + 32 ) to ( ME + 32 ).
uint64_t Mask = ((1 << (32 - MB)) - 1) & ~((1 << (31 - ME)) - 1);
// Set the bits ( MB + 32 ) to ( ME + 32 ).
uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
@ -2527,6 +2527,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ConstantOpNo = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
III.TruncateImmTo = 0;
switch (Opc) {
default: return false;
case PPC::ADD4:
@ -2600,10 +2601,6 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::RLWNM8:
case PPC::RLWNMo:
case PPC::RLWNM8o:
case PPC::RLDCL:
case PPC::RLDCLo:
case PPC::RLDCR:
case PPC::RLDCRo:
case PPC::SLW:
case PPC::SLW8:
case PPC::SLWo:
@ -2614,6 +2611,50 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::SRW8o:
case PPC::SRAW:
case PPC::SRAWo:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
// This isn't actually true, but the instructions ignore any of the
// upper bits, so any immediate loaded with an LI is acceptable.
// This does not apply to shift right algebraic because a value
// out of range will produce a -1/0.
III.ImmWidth = 16;
if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 ||
Opc == PPC::RLWNMo || Opc == PPC::RLWNM8o)
III.TruncateImmTo = 5;
else
III.TruncateImmTo = 6;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRAW:
III.ImmWidth = 5;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRAWI;
break;
case PPC::SRAWo:
III.ImmWidth = 5;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRAWIo;
break;
}
break;
case PPC::RLDCL:
case PPC::RLDCLo:
case PPC::RLDCR:
case PPC::RLDCRo:
case PPC::SLD:
case PPC::SLDo:
case PPC::SRD:
@ -2626,33 +2667,34 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.IsCommutative = false;
// This isn't actually true, but the instructions ignore any of the
// upper bits, so any immediate loaded with an LI is acceptable.
// This does not apply to shift right algebraic because a value
// out of range will produce a -1/0.
III.ImmWidth = 16;
if (Opc == PPC::RLDCL || Opc == PPC::RLDCLo ||
Opc == PPC::RLDCR || Opc == PPC::RLDCRo)
III.TruncateImmTo = 6;
else
III.TruncateImmTo = 7;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRAW: III.ImmOpcode = PPC::SRAWI; break;
case PPC::SRAWo: III.ImmOpcode = PPC::SRAWIo; break;
case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break;
case PPC::SRAD: III.ImmOpcode = PPC::SRADI; break;
case PPC::SRADo: III.ImmOpcode = PPC::SRADIo; break;
case PPC::SRAD:
III.ImmWidth = 6;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRADI;
break;
case PPC::SRADo:
III.ImmWidth = 6;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRADIo;
break;
}
break;
// Loads and stores:
@ -2866,6 +2908,8 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
if (III.TruncateImmTo)
Imm &= ((1 << III.TruncateImmTo) - 1);
if (III.SignedImm) {
APInt ActualValue(64, Imm, true);
if (!ActualValue.isSignedIntN(III.ImmWidth))
@ -97,6 +97,8 @@ struct ImmInstrInfo {
uint64_t ImmOpcode : 16;
// The size of the immediate.
uint64_t ImmWidth : 5;
// The immediate should be truncated to N bits.
uint64_t TruncateImmTo : 5;
};

// Information required to convert an instruction to just a materialized

@ -55,7 +55,7 @@ FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
"convert reg-reg instructions to reg-imm"));

static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(false),
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
cl::desc("Convert eligible reg+reg instructions to reg+imm"));

static cl::opt<bool>

@ -35,7 +35,7 @@ STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");

static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(false),
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));

namespace {
@ -230,9 +230,10 @@ RISCVAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
} // end anonymous namespace

MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
return new RISCVAsmBackend(OSABI, TT.isArch64Bit());
}

@ -40,8 +40,8 @@ MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);

MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);

std::unique_ptr<MCObjectWriter>

@ -580,7 +580,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
}

MachineFunction &MF = DAG.getMachineFunction();
MVT XLenVT = Subtarget.getXLenVT();
EVT PtrVT = getPointerTy(DAG.getDataLayout());

if (IsVarArg)
@ -593,7 +592,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(

for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
assert(VA.getLocVT() == XLenVT && "Unhandled argument type");
assert(VA.getLocVT() == Subtarget.getXLenVT() && "Unhandled argument type");
SDValue ArgValue;
if (VA.isRegLoc())
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);

@ -177,7 +177,7 @@ class CS_ALU<bits<2> funct2, string OpcodeStr, RegisterClass cls,

let Predicates = [HasStdExtC] in {

let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in
def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
(ins SP:$rs1, uimm10_lsb00nonzero:$imm),
"c.addi4spn", "$rd, $rs1, $imm"> {
@ -260,7 +260,7 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
}

let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
DecoderNamespace = "RISCV32Only_" in
DecoderNamespace = "RISCV32Only_", Defs = [X1] in
def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset),
"c.jal", "$offset">,
Requires<[IsRV32]>;
@ -14,6 +14,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/TargetRegistry.h"

@ -301,8 +302,8 @@ namespace {
} // end anonymous namespace

MCAsmBackend *llvm::createSparcAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
return new ELFSparcAsmBackend(T, TT.getOS());
return new ELFSparcAsmBackend(T, STI.getTargetTriple().getOS());
}

@ -40,8 +40,8 @@ Target &getTheSparcelTarget();
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createSparcAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
@ -14,6 +14,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"

using namespace llvm;

@ -122,9 +123,10 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
}

MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
uint8_t OSABI =
MCELFObjectTargetWriter::getOSABI(STI.getTargetTriple().getOS());
return new SystemZMCAsmBackend(OSABI);
}

@ -89,8 +89,8 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);

MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);

std::unique_ptr<MCObjectWriter> createSystemZObjectWriter(raw_pwrite_stream &OS,
@ -69,10 +69,10 @@ static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
}

static MCAsmBackend *createAsmBackend(const Target & /*T*/,
const MCSubtargetInfo &STI,
const MCRegisterInfo & /*MRI*/,
const Triple &TT, StringRef /*CPU*/,
const MCTargetOptions & /*Options*/) {
return createWebAssemblyAsmBackend(TT);
return createWebAssemblyAsmBackend(STI.getTargetTriple());
}

static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU,
@ -843,10 +843,11 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
StringRef CPU = STI.getCPU();
if (TheTriple.isOSBinFormatMachO())
return new DarwinX86_32AsmBackend(T, MRI, CPU);

@ -862,10 +863,11 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
StringRef CPU = STI.getCPU();
if (TheTriple.isOSBinFormatMachO()) {
MachO::CPUSubTypeX86 CS =
StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())

@ -70,11 +70,13 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);

MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createX86_32AsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createX86_64AsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);

/// Implements X86-only directives for assembly emission.
@ -166,81 +166,6 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
return true;
}

/// Check if register \p Reg is live after the \p MI.
///
/// \p LiveRegs should be in a state describing liveness information in
/// that exact place as this function tries to precise analysis made
/// by \p LiveRegs by exploiting the information about particular
/// instruction \p MI. \p MI is expected to be one of the MOVs handled
/// by the x86FixupBWInsts pass.
/// Note: similar to LivePhysRegs::contains this would state that
/// super-register is not used if only some part of it is used.
///
/// X86 backend does not have subregister liveness tracking enabled,
/// so liveness information might be overly conservative. However, for
/// some specific instructions (this pass only cares about MOVs) we can
/// produce more precise results by analysing that MOV's operands.
///
/// Indeed, if super-register is not live before the mov it means that it
/// was originally <read-undef> and so we are free to modify these
/// undef upper bits. That may happen in case where the use is in another MBB
/// and the vreg/physreg corresponding to the move has higher width than
/// necessary (e.g. due to register coalescing with a "truncate" copy).
/// So, it handles pattern like this:
///
/// %bb.2: derived from LLVM BB %if.then
/// Live Ins: %rdi
/// Predecessors according to CFG: %bb.0
/// %ax = MOV16rm killed %rdi, 1, %noreg, 0, %noreg, implicit-def %eax;
/// mem:LD2[%p]
/// No implicit %eax
/// Successors according to CFG: %bb.3(?%)
///
/// %bb.3: derived from LLVM BB %if.end
/// Live Ins: %eax Only %ax is actually live
/// Predecessors according to CFG: %bb.2 %bb.1
/// %ax = KILL %ax, implicit killed %eax
/// RET 0, %ax
static bool isLive(const MachineInstr &MI,
const LivePhysRegs &LiveRegs,
const TargetRegisterInfo *TRI,
unsigned Reg) {
if (!LiveRegs.contains(Reg))
return false;

unsigned Opc = MI.getOpcode(); (void)Opc;
// These are the opcodes currently handled by the pass, if something
// else will be added we need to ensure that new opcode has the same
// properties.
assert((Opc == X86::MOV8rm || Opc == X86::MOV16rm || Opc == X86::MOV8rr ||
Opc == X86::MOV16rr) &&
"Unexpected opcode.");

bool IsDefined = false;
for (auto &MO: MI.implicit_operands()) {
if (!MO.isReg())
continue;

assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!");

for (MCSuperRegIterator Supers(Reg, TRI, true); Supers.isValid(); ++Supers) {
if (*Supers == MO.getReg()) {
if (MO.isDef())
IsDefined = true;
else
return true; // SuperReg Imp-used' -> live before the MI
}
}
}
// Reg is not Imp-def'ed -> it's live both before/after the instruction.
if (!IsDefined)
return true;

// Otherwise, the Reg is not live before the MI and the MOV can't
// make it really live, so it's in fact dead even after the MI.
return false;
}

/// \brief Check if after \p OrigMI the only portion of super register
/// of the destination register of \p OrigMI that is alive is that
/// destination register.
@ -262,20 +187,84 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
if (SubRegIdx == X86::sub_8bit_hi)
return false;

if (isLive(*OrigMI, LiveRegs, TRI, SuperDestReg))
return false;

if (SubRegIdx == X86::sub_8bit) {
// In the case of byte registers, we also have to check that the upper
// byte register is also dead. That is considered to be independent of
// whether the super-register is dead.
unsigned UpperByteReg =
getX86SubSuperRegister(SuperDestReg, 8, /*High=*/true);

if (isLive(*OrigMI, LiveRegs, TRI, UpperByteReg))
return false;
// If neither the destination-super register nor any applicable subregisters
// are live after this instruction, then the super register is safe to use.
if (!LiveRegs.contains(SuperDestReg)) {
// If the original destination register was not the low 8-bit subregister
// then the super register check is sufficient.
if (SubRegIdx != X86::sub_8bit)
return true;
// If the original destination register was the low 8-bit subregister and
// we also need to check the 16-bit subregister and the high 8-bit
// subregister.
if (!LiveRegs.contains(getX86SubSuperRegister(OrigDestReg, 16)) &&
!LiveRegs.contains(getX86SubSuperRegister(SuperDestReg, 8,
/*High=*/true)))
return true;
// Otherwise, we have a little more checking to do.
}

// If we get here, the super-register destination (or some part of it) is
// marked as live after the original instruction.
//
// The X86 backend does not have subregister liveness tracking enabled,
// so liveness information might be overly conservative. Specifically, the
// super register might be marked as live because it is implicitly defined
// by the instruction we are examining.
//
// However, for some specific instructions (this pass only cares about MOVs)
// we can produce more precise results by analysing that MOV's operands.
//
// Indeed, if super-register is not live before the mov it means that it
// was originally <read-undef> and so we are free to modify these
// undef upper bits. That may happen in case where the use is in another MBB
// and the vreg/physreg corresponding to the move has higher width than
// necessary (e.g. due to register coalescing with a "truncate" copy).
// So, we would like to handle patterns like this:
//
// %bb.2: derived from LLVM BB %if.then
// Live Ins: %rdi
// Predecessors according to CFG: %bb.0
// %ax<def> = MOV16rm killed %rdi, 1, %noreg, 0, %noreg, implicit-def %eax
// ; No implicit %eax
// Successors according to CFG: %bb.3(?%)
//
// %bb.3: derived from LLVM BB %if.end
// Live Ins: %eax Only %ax is actually live
// Predecessors according to CFG: %bb.2 %bb.1
// %ax = KILL %ax, implicit killed %eax
// RET 0, %ax
unsigned Opc = OrigMI->getOpcode(); (void)Opc;
// These are the opcodes currently handled by the pass, if something
// else will be added we need to ensure that new opcode has the same
// properties.
assert((Opc == X86::MOV8rm || Opc == X86::MOV16rm || Opc == X86::MOV8rr ||
Opc == X86::MOV16rr) &&
"Unexpected opcode.");

bool IsDefined = false;
for (auto &MO: OrigMI->implicit_operands()) {
if (!MO.isReg())
continue;

assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!");

for (MCSuperRegIterator Supers(OrigDestReg, TRI, true); Supers.isValid();
++Supers) {
if (*Supers == MO.getReg()) {
if (MO.isDef())
IsDefined = true;
else
return false; // SuperReg Imp-used' -> live before the MI
}
}
}
// Reg is not Imp-def'ed -> it's live both before/after the instruction.
if (!IsDefined)
return false;

// Otherwise, the Reg is not live before the MI and the MOV can't
// make it really live, so it's in fact dead even after the MI.
return true;
}
@ -996,8 +996,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);

setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
@ -1151,15 +1151,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);

setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i1, MVT::v4i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i1, MVT::v4i32);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);

setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
if (Subtarget.hasVLX()) {
setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
}

// Extends of v16i1/v8i1 to 128-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom);
@ -1186,9 +1197,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1,
MVT::v16i1, MVT::v32i1, MVT::v64i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
for (auto VT : { MVT::v1i1, MVT::v8i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
@ -1219,11 +1229,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}

setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v16i16, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::v16i8, Promote);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);

@ -1428,6 +1438,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
for (auto VT : { MVT::v16i1, MVT::v32i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

// Extends from v32i1 masks to 256-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
@ -1540,6 +1552,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
for (auto VT : { MVT::v2i1, MVT::v4i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

// Extends from v2i1/v4i1 masks to 128-bit vectors.
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
@ -2140,6 +2154,10 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
const SDLoc &Dl, SelectionDAG &DAG) {
EVT ValVT = ValArg.getValueType();

if (ValVT == MVT::v1i1)
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
DAG.getIntPtrConstant(0, Dl));

if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
// Two stage lowering might be required
@ -4625,6 +4643,14 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}

bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
EVT BitcastVT) const {
if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
return false;

return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
}

bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const SelectionDAG &DAG) const {
// Do not merge to float value size (128 bytes) if no implicit
@ -7471,7 +7497,7 @@ static bool isAddSub(const BuildVectorSDNode *BV,
}

/// Returns true if is possible to fold MUL and an idiom that has already been
/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
/// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the
/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
@ -7708,6 +7734,10 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
case ISD::AND:
case ISD::XOR:
case ISD::OR:
// Don't do this if the buildvector is a splat - we'd replace one
// constant with an entire vector.
if (Op->getSplatValue())
return SDValue();
if (!TLI.isOperationLegalOrPromote(Opcode, VT))
return SDValue();
break;
@ -11261,6 +11291,20 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);

// Attempt to directly match PSHUFLW or PSHUFHW.
if (isUndefOrInRange(LoMask, 0, 4) &&
isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
}
if (isUndefOrInRange(HiMask, 4, 8) &&
isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
for (int i = 0; i != 4; ++i)
HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));
return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
}

SmallVector<int, 4> LoInputs;
copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
std::sort(LoInputs.begin(), LoInputs.end());
@ -11280,13 +11324,11 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);

// If we are splatting two values from one half - one to each half, then
// we can shuffle that half so each is splatted to a dword, then splat those
// to their respective halves.
auto SplatHalfs = [&](int LoInput, int HiInput, unsigned ShufWOp,
int DOffset) {
int PSHUFHalfMask[] = {LoInput % 4, LoInput % 4, HiInput % 4, HiInput % 4};
int PSHUFDMask[] = {DOffset + 0, DOffset + 0, DOffset + 1, DOffset + 1};
// If we are shuffling values from one half - check how many different DWORD
// pairs we need to create. If only 1 or 2 then we can perform this as a
// PSHUFLW/PSHUFHW + PSHUFD instead of the PSHUFD+PSHUFLW+PSHUFHW chain below.
auto ShuffleDWordPairs = [&](ArrayRef<int> PSHUFHalfMask,
ArrayRef<int> PSHUFDMask, unsigned ShufWOp) {
V = DAG.getNode(ShufWOp, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
V = DAG.getBitcast(PSHUFDVT, V);
@ -11295,10 +11337,48 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
return DAG.getBitcast(VT, V);
};

if (NumLToL == 1 && NumLToH == 1 && (NumHToL + NumHToH) == 0)
return SplatHalfs(LToLInputs[0], LToHInputs[0], X86ISD::PSHUFLW, 0);
if (NumHToL == 1 && NumHToH == 1 && (NumLToL + NumLToH) == 0)
return SplatHalfs(HToLInputs[0], HToHInputs[0], X86ISD::PSHUFHW, 2);
if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
int PSHUFDMask[4] = { -1, -1, -1, -1 };
SmallVector<std::pair<int, int>, 4> DWordPairs;
int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);

// Collect the different DWORD pairs.
for (int DWord = 0; DWord != 4; ++DWord) {
int M0 = Mask[2 * DWord + 0];
int M1 = Mask[2 * DWord + 1];
M0 = (M0 >= 0 ? M0 % 4 : M0);
M1 = (M1 >= 0 ? M1 % 4 : M1);
if (M0 < 0 && M1 < 0)
continue;

bool Match = false;
for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
auto &DWordPair = DWordPairs[j];
if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) &&
(M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) {
DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first);
DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second);
PSHUFDMask[DWord] = DOffset + j;
Match = true;
break;
}
}
if (!Match) {
PSHUFDMask[DWord] = DOffset + DWordPairs.size();
DWordPairs.push_back(std::make_pair(M0, M1));
}
}

if (DWordPairs.size() <= 2) {
DWordPairs.resize(2, std::make_pair(-1, -1));
int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
DWordPairs[1].first, DWordPairs[1].second};
if ((NumHToL + NumHToH) == 0)
return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
if ((NumLToL + NumLToH) == 0)
return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW);
}
}

// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
@ -15020,6 +15100,42 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
return insert1BitVector(Op, DAG, Subtarget);
}

static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Only vXi1 extract_subvectors need custom lowering");

SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);

if (!isa<ConstantSDNode>(Idx))
return SDValue();

unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0) // the operation is legal
return Op;

MVT VecVT = Vec.getSimpleValueType();
unsigned NumElems = VecVT.getVectorNumElements();

// Extend to natively supported kshift.
MVT WideVecVT = VecVT;
if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
DAG.getUNDEF(WideVecVT), Vec,
DAG.getIntPtrConstant(0, dl));
}

// Shift to the LSB.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
DAG.getConstant(IdxVal, dl, MVT::i8));

return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
}

// Returns the appropriate wrapper opcode for a global reference.
unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
// References to absolute symbols are never PC-relative.
@ -15545,19 +15661,13 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(VT == MVT::v2f64 && "Unexpected type");
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
}

MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(VT == MVT::v2f64 && "Unexpected type");
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
}
return SDValue();
}
@ -15894,19 +16004,13 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);

if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
}

MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
}

switch (SrcVT.SimpleTy) {
@ -16418,13 +16522,16 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
if (InVT.getScalarSizeInBits() <= 16) {
if (Subtarget.hasBWI()) {
// legal, will go to VPMOVB2M, VPMOVW2M
// Shift packed bytes not supported natively, bitcast to word
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
ShiftNode = DAG.getBitcast(InVT, ShiftNode);
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
// We need to shift to get the lsb into sign position.
// Shift packed bytes not supported natively, bitcast to word
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
In = DAG.getNode(ISD::SHL, DL, ExtVT,
DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
In = DAG.getBitcast(InVT, In);
}
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, In);
}
// Use TESTD/Q, extended vector to packed dword/qword.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
@ -16437,9 +16544,12 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
ShiftInx = InVT.getScalarSizeInBits() - 1;
}

SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
// We need to shift to get the lsb into sign position.
In = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
}
return DAG.getNode(X86ISD::TESTM, DL, VT, In, In);
}

SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
@ -16572,9 +16682,29 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();

if (VT.isVector()) {
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
SDValue Src = Op.getOperand(0);
SDLoc dl(Op);

if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
MVT TruncVT = MVT::v4i1;
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
// Widen to 512-bits.
ResVT = MVT::v8i32;
TruncVT = MVT::v8i1;
Opc = ISD::FP_TO_UINT;
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
DAG.getUNDEF(MVT::v8f64),
Src, DAG.getIntPtrConstant(0, dl));
}
SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
DAG.getIntPtrConstant(0, dl));
}

assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
@ -18629,6 +18759,7 @@ static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
DAG.getUNDEF(ExtVT), Op, DAG.getIntPtrConstant(0, dl));
}
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i1, Op);
Op = DAG.getBitcast(MVT::i8, Op);
return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
St->getMemOperand());
}
@ -18645,12 +18776,12 @@ static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
DAG.getIntPtrConstant(16, dl));
Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Hi);

SDValue BasePtrHi =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(2, dl, BasePtr.getValueType()));
SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl);

SDValue StHi = DAG.getStore(St->getChain(), dl, Hi,
BasePtrHi, St->getMemOperand());
BasePtrHi, St->getPointerInfo().getWithOffset(2),
MinAlign(St->getAlignment(), 2U),
St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StLo, StHi);
}
@ -24545,6 +24676,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@ -29735,7 +29867,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
/// by this operation to try to flow through the rest of the combiner
/// the fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SDValue &Opnd0, SDValue &Opnd1,
SDValue &Opnd0, SDValue &Opnd1,
bool matchSubAdd = false) {

EVT VT = N->getValueType(0);
@ -30309,9 +30441,35 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the setcc result is scalarized on subtargets that don't have legal
// vxi1 types.
if (DCI.isBeforeLegalize())
if (DCI.isBeforeLegalize()) {
if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
return V;

// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
Subtarget.hasVLX()) {
SDLoc dl(N);
N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
N0 = DAG.getBitcast(MVT::v8i1, N0);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
DAG.getIntPtrConstant(0, dl));
}

// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
Subtarget.hasVLX()) {
SDLoc dl(N);
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
Ops[0] = N0;
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
N0 = DAG.getBitcast(MVT::i8, N0);
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
}
}

// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
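
Both widening branches lean on the same fact: a v2i1/v4i1 mask lives in the low lanes of a legal v8i1 (an i8), and extending then truncating leaves those lanes untouched, so no store/reload is needed. Below is a plain scalar model of that round trip; the lane packing is only an assumption of the sketch, not DAG code.

#include <array>
#include <cstdint>
#include <cstdio>

// Pack/unpack helpers model the v8i1 <-> i8 bitcast; the low lanes carry the
// narrow mask and the high lanes are don't-care, so widening and narrowing
// round-trip losslessly.
static std::array<bool, 8> toV8i1(uint8_t Bits) {
  std::array<bool, 8> Lanes{};
  for (int I = 0; I < 8; ++I)
    Lanes[I] = (Bits >> I) & 1;
  return Lanes;
}

static uint8_t fromV8i1(const std::array<bool, 8> &Lanes) {
  uint8_t Bits = 0;
  for (int I = 0; I < 8; ++I)
    Bits |= static_cast<uint8_t>(Lanes[I]) << I;
  return Bits;
}

int main() {
  uint8_t Nibble = 0b1011;                    // an i4 value (v4i1 mask)
  std::array<bool, 8> Wide = toV8i1(Nibble);  // any_extend + bitcast to v8i1
  uint8_t Narrow = fromV8i1(Wide) & 0x0F;     // bitcast back + truncate to i4
  printf("0x%x\n", Narrow);                   // 0xb
  return 0;
}
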
@ -30791,6 +30949,11 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
return SDValue();

// Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
if (X86ISD::VBROADCAST == Src.getOpcode() &&
Src.getOperand(0).getValueType() == VT)
return Src.getOperand(0);

// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
@ -36153,13 +36316,23 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");

// We're looking for an oversized integer equality comparison, but ignore a
// comparison with zero because that gets special treatment in EmitTest().
// We're looking for an oversized integer equality comparison.
SDValue X = SetCC->getOperand(0);
SDValue Y = SetCC->getOperand(1);
EVT OpVT = X.getValueType();
unsigned OpSize = OpVT.getSizeInBits();
if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y))
if (!OpVT.isScalarInteger() || OpSize < 128)
return SDValue();

// Ignore a comparison with zero because that gets special treatment in
// EmitTest(). But make an exception for the special case of a pair of
// logically-combined vector-sized operands compared to zero. This pattern may
// be generated by the memcmp expansion pass with oversized integer compares
// (see PR33325).
bool IsOrXorXorCCZero = isNullConstant(Y) && X.getOpcode() == ISD::OR &&
X.getOperand(0).getOpcode() == ISD::XOR &&
X.getOperand(1).getOpcode() == ISD::XOR;
if (isNullConstant(Y) && !IsOrXorXorCCZero)
return SDValue();

// Bail out if we know that this is not really just an oversized integer.
@ -36174,15 +36347,29 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && Subtarget.hasAVX2())) {
EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);

SDValue Cmp;
if (IsOrXorXorCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
// setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne
// Use 2 vector equality compares and 'and' the results before doing a
// MOVMSK.
SDValue A = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(0));
SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
SDValue Cmp1 = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, A, B);
SDValue Cmp2 = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, C, D);
Cmp = DAG.getNode(ISD::AND, DL, VecVT, Cmp1, Cmp2);
} else {
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
}
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
// setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
// setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
MVT::i32);
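
The PCMPEQ/MOVMSK comments above map directly onto intrinsics. Below is a hand-written sketch of the 128-bit case, assuming an SSE2-capable x86 target; it illustrates the byte-compare-plus-movemask pattern, not the compiler's actual output. The 256-bit case works the same way with vpcmpeqb/vpmovmskb tested against 0xFFFFFFFF.

#include <cstdio>
#include <cstring>
#include <emmintrin.h>

// pcmpeqb yields 0xFF in every byte that matches; pmovmskb gathers one bit
// per byte, so the 16 bytes are equal iff the mask is 0xFFFF.
static bool equal16(const void *P, const void *Q) {
  __m128i A = _mm_loadu_si128(static_cast<const __m128i *>(P));
  __m128i B = _mm_loadu_si128(static_cast<const __m128i *>(Q));
  return _mm_movemask_epi8(_mm_cmpeq_epi8(A, B)) == 0xFFFF;
}

int main() {
  unsigned char X[16], Y[16];
  for (int I = 0; I < 16; ++I)
    X[I] = Y[I] = static_cast<unsigned char>(I * 7);
  printf("%d %d\n", equal16(X, Y), memcmp(X, Y, 16) == 0); // 1 1
  Y[11] ^= 0x20;
  printf("%d %d\n", equal16(X, Y), memcmp(X, Y, 16) == 0); // 0 0
  return 0;
}
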
@ -1023,6 +1023,8 @@ namespace llvm {
return NumElem > 2;
}

bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;

/// Intel processors have a unified instruction and data cache
const char * getClearCacheBuiltinName() const override {
return nullptr; // nothing to do, move along.

@ -2701,11 +2701,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),

// Load/store kreg
let Predicates = [HasDQI] in {
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVBmk addr:$dst, VK8:$src)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(KMOVBkm addr:$src)>;

def : Pat<(store VK4:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(store VK2:$src, addr:$dst),
@ -2745,22 +2740,10 @@ let Predicates = [HasAVX512, NoDQI] in {
}

let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
}
let Predicates = [HasBWI] in {
def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
(KMOVDmk addr:$dst, VK32:$src)>;
def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
(KMOVDkm addr:$src)>;
def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
(KMOVQmk addr:$dst, VK64:$src)>;
def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
(KMOVQkm addr:$src)>;
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}

let Predicates = [HasAVX512] in {
@ -3087,66 +3070,6 @@ defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;

defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;

multiclass vextract_for_mask_to_mask<string InstrStr, X86KVectorVTInfo From,
X86KVectorVTInfo To, Predicate prd> {
let Predicates = [prd] in
def :
Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
(To.KVT(COPY_TO_REGCLASS
(!cast<Instruction>(InstrStr#"ri") From.KVT:$src,
(i8 imm:$imm8)), To.KRC))>;
}

multiclass vextract_for_mask_to_mask_legal_w<X86KVectorVTInfo From,
X86KVectorVTInfo To> {
def :
Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
(To.KVT(COPY_TO_REGCLASS
(KSHIFTRWri(COPY_TO_REGCLASS From.KRC:$src, VK16),
(i8 imm:$imm8)), To.KRC))>;
}

defm : vextract_for_mask_to_mask_legal_w<v2i1_info, v1i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v1i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v1i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v2i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v2i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v4i1_info>;

defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v1i1_info, HasAVX512>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v1i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v1i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v2i1_info, HasAVX512>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v2i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v2i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v4i1_info, HasAVX512>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v4i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v4i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v8i1_info, HasAVX512>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v8i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v8i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v16i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v16i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v32i1_info, HasBWI>;

// Patterns for kmask shift
multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
}

defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
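
The KSHIFTR*-based extract patterns and the kmask shift lowering above treat a mask register as an ordinary bit vector: extracting lanes starting at imm is a right shift by imm followed by reinterpreting the low lanes. Below is a scalar model, with plain integers standing in for k-registers (illustrative only).

#include <cstdint>
#include <cstdio>

// A k-register modelled as a plain integer: extract_subvector of lanes
// [First, First + Num) is a right shift by First followed by keeping the
// low Num bits, which is what the KSHIFTR* patterns emit.
static unsigned extractMask(uint16_t Wide, unsigned First, unsigned Num) {
  return (Wide >> First) & ((1u << Num) - 1);
}

int main() {
  uint16_t K = 0xA65C; // a v16i1 mask
  printf("0x%x\n", extractMask(K, 8, 4)); // lanes 8..11 -> 0x6
  printf("0x%x\n", extractMask(K, 4, 8)); // lanes 4..11 -> 0x65
  return 0;
}
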
@ -3428,28 +3351,33 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;

multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
Narrow.RC:$src1, Narrow.RC:$src0)),
(EXTRACT_SUBREG
(Wide.VT
(!cast<Instruction>(InstrStr#"rrk")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
(COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
Narrow.SubRegIdx)>;

def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
Narrow.RC:$src1, Narrow.ImmAllZerosV)),
(EXTRACT_SUBREG
(Wide.VT
(!cast<Instruction>(InstrStr#"rrkz")
(COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
Narrow.SubRegIdx)>;
}

// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16f32
(VMOVAPSZrrk
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;

def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16i32
(VMOVDQA32Zrrk
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
}

let Predicates = [HasAVX512] in {
@ -4633,7 +4561,7 @@ multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
sub_xmm)>;
}

let Predicates = [HasAVX512] in {
let Predicates = [HasAVX512, NoVLX] in {
defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;

@ -94,7 +94,8 @@ let Constraints = "$src1 = $dst" in {
// MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
// When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
multiclass MMXI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
OpndItins itins, bit Commutable = 0> {
OpndItins itins, bit Commutable = 0,
X86MemOperand OType = i64mem> {
def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@ -103,7 +104,7 @@ let Constraints = "$src1 = $dst" in {
let isCommutable = Commutable;
}
def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2),
(ins VR64:$src1, OType:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2))))],
@ -524,13 +525,16 @@ defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
MMX_UNPCK_H_ITINS>;
defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
int_x86_mmx_punpcklbw,
MMX_UNPCK_L_ITINS>;
MMX_UNPCK_L_ITINS,
0, i32mem>;
defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
int_x86_mmx_punpcklwd,
MMX_UNPCK_L_ITINS>;
MMX_UNPCK_L_ITINS,
0, i32mem>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
int_x86_mmx_punpckldq,
MMX_UNPCK_L_ITINS>;
MMX_UNPCK_L_ITINS,
0, i32mem>;

// -- Pack Instructions
defm MMX_PACKSSWB : MMXI_binop_rm_int<0x63, "packsswb", int_x86_mmx_packsswb,

@ -440,16 +440,14 @@ static void
scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
DenseMap<Value *, Value *> &ResolvedValues) {
auto *PrevBB = Prev->getParent();
auto *I = &*NewBlock->begin();
while (auto PN = dyn_cast<PHINode>(I)) {
auto V = PN->getIncomingValueForBlock(PrevBB);
for (PHINode &PN : NewBlock->phis()) {
auto V = PN.getIncomingValueForBlock(PrevBB);
// See if we already resolved it.
auto VI = ResolvedValues.find(V);
if (VI != ResolvedValues.end())
V = VI->second;
// Remember the value.
ResolvedValues[PN] = V;
I = I->getNextNode();
ResolvedValues[&PN] = V;
}
}
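
Most of the Transforms changes below repeat the rewrite seen in scanPHIsAndUpdateValueMap: the manual begin()/dyn_cast walk over the leading PHI nodes becomes a range-based loop over BasicBlock::phis(), which yields PHINode references directly. The following is a toy model of the two loop shapes; MiniInst and MiniBlock are invented for this sketch, and only the structure mirrors the real API.

#include <cstdio>
#include <string>
#include <vector>

struct MiniInst {
  bool IsPhi;
  std::string Name;
};

struct MiniBlock {
  std::vector<MiniInst> Insts; // PHIs always come first, as in a BasicBlock

  // Counterpart of BasicBlock::phis(): the leading run of PHI nodes.
  std::vector<MiniInst *> phis() {
    std::vector<MiniInst *> Range;
    for (MiniInst &I : Insts) {
      if (!I.IsPhi)
        break;
      Range.push_back(&I);
    }
    return Range;
  }
};

int main() {
  MiniBlock BB;
  BB.Insts = {{true, "phi.a"}, {true, "phi.b"}, {false, "add"}};

  // Old shape: walk instructions and stop at the first non-PHI.
  for (MiniInst &I : BB.Insts) {
    if (!I.IsPhi)
      break;
    printf("old: %s\n", I.Name.c_str());
  }

  // New shape: the range helper hides the cast-and-break bookkeeping.
  for (MiniInst *PN : BB.phis())
    printf("new: %s\n", PN->Name.c_str());
  return 0;
}
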
@ -728,6 +728,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
}
}

// sqrt(a) * sqrt(b) -> sqrt(a * b)
if (AllowReassociate &&
Op0->hasOneUse() && Op1->hasOneUse()) {
Value *Opnd0 = nullptr;
Value *Opnd1 = nullptr;
if (match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(Opnd0))) &&
match(Op1, m_Intrinsic<Intrinsic::sqrt>(m_Value(Opnd1)))) {
BuilderTy::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(I.getFastMathFlags());
Value *FMulVal = Builder.CreateFMul(Opnd0, Opnd1);
Value *Sqrt = Intrinsic::getDeclaration(I.getModule(),
Intrinsic::sqrt, I.getType());
Value *SqrtCall = Builder.CreateCall(Sqrt, FMulVal);
return replaceInstUsesWith(I, SqrtCall);
}
}

// Handle symmetric situation in a 2-iteration loop
Value *Opnd0 = Op0;
Value *Opnd1 = Op1;
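
The new fold relies on the identity sqrt(a) * sqrt(b) == sqrt(a * b) for non-negative operands; it is only applied under the instruction's fast-math flags because the two forms can differ by rounding. A quick numeric check using plain libm, independent of the InstCombine code:

#include <cmath>
#include <cstdio>

int main() {
  double A = 2.0, B = 18.0;
  double Lhs = std::sqrt(A) * std::sqrt(B); // what the IR computed before
  double Rhs = std::sqrt(A * B);            // what the folded IR computes
  printf("%.17g\n%.17g\n", Lhs, Rhs);       // both about 6; may differ in the last ulp
  return 0;
}
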
@ -265,15 +265,12 @@ static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2,
CallSite CS2(CallInst2);

// Handle PHIs used as arguments in the call-site.
for (auto &PI : *TailBB) {
PHINode *PN = dyn_cast<PHINode>(&PI);
if (!PN)
break;
for (PHINode &PN : TailBB->phis()) {
unsigned ArgNo = 0;
for (auto &CI : CS.args()) {
if (&*CI == PN) {
CS1.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock1));
CS2.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock2));
if (&*CI == &PN) {
CS1.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock1));
CS2.setArgument(ArgNo, PN.getIncomingValueForBlock(SplitBlock2));
}
++ArgNo;
}

@ -592,12 +592,8 @@ class GVNSink {
/// Create a ModelledPHI for each PHI in BB, adding to PHIs.
void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
SmallPtrSetImpl<Value *> &PHIContents) {
for (auto &I : *BB) {
auto *PN = dyn_cast<PHINode>(&I);
if (!PN)
return;

auto MPHI = ModelledPHI(PN);
for (PHINode &PN : BB->phis()) {
auto MPHI = ModelledPHI(&PN);
PHIs.insert(MPHI);
for (auto *V : MPHI.getValues())
PHIContents.insert(V);

@ -485,9 +485,8 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
BasicBlock *Header = L->getHeader();

SmallVector<WeakTrackingVH, 8> PHIs;
for (BasicBlock::iterator I = Header->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
PHIs.push_back(PN);
for (PHINode &PN : Header->phis())
PHIs.push_back(&PN);

for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
@ -724,13 +723,12 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
assert(LoopHeader && "Invalid loop");

for (auto *ExitBB : ExitBlocks) {
BasicBlock::iterator BBI = ExitBB->begin();
// If there are no more PHI nodes in this exit block, then no more
// values defined inside the loop are used on this path.
while (auto *PN = dyn_cast<PHINode>(BBI++)) {
for (unsigned IncomingValIdx = 0, E = PN->getNumIncomingValues();
IncomingValIdx != E; ++IncomingValIdx) {
auto *IncomingBB = PN->getIncomingBlock(IncomingValIdx);
for (PHINode &PN : ExitBB->phis()) {
for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
IncomingValIdx != E; ++IncomingValIdx) {
auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

// We currently only support loop exits from loop header. If the
// incoming block is not loop header, we need to recursively check
@ -755,8 +753,7 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
if (!L->isLoopInvariant(Cond))
continue;

auto *ExitVal =
dyn_cast<PHINode>(PN->getIncomingValue(IncomingValIdx));
auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

// Only deal with PHIs.
if (!ExitVal)
@ -771,8 +768,8 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
if (PreheaderIdx != -1) {
assert(ExitVal->getParent() == LoopHeader &&
"ExitVal must be in loop header");
PN->setIncomingValue(IncomingValIdx,
ExitVal->getIncomingValue(PreheaderIdx));
PN.setIncomingValue(IncomingValIdx,
ExitVal->getIncomingValue(PreheaderIdx));
}
}
}

@ -1174,13 +1174,9 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
if (OriginalLoop.contains(SBB))
continue; // not an exit block

for (Instruction &I : *SBB) {
auto *PN = dyn_cast<PHINode>(&I);
if (!PN)
break;

Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
for (PHINode &PN : SBB->phis()) {
Value *OldIncoming = PN.getIncomingValueForBlock(OriginalBB);
PN.addIncoming(GetClonedValue(OldIncoming), ClonedBB);
}
}
}
@ -1327,16 +1323,12 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of
// each of the PHI nodes in the loop header. This feeds into the initial
// value of the same PHI nodes if/when we continue execution.
for (Instruction &I : *LS.Header) {
auto *PN = dyn_cast<PHINode>(&I);
if (!PN)
break;

PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy",
for (PHINode &PN : LS.Header->phis()) {
PHINode *NewPHI = PHINode::Create(PN.getType(), 2, PN.getName() + ".copy",
BranchToContinuation);

NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader);
NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch),
NewPHI->addIncoming(PN.getIncomingValueForBlock(Preheader), Preheader);
NewPHI->addIncoming(PN.getIncomingValueForBlock(LS.Latch),
RRI.ExitSelector);
RRI.PHIValuesAtPseudoExit.push_back(NewPHI);
}
@ -1348,12 +1340,8 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(

// The latch exit now has a branch from `RRI.ExitSelector' instead of
// `LS.Latch'. The PHI nodes need to be updated to reflect that.
for (Instruction &I : *LS.LatchExit) {
if (PHINode *PN = dyn_cast<PHINode>(&I))
replacePHIBlock(PN, LS.Latch, RRI.ExitSelector);
else
break;
}
for (PHINode &PN : LS.LatchExit->phis())
replacePHIBlock(&PN, LS.Latch, RRI.ExitSelector);

return RRI;
}
@ -1362,15 +1350,10 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
LoopStructure &LS, BasicBlock *ContinuationBlock,
const LoopConstrainer::RewrittenRangeInfo &RRI) const {
unsigned PHIIndex = 0;
for (Instruction &I : *LS.Header) {
auto *PN = dyn_cast<PHINode>(&I);
if (!PN)
break;

for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
if (PN->getIncomingBlock(i) == ContinuationBlock)
PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
}
for (PHINode &PN : LS.Header->phis())
for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
if (PN.getIncomingBlock(i) == ContinuationBlock)
PN.setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);

LS.IndVarStart = RRI.IndVarEnd;
}
@ -1381,14 +1364,9 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header);
BranchInst::Create(LS.Header, Preheader);

for (Instruction &I : *LS.Header) {
auto *PN = dyn_cast<PHINode>(&I);
if (!PN)
break;

for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
replacePHIBlock(PN, OldPreheader, Preheader);
}
for (PHINode &PN : LS.Header->phis())
for (unsigned i = 0, e = PN.getNumIncomingValues(); i < e; ++i)
replacePHIBlock(&PN, OldPreheader, Preheader);

return Preheader;
}
@ -1800,11 +1800,10 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
BasicBlock *OldPred,
BasicBlock *NewPred,
DenseMap<Instruction*, Value*> &ValueMap) {
for (BasicBlock::iterator PNI = PHIBB->begin();
PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) {
for (PHINode &PN : PHIBB->phis()) {
// Ok, we have a PHI node. Figure out what the incoming value was for the
// DestBlock.
Value *IV = PN->getIncomingValueForBlock(OldPred);
Value *IV = PN.getIncomingValueForBlock(OldPred);

// Remap the value if necessary.
if (Instruction *Inst = dyn_cast<Instruction>(IV)) {
@ -1813,7 +1812,7 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
IV = I->second;
}

PN->addIncoming(IV, NewPred);
PN.addIncoming(IV, NewPred);
}
}

@ -49,11 +49,10 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// must pass through a PHI in the exit block, meaning that this check is
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
BasicBlock::iterator BI = ExitBlock->begin();
bool AllEntriesInvariant = true;
bool AllOutgoingValuesSame = true;
while (PHINode *P = dyn_cast<PHINode>(BI)) {
Value *incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
for (PHINode &P : ExitBlock->phis()) {
Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);

// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
@ -61,7 +60,7 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// be used.
AllOutgoingValuesSame =
all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
return incoming == P->getIncomingValueForBlock(BB);
return incoming == P.getIncomingValueForBlock(BB);
});

if (!AllOutgoingValuesSame)
@ -72,8 +71,6 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
AllEntriesInvariant = false;
break;
}

++BI;
}

if (Changed)
@ -162,11 +159,9 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
if (ExitBlock && isLoopNeverExecuted(L)) {
DEBUG(dbgs() << "Loop is proven to never execute, delete it!");
// Set incoming value to undef for phi nodes in the exit block.
BasicBlock::iterator BI = ExitBlock->begin();
while (PHINode *P = dyn_cast<PHINode>(BI)) {
for (unsigned i = 0; i < P->getNumIncomingValues(); i++)
P->setIncomingValue(i, UndefValue::get(P->getType()));
BI++;
for (PHINode &P : ExitBlock->phis()) {
std::fill(P.incoming_values().begin(), P.incoming_values().end(),
UndefValue::get(P.getType()));
}
deleteDeadLoop(L, &DT, &SE, &LI);
++NumDeleted;

@ -857,12 +857,11 @@ static MemAccessTy getAccessType(const TargetTransformInfo &TTI,

/// Return true if this AddRec is already a phi in its loop.
static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (SE.isSCEVable(PN->getType()) &&
(SE.getEffectiveSCEVType(PN->getType()) ==
for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
if (SE.isSCEVable(PN.getType()) &&
(SE.getEffectiveSCEVType(PN.getType()) ==
SE.getEffectiveSCEVType(AR->getType())) &&
SE.getSCEV(PN) == AR)
SE.getSCEV(&PN) == AR)
return true;
}
return false;
@ -3013,15 +3012,14 @@ void LSRInstance::CollectChains() {
} // Continue walking down the instructions.
} // Continue walking down the domtree.
// Visit phi backedges to determine if the chain can generate the IV postinc.
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (!SE.isSCEVable(PN->getType()))
for (PHINode &PN : L->getHeader()->phis()) {
if (!SE.isSCEVable(PN.getType()))
continue;

Instruction *IncV =
dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
if (IncV)
ChainInstruction(PN, IncV, ChainUsersVec);
ChainInstruction(&PN, IncV, ChainUsersVec);
}
// Remove any unprofitable chains.
unsigned ChainIdx = 0;
@ -3152,12 +3150,11 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If LSR created a new, wider phi, we may also replace its postinc. We only
// do this if we also found a wide value for the head of the chain.
if (isa<PHINode>(Chain.tailUserInst())) {
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
if (!isCompatibleIVType(Phi, IVSrc))
for (PHINode &Phi : L->getHeader()->phis()) {
if (!isCompatibleIVType(&Phi, IVSrc))
continue;
Instruction *PostIncV = dyn_cast<Instruction>(
Phi->getIncomingValueForBlock(L->getLoopLatch()));
Phi.getIncomingValueForBlock(L->getLoopLatch()));
if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
continue;
Value *IVOper = IVSrc;
@ -3168,7 +3165,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
}
Phi->replaceUsesOfWith(PostIncV, IVOper);
Phi.replaceUsesOfWith(PostIncV, IVOper);
DeadInsts.emplace_back(PostIncV);
}
}

@ -1274,12 +1274,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,

// If the successor of the exit block had PHI nodes, add an entry for
// NewExit.
for (BasicBlock::iterator I = ExitSucc->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I) {
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
for (PHINode &PN : ExitSucc->phis()) {
Value *V = PN.getIncomingValueForBlock(ExitBlocks[i]);
ValueToValueMapTy::iterator It = VMap.find(V);
if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
PN.addIncoming(V, NewExit);
}

if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
@ -1496,10 +1495,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BranchInst::Create(Abort, OldSISucc,
ConstantInt::getTrue(Context), NewSISucc);
// Release the PHI operands for this edge.
for (BasicBlock::iterator II = NewSISucc->begin();
PHINode *PN = dyn_cast<PHINode>(II); ++II)
PN->setIncomingValue(PN->getBasicBlockIndex(Switch),
UndefValue::get(PN->getType()));
for (PHINode &PN : NewSISucc->phis())
PN.setIncomingValue(PN.getBasicBlockIndex(Switch),
UndefValue::get(PN.getType()));
// Tell the domtree about the new block. We don't fully update the
// domtree here -- instead we force it to do a full recomputation
// after the pass is complete -- but we do need to inform it of

@ -523,10 +523,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
<< " -> " << Dest->getName() << '\n');

PHINode *PN;
for (BasicBlock::iterator I = Dest->begin();
(PN = dyn_cast<PHINode>(I)); ++I)
visitPHINode(*PN);
for (PHINode &PN : Dest->phis())
visitPHINode(PN);
}
}

@ -271,19 +271,14 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
BasicBlock &OldExitingBB,
BasicBlock &OldPH) {
for (Instruction &I : UnswitchedBB) {
auto *PN = dyn_cast<PHINode>(&I);
if (!PN)
// No more PHIs to check.
break;

for (PHINode &PN : UnswitchedBB.phis()) {
// When the loop exit is directly unswitched we just need to update the
// incoming basic block. We loop to handle weird cases with repeated
// incoming blocks, but expect to typically only have one operand here.
for (auto i : seq<int>(0, PN->getNumOperands())) {
assert(PN->getIncomingBlock(i) == &OldExitingBB &&
for (auto i : seq<int>(0, PN.getNumOperands())) {
assert(PN.getIncomingBlock(i) == &OldExitingBB &&
"Found incoming block different from unique predecessor!");
PN->setIncomingBlock(i, &OldPH);
PN.setIncomingBlock(i, &OldPH);
}
}
}
@ -302,14 +297,9 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
assert(&ExitBB != &UnswitchedBB &&
"Must have different loop exit and unswitched blocks!");
Instruction *InsertPt = &*UnswitchedBB.begin();
for (Instruction &I : ExitBB) {
auto *PN = dyn_cast<PHINode>(&I);
if (!PN)
// No more PHIs to check.
break;

auto *NewPN = PHINode::Create(PN->getType(), /*NumReservedValues*/ 2,
PN->getName() + ".split", InsertPt);
for (PHINode &PN : ExitBB.phis()) {
auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
PN.getName() + ".split", InsertPt);

// Walk backwards over the old PHI node's inputs to minimize the cost of
// removing each one. We have to do this weird loop manually so that we
@ -320,18 +310,18 @@ static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
// allowed us to create a single entry for a predecessor block without
// having separate entries for each "edge" even though these edges are
// required to produce identical results.
for (int i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
if (PN->getIncomingBlock(i) != &OldExitingBB)
for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
if (PN.getIncomingBlock(i) != &OldExitingBB)
continue;

Value *Incoming = PN->removeIncomingValue(i);
Value *Incoming = PN.removeIncomingValue(i);
NewPN->addIncoming(Incoming, &OldPH);
}

// Now replace the old PHI with the new one and wire the old one in as an
// input to the new one.
PN->replaceAllUsesWith(NewPN);
NewPN->addIncoming(PN, &ExitBB);
PN.replaceAllUsesWith(NewPN);
NewPN->addIncoming(&PN, &ExitBB);
}
}

@ -544,10 +544,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
/// them in DeletedPhis
void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
PhiMap &Map = DeletedPhis[To];
for (Instruction &I : *To) {
if (!isa<PHINode>(I))
break;
PHINode &Phi = cast<PHINode>(I);
for (PHINode &Phi : To->phis()) {
while (Phi.getBasicBlockIndex(From) != -1) {
Value *Deleted = Phi.removeIncomingValue(From, false);
Map[&Phi].push_back(std::make_pair(From, Deleted));
@ -557,10 +554,7 @@ void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {

/// \brief Add a dummy PHI value as soon as we knew the new predecessor
void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
for (Instruction &I : *To) {
if (!isa<PHINode>(I))
break;
PHINode &Phi = cast<PHINode>(I);
for (PHINode &Phi : To->phis()) {
Value *Undef = UndefValue::get(Phi.getType());
Phi.addIncoming(Undef, From);
}