Vendor import of llvm-project branch release/11.x
llvmorg-11.0.0-rc1-25-g903c872b169.
commit 899468a000
parent 2cedf08916
@@ -2244,7 +2244,11 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level,
[](OpenMPDirectiveKind K) { return isOpenMPTaskingDirective(K); },
Level)) {
bool IsTriviallyCopyable =
D->getType().getNonReferenceType().isTriviallyCopyableType(Context);
D->getType().getNonReferenceType().isTriviallyCopyableType(Context) &&
!D->getType()
.getNonReferenceType()
.getCanonicalType()
->getAsCXXRecordDecl();
OpenMPDirectiveKind DKind = DSAStack->getDirective(Level);
SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
getOpenMPCaptureRegions(CaptureRegions, DKind);
@@ -140,6 +140,7 @@ struct Configuration {
bool safeSEH = false;
Symbol *sehTable = nullptr;
Symbol *sehCount = nullptr;
bool noSEH = false;

// Used for /opt:lldlto=N
unsigned ltoo = 2;
@@ -1700,9 +1700,10 @@ void LinkerDriver::link(ArrayRef<const char *> argsArr) {
config->wordsize = config->is64() ? 8 : 4;

// Handle /safeseh, x86 only, on by default, except for mingw.
if (config->machine == I386 &&
args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw))
config->safeSEH = true;
if (config->machine == I386) {
config->safeSEH = args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw);
config->noSEH = args.hasArg(OPT_noseh);
}

// Handle /functionpadmin
for (auto *arg : args.filtered(OPT_functionpadmin, OPT_functionpadmin_opt))
@@ -348,13 +348,13 @@ void ObjFile::recordPrevailingSymbolForMingw(
// of the section chunk we actually include instead of discarding it,
// add the symbol to a map to allow using it for implicitly
// associating .[px]data$<func> sections to it.
// Use the suffix from the .text$<func> instead of the leader symbol
// name, for cases where the names differ (i386 mangling/decorations,
// cases where the leader is a weak symbol named .weak.func.default*).
int32_t sectionNumber = sym.getSectionNumber();
SectionChunk *sc = sparseChunks[sectionNumber];
if (sc && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) {
StringRef name;
name = check(coffObj->getSymbolName(sym));
if (getMachineType() == I386)
name.consume_front("_");
StringRef name = sc->getSectionName().split('$').second;
prevailingSectionMap[name] = sectionNumber;
}
}
@@ -34,6 +34,11 @@ AutoExporter::AutoExporter() {
"libclang_rt.builtins-arm",
"libclang_rt.builtins-i386",
"libclang_rt.builtins-x86_64",
"libclang_rt.profile",
"libclang_rt.profile-aarch64",
"libclang_rt.profile-arm",
"libclang_rt.profile-i386",
"libclang_rt.profile-x86_64",
"libc++",
"libc++abi",
"libunwind",
@@ -57,6 +62,10 @@ AutoExporter::AutoExporter() {
"__builtin_",
// Artificial symbols such as .refptr
".",
// profile generate symbols
"__profc_",
"__profd_",
"__profvp_",
};

excludeSymbolSuffixes = {
@@ -204,6 +204,7 @@ def include_optional : Joined<["/", "-", "/?", "-?"], "includeoptional:">,
HelpText<"Add symbol as undefined, but allow it to remain undefined">;
def kill_at : F<"kill-at">;
def lldmingw : F<"lldmingw">;
def noseh : F<"noseh">;
def output_def : Joined<["/", "-", "/?", "-?"], "output-def:">;
def pdb_source_path : P<"pdbsourcepath",
"Base path used to make relative source file path absolute in PDB">;
@@ -1393,7 +1393,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF;
if (config->integrityCheck)
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY;
if (setNoSEHCharacteristic)
if (setNoSEHCharacteristic || config->noSEH)
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH;
if (config->terminalServerAware)
pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE;
@@ -134,6 +134,12 @@ public:
/// was called).
virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const;

/// This method returns whether or not it is safe for an object with the
/// given stack id to be bundled into the local area.
virtual bool isStackIdSafeForLocalArea(unsigned StackId) const {
return true;
}

/// getOffsetOfLocalArea - This method returns the offset of the local area
/// from the stack pointer on entrance to a function.
///
@@ -1648,8 +1648,32 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
}

SmallVector<Value *, 4> V1Srcs;
// For a recursive phi, that recurses through a contant gep, we can perform
// aliasing calculations using the other phi operands with an unknown size to
// specify that an unknown number of elements after the initial value are
// potentially accessed.
bool isRecursive = false;
if (PV) {
auto CheckForRecPhi = [&](Value *PV) {
if (!EnableRecPhiAnalysis)
return false;
if (GEPOperator *PVGEP = dyn_cast<GEPOperator>(PV)) {
// Check whether the incoming value is a GEP that advances the pointer
// result of this PHI node (e.g. in a loop). If this is the case, we
// would recurse and always get a MayAlias. Handle this case specially
// below. We need to ensure that the phi is inbounds and has a constant
// positive operand so that we can check for alias with the initial value
// and an unknown but positive size.
if (PVGEP->getPointerOperand() == PN && PVGEP->isInBounds() &&
PVGEP->getNumIndices() == 1 && isa<ConstantInt>(PVGEP->idx_begin()) &&
!cast<ConstantInt>(PVGEP->idx_begin())->isNegative()) {
isRecursive = true;
return true;
}
}
return false;
};

if (PV) {
// If we have PhiValues then use it to get the underlying phi values.
const PhiValues::ValueSet &PhiValueSet = PV->getValuesForPhi(PN);
// If we have more phi values than the search depth then return MayAlias
@@ -1660,19 +1684,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
return MayAlias;
// Add the values to V1Srcs
for (Value *PV1 : PhiValueSet) {
if (EnableRecPhiAnalysis) {
if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
// Check whether the incoming value is a GEP that advances the pointer
// result of this PHI node (e.g. in a loop). If this is the case, we
// would recurse and always get a MayAlias. Handle this case specially
// below.
if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
isa<ConstantInt>(PV1GEP->idx_begin())) {
isRecursive = true;
continue;
}
}
}
if (CheckForRecPhi(PV1))
continue;
V1Srcs.push_back(PV1);
}
} else {
@@ -1687,18 +1700,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize,
// and 'n' are the number of PHI sources.
return MayAlias;

if (EnableRecPhiAnalysis)
if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
// Check whether the incoming value is a GEP that advances the pointer
// result of this PHI node (e.g. in a loop). If this is the case, we
// would recurse and always get a MayAlias. Handle this case specially
// below.
if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
isa<ConstantInt>(PV1GEP->idx_begin())) {
isRecursive = true;
continue;
}
}
if (CheckForRecPhi(PV1))
continue;

if (UniqueSrc.insert(PV1).second)
V1Srcs.push_back(PV1);
@@ -220,6 +220,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (StackProtectorFI == (int)i)
continue;
if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i)))
continue;

switch (MFI.getObjectSSPLayout(i)) {
case MachineFrameInfo::SSPLK_None:
@@ -254,6 +256,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (ProtectedObjs.count(i))
continue;
if (!TFI.isStackIdSafeForLocalArea(MFI.getStackID(i)))
continue;

AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
}
@@ -11372,9 +11372,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Stop if more than one members are non-undef.
if (NumDefs > 1)
break;

VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
VT.getVectorElementType(),
X.getValueType().getVectorNumElements()));
X.getValueType().getVectorElementCount()));
}

if (NumDefs == 0)
@@ -18795,6 +18796,11 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
EVT OpVT = N->getOperand(0).getValueType();

// We currently can't generate an appropriate shuffle for a scalable vector.
if (VT.isScalableVector())
return SDValue();

int NumElts = VT.getVectorNumElements();
int NumOpElts = OpVT.getVectorNumElements();
@@ -19055,11 +19061,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return V;

// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
// nodes often generate nop CONCAT_VECTOR nodes.
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
// place the incoming vectors at the exact same location.
// nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
// operands and look for a CONCAT operations that place the incoming vectors
// at the exact same location.
//
// For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
SDValue SingleSource = SDValue();
unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
unsigned PartNumElem =
N->getOperand(0).getValueType().getVectorMinNumElements();

for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue Op = N->getOperand(i);
@@ -19181,7 +19190,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {

// The binop must be a vector type, so we can extract some fraction of it.
EVT WideBVT = BinOp.getValueType();
if (!WideBVT.isVector())
// The optimisations below currently assume we are dealing with fixed length
// vectors. It is possible to add support for scalable vectors, but at the
// moment we've done no analysis to prove whether they are profitable or not.
if (!WideBVT.isFixedLengthVector())
return SDValue();

EVT VT = Extract->getValueType(0);
@@ -2151,7 +2151,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
EVT InVT = Lo.getValueType();

EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
InVT.getVectorElementCount());

if (N->isStrictFPOpcode()) {
Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
@@ -2197,13 +2197,19 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
SDValue Lo, Hi;

if (SubVT.isScalableVector() !=
N->getOperand(0).getValueType().isScalableVector())
report_fatal_error("Extracting a fixed-length vector from an illegal "
"scalable vector is not yet supported");

GetSplitVector(N->getOperand(0), Lo, Hi);

uint64_t LoElts = Lo.getValueType().getVectorNumElements();
uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();

if (IdxVal < LoElts) {
assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
assert(IdxVal + SubVT.getVectorMinNumElements() <= LoElts &&
"Extracted subvector crosses vector split!");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
} else {
@@ -2559,13 +2565,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
SDValue InVec = N->getOperand(OpNo);
EVT InVT = InVec->getValueType(0);
EVT OutVT = N->getValueType(0);
unsigned NumElements = OutVT.getVectorNumElements();
ElementCount NumElements = OutVT.getVectorElementCount();
bool IsFloat = OutVT.isFloatingPoint();

// Widening should have already made sure this is a power-two vector
// if we're trying to split it at all. assert() that's true, just in case.
assert(!(NumElements & 1) && "Splitting vector, but not in half!");

unsigned InElementSize = InVT.getScalarSizeInBits();
unsigned OutElementSize = OutVT.getScalarSizeInBits();
@@ -2595,6 +2597,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
GetSplitVector(InVec, InLoVec, InHiVec);

// Truncate them to 1/2 the element size.
//
// This assumes the number of elements is a power of two; any vector that
// isn't should be widened, not split.
EVT HalfElementVT = IsFloat ?
EVT::getFloatingPointVT(InElementSize/2) :
EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
@@ -3605,16 +3610,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
EVT InVT = N->getOperand(0).getValueType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned NumInElts = InVT.getVectorNumElements();
unsigned NumOperands = N->getNumOperands();

bool InputWidened = false; // Indicates we need to widen the input.
if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
unsigned NumInElts = InVT.getVectorMinNumElements();
if (WidenNumElts % NumInElts == 0) {
// Add undef vectors to widen to correct length.
unsigned NumConcat = WidenVT.getVectorNumElements() /
InVT.getVectorNumElements();
unsigned NumConcat = WidenNumElts / NumInElts;
SDValue UndefVal = DAG.getUNDEF(InVT);
SmallVector<SDValue, 16> Ops(NumConcat);
for (unsigned i=0; i < NumOperands; ++i)
@@ -3638,6 +3642,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
return GetWidenedVector(N->getOperand(0));

if (NumOperands == 2) {
assert(!WidenVT.isScalableVector() &&
"Cannot use vector shuffles to widen CONCAT_VECTOR result");
unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned NumInElts = InVT.getVectorNumElements();

// Replace concat of two operands with a shuffle.
SmallVector<int, 16> MaskOps(WidenNumElts, -1);
for (unsigned i = 0; i < NumInElts; ++i) {
@@ -3652,6 +3661,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
}
}

assert(!WidenVT.isScalableVector() &&
"Cannot use build vectors to widen CONCAT_VECTOR result");
unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned NumInElts = InVT.getVectorNumElements();

// Fall back to use extracts and build vector.
EVT EltVT = WidenVT.getVectorElementType();
SmallVector<SDValue, 16> Ops(WidenNumElts);
@@ -4913,7 +4927,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,

int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
// Allow wider loads.
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();

// Find the vector type that can load from.
@@ -4965,19 +4980,6 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
LD->getPointerInfo().getWithOffset(Offset),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
// Later code assumes the vector loads produced will be mergeable, so we
// must pad the final entry up to the previous width. Scalars are
// combined separately.
SmallVector<SDValue, 16> Loads;
Loads.push_back(L);
unsigned size = L->getValueSizeInBits(0);
while (size < LdOp->getValueSizeInBits(0)) {
Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
size += L->getValueSizeInBits(0);
}
L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
}
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
@@ -5018,8 +5020,17 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector.
unsigned NumOps = NewLdTy.getSizeInBits() / LdTy.getSizeInBits();
assert(NewLdTy.getSizeInBits() % LdTy.getSizeInBits() == 0);
SmallVector<SDValue, 16> WidenOps(NumOps);
unsigned j = 0;
for (; j != End-Idx; ++j)
WidenOps[j] = ConcatOps[Idx+j];
for (; j != NumOps; ++j)
WidenOps[j] = DAG.getUNDEF(LdTy);

ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
makeArrayRef(&ConcatOps[Idx], End - Idx));
WidenOps);
Idx = End - 1;
LdTy = NewLdTy;
}
@@ -375,6 +375,7 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
COFFSymbol *Local = nullptr;
if (cast<MCSymbolCOFF>(MCSym).isWeakExternal()) {
Sym->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
Sym->Section = nullptr;

COFFSymbol *WeakDefault = getLinkedSymbol(MCSym);
if (!WeakDefault) {
@@ -1192,7 +1192,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,

// Process the SVE callee-saves to determine what space needs to be
// allocated.
if (AFI->getSVECalleeSavedStackSize()) {
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
// Find callee save instructions in frame.
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
@@ -1200,11 +1200,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
++MBBI;
CalleeSavesEnd = MBBI;

int64_t OffsetToFirstCalleeSaveFromSP =
MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
StackOffset OffsetToCalleeSavesFromSP =
StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
AllocateBefore -= OffsetToCalleeSavesFromSP;
AllocateBefore = {CalleeSavedSize, MVT::nxv1i8};
AllocateAfter = SVEStackSize - AllocateBefore;
}
@@ -1582,7 +1578,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// deallocated.
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
if (AFI->getSVECalleeSavedStackSize()) {
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
RestoreBegin = std::prev(RestoreEnd);;
while (IsSVECalleeSave(RestoreBegin) &&
RestoreBegin != MBB.begin())
@@ -1592,23 +1588,21 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
assert(IsSVECalleeSave(RestoreBegin) &&
IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");

int64_t OffsetToFirstCalleeSaveFromSP =
MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
StackOffset OffsetToCalleeSavesFromSP =
StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
DeallocateBefore = OffsetToCalleeSavesFromSP;
DeallocateAfter = SVEStackSize - DeallocateBefore;
StackOffset CalleeSavedSizeAsOffset = {CalleeSavedSize, MVT::nxv1i8};
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
DeallocateAfter = CalleeSavedSizeAsOffset;
}

// Deallocate the SVE area.
if (SVEStackSize) {
if (AFI->isStackRealigned()) {
if (AFI->getSVECalleeSavedStackSize())
// Set SP to start of SVE area, from which the callee-save reloads
// can be done. The code below will deallocate the stack space
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize())
// Set SP to start of SVE callee-save area from which they can
// be reloaded. The code below will deallocate the stack space
// space by moving FP -> SP.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
-SVEStackSize, TII, MachineInstr::FrameDestroy);
{-CalleeSavedSize, MVT::nxv1i8}, TII,
MachineInstr::FrameDestroy);
} else {
if (AFI->getSVECalleeSavedStackSize()) {
// Deallocate the non-SVE locals first before we can deallocate (and
@@ -2595,25 +2589,23 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
int &MinCSFrameIndex,
int &MaxCSFrameIndex,
bool AssignOffsets) {
#ifndef NDEBUG
// First process all fixed stack objects.
int64_t Offset = 0;
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
if (MFI.getStackID(I) == TargetStackID::SVEVector) {
int64_t FixedOffset = -MFI.getObjectOffset(I);
if (FixedOffset > Offset)
Offset = FixedOffset;
}
assert(MFI.getStackID(I) != TargetStackID::SVEVector &&
"SVE vectors should never be passed on the stack by value, only by "
"reference.");
#endif

auto Assign = [&MFI](int FI, int64_t Offset) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
MFI.setObjectOffset(FI, Offset);
};

int64_t Offset = 0;

// Then process all callee saved slots.
if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
// Make sure to align the last callee save slot.
MFI.setObjectAlignment(MaxCSFrameIndex, Align(16));

// Assign offsets to the callee save slots.
for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
Offset += MFI.getObjectSize(I);
@@ -2623,6 +2615,9 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
}
}

// Ensure that the Callee-save area is aligned to 16bytes.
Offset = alignTo(Offset, Align(16U));

// Create a buffer of SVE objects to allocate and sort it.
SmallVector<int, 8> ObjectsToAllocate;
for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
@@ -105,6 +105,12 @@ public:
}
}

bool isStackIdSafeForLocalArea(unsigned StackId) const override {
// We don't support putting SVE objects into the pre-allocated local
// frame block at the moment.
return StackId != TargetStackID::SVEVector;
}

private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
uint64_t StackBumpBytes) const;
@@ -245,7 +245,8 @@ public:
unsigned SubRegIdx);
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, const unsigned Opc);
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
unsigned Opc_rr, unsigned Opc_ri);

bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
@@ -262,14 +263,12 @@ public:
void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
template <unsigned Scale>
void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc_rr,
const unsigned Opc_ri);
template <unsigned Scale>
void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
unsigned Opc_rr, unsigned Opc_ri);
std::tuple<unsigned, SDValue, SDValue>
findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
const unsigned Opc_ri, const SDValue &OldBase,
const SDValue &OldOffset);
findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
const SDValue &OldBase, const SDValue &OldOffset,
unsigned Scale);

bool tryBitfieldExtractOp(SDNode *N);
bool tryBitfieldExtractOpFromSExt(SDNode *N);
@@ -1414,12 +1413,12 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
/// Optimize \param OldBase and \param OldOffset selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
template <unsigned Scale>
std::tuple<unsigned, SDValue, SDValue>
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
const unsigned Opc_ri,
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
unsigned Opc_ri,
const SDValue &OldBase,
const SDValue &OldOffset) {
const SDValue &OldOffset,
unsigned Scale) {
SDValue NewBase = OldBase;
SDValue NewOffset = OldOffset;
// Detect a possible Reg+Imm addressing mode.
@@ -1429,21 +1428,30 @@ AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr,
// Detect a possible reg+reg addressing mode, but only if we haven't already
// detected a Reg+Imm one.
const bool IsRegReg =
!IsRegImm && SelectSVERegRegAddrMode<Scale>(OldBase, NewBase, NewOffset);
!IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);

// Select the instruction.
return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
}

void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
const unsigned Opc) {
unsigned Scale, unsigned Opc_ri,
unsigned Opc_rr) {
assert(Scale < 4 && "Invalid scaling value.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue Chain = N->getOperand(0);

// Optimize addressing mode.
SDValue Base, Offset;
unsigned Opc;
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
N, Opc_rr, Opc_ri, N->getOperand(2),
CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);

SDValue Ops[] = {N->getOperand(1), // Predicate
N->getOperand(2), // Memory operand
CurDAG->getTargetConstant(0, DL, MVT::i64), Chain};
Base, // Memory operand
Offset, Chain};

const EVT ResTys[] = {MVT::Untyped, MVT::Other};
@@ -1479,10 +1487,9 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
ReplaceNode(N, St);
}

template <unsigned Scale>
void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
const unsigned Opc_rr,
const unsigned Opc_ri) {
unsigned Scale, unsigned Opc_rr,
unsigned Opc_ri) {
SDLoc dl(N);

// Form a REG_SEQUENCE to force register allocation.
@@ -1492,9 +1499,9 @@ void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
// Optimize addressing mode.
unsigned Opc;
SDValue Offset, Base;
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore<Scale>(
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
CurDAG->getTargetConstant(0, dl, MVT::i64));
CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);

SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
Base, // address
@@ -4085,63 +4092,51 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
case Intrinsic::aarch64_sve_st2: {
if (VT == MVT::nxv16i8) {
SelectPredicatedStore</*Scale=*/0>(Node, 2, AArch64::ST2B,
AArch64::ST2B_IMM);
SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
SelectPredicatedStore</*Scale=*/1>(Node, 2, AArch64::ST2H,
AArch64::ST2H_IMM);
SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectPredicatedStore</*Scale=*/2>(Node, 2, AArch64::ST2W,
AArch64::ST2W_IMM);
SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectPredicatedStore</*Scale=*/3>(Node, 2, AArch64::ST2D,
AArch64::ST2D_IMM);
SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
return;
}
break;
}
case Intrinsic::aarch64_sve_st3: {
if (VT == MVT::nxv16i8) {
SelectPredicatedStore</*Scale=*/0>(Node, 3, AArch64::ST3B,
AArch64::ST3B_IMM);
SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
SelectPredicatedStore</*Scale=*/1>(Node, 3, AArch64::ST3H,
AArch64::ST3H_IMM);
SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectPredicatedStore</*Scale=*/2>(Node, 3, AArch64::ST3W,
AArch64::ST3W_IMM);
SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectPredicatedStore</*Scale=*/3>(Node, 3, AArch64::ST3D,
AArch64::ST3D_IMM);
SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
return;
}
break;
}
case Intrinsic::aarch64_sve_st4: {
if (VT == MVT::nxv16i8) {
SelectPredicatedStore</*Scale=*/0>(Node, 4, AArch64::ST4B,
AArch64::ST4B_IMM);
SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
SelectPredicatedStore</*Scale=*/1>(Node, 4, AArch64::ST4H,
AArch64::ST4H_IMM);
SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectPredicatedStore</*Scale=*/2>(Node, 4, AArch64::ST4W,
AArch64::ST4W_IMM);
SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectPredicatedStore</*Scale=*/3>(Node, 4, AArch64::ST4D,
AArch64::ST4D_IMM);
SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
return;
}
break;
@@ -4741,51 +4736,51 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
case AArch64ISD::SVE_LD2_MERGE_ZERO: {
if (VT == MVT::nxv16i8) {
SelectPredicatedLoad(Node, 2, AArch64::LD2B_IMM);
SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
SelectPredicatedLoad(Node, 2, AArch64::LD2H_IMM);
SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectPredicatedLoad(Node, 2, AArch64::LD2W_IMM);
SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectPredicatedLoad(Node, 2, AArch64::LD2D_IMM);
SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
return;
}
break;
}
case AArch64ISD::SVE_LD3_MERGE_ZERO: {
if (VT == MVT::nxv16i8) {
SelectPredicatedLoad(Node, 3, AArch64::LD3B_IMM);
SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
SelectPredicatedLoad(Node, 3, AArch64::LD3H_IMM);
SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectPredicatedLoad(Node, 3, AArch64::LD3W_IMM);
SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectPredicatedLoad(Node, 3, AArch64::LD3D_IMM);
SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
return;
}
break;
}
case AArch64ISD::SVE_LD4_MERGE_ZERO: {
if (VT == MVT::nxv16i8) {
SelectPredicatedLoad(Node, 4, AArch64::LD4B_IMM);
SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
return;
} else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
(VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
SelectPredicatedLoad(Node, 4, AArch64::LD4H_IMM);
SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
return;
} else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
SelectPredicatedLoad(Node, 4, AArch64::LD4W_IMM);
SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
return;
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
SelectPredicatedLoad(Node, 4, AArch64::LD4D_IMM);
SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
return;
}
break;
@@ -4805,10 +4800,14 @@ FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,

/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. If the input
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec >1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) {
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
unsigned NumVec) {
assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
return EVT();
@@ -4818,7 +4817,8 @@ static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT) {

ElementCount EC = PredVT.getVectorElementCount();
EVT ScalarVT = EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.Min);
EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC);
EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

return MemVT;
}
@@ -4842,6 +4842,15 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
return cast<VTSDNode>(Root->getOperand(3))->getVT();
case AArch64ISD::ST1_PRED:
return cast<VTSDNode>(Root->getOperand(4))->getVT();
case AArch64ISD::SVE_LD2_MERGE_ZERO:
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
case AArch64ISD::SVE_LD3_MERGE_ZERO:
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
case AArch64ISD::SVE_LD4_MERGE_ZERO:
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
default:
break;
}
@@ -4857,7 +4866,7 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
// We are using an SVE prefetch intrinsic. Type must be inferred
// from the width of the predicate.
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(2)->getValueType(0));
Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
@@ -932,8 +932,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
if (VT.getScalarType() == MVT::i1)
if (VT.getScalarType() == MVT::i1) {
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
}
}
}
@@ -8858,6 +8861,16 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();

if (VT.getScalarType() == MVT::i1) {
// Lower i1 truncate to `(x & 1) != 0`.
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, dl, OpVT);
SDValue One = DAG.getConstant(1, dl, OpVT);
SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
}

if (!VT.isVector() || VT.isScalableVector())
return Op;
@@ -12288,6 +12301,9 @@ static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();

EVT LoadVT = VT;
if (VT.isFloatingPoint())
@@ -14909,6 +14925,11 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
return true;

if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isa<ScalableVectorType>(AI->getAllocatedType()))
return true;
}

return false;
}
@@ -495,6 +495,9 @@ def SImmS4XForm : SDNodeXForm<imm, [{
def SImmS16XForm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue() / 16, SDLoc(N), MVT::i64);
}]>;
def SImmS32XForm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue() / 32, SDLoc(N), MVT::i64);
}]>;

// simm6sN predicate - True if the immediate is a multiple of N in the range
// [-32 * N, 31 * N].
@@ -546,7 +549,7 @@ def simm4s16 : Operand<i64>, ImmLeaf<i64,
let DecoderMethod = "DecodeSImm<4>";
}
def simm4s32 : Operand<i64>, ImmLeaf<i64,
[{ return Imm >=-256 && Imm <= 224 && (Imm % 32) == 0x0; }]> {
[{ return Imm >=-256 && Imm <= 224 && (Imm % 32) == 0x0; }], SImmS32XForm> {
let PrintMethod = "printImmScale<32>";
let ParserMatchClass = SImm4s32Operand;
let DecoderMethod = "DecodeSImm<4>";
@@ -2744,6 +2744,35 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}

// Copy a Z register pair by copying the individual sub-registers.
if (AArch64::ZPR2RegClass.contains(DestReg) &&
AArch64::ZPR2RegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}

// Copy a Z register triple by copying the individual sub-registers.
if (AArch64::ZPR3RegClass.contains(DestReg) &&
AArch64::ZPR3RegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}

// Copy a Z register quad by copying the individual sub-registers.
if (AArch64::ZPR4RegClass.contains(DestReg) &&
AArch64::ZPR4RegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2, AArch64::zsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}

if (AArch64::GPR64spRegClass.contains(DestReg) &&
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
@@ -40,6 +40,14 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
AArch64_MC::initLLVMToCVRegMapping(this);
}

static bool hasSVEArgsOrReturn(const MachineFunction *MF) {
const Function &F = MF->getFunction();
return isa<ScalableVectorType>(F.getReturnType()) ||
any_of(F.args(), [](const Argument &Arg) {
return isa<ScalableVectorType>(Arg.getType());
});
}

const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
assert(MF && "Invalid MachineFunction pointer.");
@@ -75,6 +83,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
// This is for OSes other than Windows; Windows is a separate case further
// above.
return CSR_AArch64_AAPCS_X18_SaveList;
if (hasSVEArgsOrReturn(MF))
return CSR_AArch64_SVE_AAPCS_SaveList;
return CSR_AArch64_AAPCS_SaveList;
}
@@ -343,6 +353,15 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
if (MFI.hasVarSizedObjects() || MF.hasEHFunclets()) {
if (needsStackRealignment(MF))
return true;

if (MF.getSubtarget<AArch64Subtarget>().hasSVE()) {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// Frames that have variable sized objects and scalable SVE objects,
// should always use a basepointer.
if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE())
return true;
}

// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@@ -379,8 +398,15 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
// (closer to SP).
//
// The beginning works most reliably if we have a frame pointer.
// In the presence of any non-constant space between FP and locals,
// (e.g. in case of stack realignment or a scalable SVE area), it is
// better to use SP or BP.
const AArch64FrameLowering &TFI = *getFrameLowering(MF);
return TFI.hasFP(MF);
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
assert((!MF.getSubtarget<AArch64Subtarget>().hasSVE() ||
AFI->hasCalculatedStackSizeSVE()) &&
"Expected SVE area to be calculated by this point");
return TFI.hasFP(MF) && !needsStackRealignment(MF) && !AFI->getStackSizeSVE();
}

bool AArch64RegisterInfo::requiresFrameIndexScavenging(
@@ -1109,6 +1109,28 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;
defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;

// Extract lo/hi halves of legal predicate types.
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),
(ZIP1_PPP_S PPR:$Ps, (PFALSE))>;
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),
(ZIP2_PPP_S PPR:$Ps, (PFALSE))>;
def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
(ZIP1_PPP_H PPR:$Ps, (PFALSE))>;
def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
(ZIP2_PPP_H PPR:$Ps, (PFALSE))>;
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
(ZIP1_PPP_B PPR:$Ps, (PFALSE))>;
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
(ZIP2_PPP_B PPR:$Ps, (PFALSE))>;

// Concatenate two predicates.
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
(UZP1_PPP_S $p1, $p2)>;
def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)),
(UZP1_PPP_H $p1, $p2)>;
def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
(UZP1_PPP_B $p1, $p2)>;

defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
@@ -7718,9 +7718,13 @@ multiclass sve_mem_ldor_si<bits<2> sz, string asm, RegisterOperand listty,
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, simm4s32:$imm4), 0>;

// Base addressing mode
def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$gp), GPR64sp:$base)),
(!cast<Instruction>(NAME) PPR3bAny:$gp, GPR64sp:$base, (i64 0))>;

def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$Pg), GPR64sp:$base)),
(!cast<Instruction>(NAME) PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
let AddedComplexity = 2 in {
// Reg + Imm addressing mode
def : Pat<(Ty (Ld1ro (PredTy PPR3bAny:$Pg), (add GPR64:$base, (i64 simm4s32:$imm)))),
(!cast<Instruction>(NAME) $Pg, $base, simm4s32:$imm)>;
}
}

class sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand VecList,
@@ -9111,13 +9111,15 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
Op0.getOperand(1));
}

static const SDValue *getNormalLoadInput(const SDValue &Op) {
static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
const SDValue *InputLoad = &Op;
if (InputLoad->getOpcode() == ISD::BITCAST)
InputLoad = &InputLoad->getOperand(0);
if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED)
InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
InputLoad = &InputLoad->getOperand(0);
}
if (InputLoad->getOpcode() != ISD::LOAD)
return nullptr;
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
@@ -9289,7 +9291,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,

if (!BVNIsConstantSplat || SplatBitSize > 32) {

const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
bool IsPermutedLoad = false;
const SDValue *InputLoad =
getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
// Handle load-and-splat patterns as we have instructions that will do this
// in one go.
if (InputLoad && DAG.isSplatValue(Op, true)) {
@@ -9912,7 +9916,8 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// If this is a load-and-splat, we can do that with a single instruction
// in some cases. However if the load has multiple uses, we don't want to
// combine it because that will just produce multiple loads.
const SDValue *InputLoad = getNormalLoadInput(V1);
bool IsPermutedLoad = false;
const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
(PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
InputLoad->hasOneUse()) {
@@ -9920,6 +9925,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
int SplatIdx =
PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

// The splat index for permuted loads will be in the left half of the vector
// which is strictly wider than the loaded value by 8 bytes. So we need to
// adjust the splat index to point to the correct address in memory.
if (IsPermutedLoad) {
assert(isLittleEndian && "Unexpected permuted load on big endian target");
SplatIdx += IsFourByte ? 2 : 1;
assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
"Splat of a value outside of the loaded memory");
}

LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
// For 4-byte load-and-splat, we need Power9.
if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
@@ -9929,10 +9944,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
else
Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;

// If we are loading a partial vector, it does not make sense to adjust
// the base pointer. This happens with (splat (s_to_v_permuted (ld))).
if (LD->getMemoryVT().getSizeInBits() == (IsFourByte ? 32 : 64))
Offset = 0;
SDValue BasePtr = LD->getBasePtr();
if (Offset != 0)
BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
@ -184,6 +184,330 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that it is a SLOI (Shift Left Ones Immediate). We first check that
|
||||
// it is the right node tree:
|
||||
//
|
||||
// (OR (SHL RS1, VC2), VC1)
|
||||
//
|
||||
// and then we check that VC1, the mask used to fill with ones, is compatible
|
||||
// with VC2, the shamt:
|
||||
//
|
||||
// VC1 == maskTrailingOnes<uint64_t>(VC2)
|
||||
|
||||
bool RISCVDAGToDAGISel::SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt) {
|
||||
MVT XLenVT = Subtarget->getXLenVT();
|
||||
if (N.getOpcode() == ISD::OR) {
|
||||
SDValue Or = N;
|
||||
if (Or.getOperand(0).getOpcode() == ISD::SHL) {
|
||||
SDValue Shl = Or.getOperand(0);
|
||||
if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
|
||||
isa<ConstantSDNode>(Or.getOperand(1))) {
|
||||
if (XLenVT == MVT::i64) {
|
||||
uint64_t VC1 = Or.getConstantOperandVal(1);
|
||||
uint64_t VC2 = Shl.getConstantOperandVal(1);
|
||||
if (VC1 == maskTrailingOnes<uint64_t>(VC2)) {
|
||||
RS1 = Shl.getOperand(0);
|
||||
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
|
||||
Shl.getOperand(1).getValueType());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (XLenVT == MVT::i32) {
|
||||
uint32_t VC1 = Or.getConstantOperandVal(1);
|
||||
uint32_t VC2 = Shl.getConstantOperandVal(1);
|
||||
if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
|
||||
RS1 = Shl.getOperand(0);
|
||||
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
|
||||
Shl.getOperand(1).getValueType());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that it is a SROI (Shift Right Ones Immediate). We first check that
|
||||
// it is the right node tree:
|
||||
//
|
||||
// (OR (SRL RS1, VC2), VC1)
|
||||
//
|
||||
// and then we check that VC1, the mask used to fill with ones, is compatible
|
||||
// with VC2, the shamt:
|
||||
//
|
||||
// VC1 == maskLeadingOnes<uint64_t>(VC2)
|
||||
|
||||
bool RISCVDAGToDAGISel::SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt) {
|
||||
MVT XLenVT = Subtarget->getXLenVT();
|
||||
if (N.getOpcode() == ISD::OR) {
|
||||
SDValue Or = N;
|
||||
if (Or.getOperand(0).getOpcode() == ISD::SRL) {
|
||||
SDValue Srl = Or.getOperand(0);
|
||||
if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
|
||||
isa<ConstantSDNode>(Or.getOperand(1))) {
|
||||
if (XLenVT == MVT::i64) {
|
||||
uint64_t VC1 = Or.getConstantOperandVal(1);
|
||||
uint64_t VC2 = Srl.getConstantOperandVal(1);
|
||||
if (VC1 == maskLeadingOnes<uint64_t>(VC2)) {
|
||||
RS1 = Srl.getOperand(0);
|
||||
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
|
||||
Srl.getOperand(1).getValueType());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (XLenVT == MVT::i32) {
|
||||
uint32_t VC1 = Or.getConstantOperandVal(1);
|
||||
uint32_t VC2 = Srl.getConstantOperandVal(1);
|
||||
if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
|
||||
RS1 = Srl.getOperand(0);
|
||||
Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
|
||||
Srl.getOperand(1).getValueType());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that it is a RORI (Rotate Right Immediate). We first check that
|
||||
// it is the right node tree:
|
||||
//
|
||||
// (ROTL RS1, VC)
|
||||
//
|
||||
// The compiler translates immediate rotations to the right given by the call
|
||||
// to the rotateright32/rotateright64 intrinsics as rotations to the left.
|
||||
// Since the rotation to the left can be easily emulated as a rotation to the
|
||||
// right by negating the constant, there is no encoding for ROLI.
|
||||
// We then select the immediate left rotations as RORI by the complementary
|
||||
// constant:
|
||||
//
|
||||
// Shamt == XLen - VC
|
||||
|
||||
bool RISCVDAGToDAGISel::SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt) {
|
||||
MVT XLenVT = Subtarget->getXLenVT();
|
||||
if (N.getOpcode() == ISD::ROTL) {
|
||||
if (isa<ConstantSDNode>(N.getOperand(1))) {
|
||||
if (XLenVT == MVT::i64) {
|
||||
uint64_t VC = N.getConstantOperandVal(1);
|
||||
Shamt = CurDAG->getTargetConstant((64 - VC), SDLoc(N),
|
||||
N.getOperand(1).getValueType());
|
||||
RS1 = N.getOperand(0);
|
||||
return true;
|
||||
}
|
||||
if (XLenVT == MVT::i32) {
|
||||
uint32_t VC = N.getConstantOperandVal(1);
|
||||
Shamt = CurDAG->getTargetConstant((32 - VC), SDLoc(N),
|
||||
N.getOperand(1).getValueType());
|
||||
RS1 = N.getOperand(0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||

// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
// on RV64).
// SLLIUW is the same as SLLI except for the fact that it clears the bits
// XLEN-1:32 of the input RS1 before shifting.
// We first check that it is the right node tree:
//
// (AND (SHL RS1, VC2), VC1)
//
// We check that VC2, the shamt, is less than 32; otherwise the pattern is
// exactly the same as SLLI and we give priority to that.
// Finally, we check that VC1, the mask used to clear the upper 32 bits
// of RS1, is correct:
//
// VC1 == (0xFFFFFFFF << VC2)

bool RISCVDAGToDAGISel::SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt) {
  if (N.getOpcode() == ISD::AND && Subtarget->getXLenVT() == MVT::i64) {
    SDValue And = N;
    if (And.getOperand(0).getOpcode() == ISD::SHL) {
      SDValue Shl = And.getOperand(0);
      if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
          isa<ConstantSDNode>(And.getOperand(1))) {
        uint64_t VC1 = And.getConstantOperandVal(1);
        uint64_t VC2 = Shl.getConstantOperandVal(1);
        if (VC2 < 32 && VC1 == ((uint64_t)0xFFFFFFFF << VC2)) {
          RS1 = Shl.getOperand(0);
          Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
                                            Shl.getOperand(1).getValueType());
          return true;
        }
      }
    }
  }
  return false;
}
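
// [Editorial sketch, not part of this commit] Why (AND (SHL RS1, VC2), VC1)
// with VC1 == (0xFFFFFFFF << VC2) is slli.uw: masking after the shift is the
// same as zero-extending the low 32 bits of RS1 before the shift. A plain
// standalone C++ check:
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t RS1 = 0xFEDCBA9876543210ull;
  for (unsigned VC2 = 0; VC2 < 32; ++VC2) {
    uint64_t VC1 = 0xFFFFFFFFull << VC2;
    uint64_t Matched = (RS1 << VC2) & VC1;           // the DAG being matched
    uint64_t SlliUw  = (RS1 & 0xFFFFFFFFull) << VC2; // slli.uw semantics
    assert(Matched == SlliUw);
  }
  return 0;
}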

// Check that it is a SLOIW (Shift Left Ones Immediate i32 on RV64).
// We first check that it is the right node tree:
//
// (SIGN_EXTEND_INREG (OR (SHL RS1, VC2), VC1))
//
// and then we check that VC1, the mask used to fill with ones, is compatible
// with VC2, the shamt:
//
// VC1 == maskTrailingOnes<uint32_t>(VC2)

bool RISCVDAGToDAGISel::SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
  if (Subtarget->getXLenVT() == MVT::i64 &&
      N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
    if (N.getOperand(0).getOpcode() == ISD::OR) {
      SDValue Or = N.getOperand(0);
      if (Or.getOperand(0).getOpcode() == ISD::SHL) {
        SDValue Shl = Or.getOperand(0);
        if (isa<ConstantSDNode>(Shl.getOperand(1)) &&
            isa<ConstantSDNode>(Or.getOperand(1))) {
          uint32_t VC1 = Or.getConstantOperandVal(1);
          uint32_t VC2 = Shl.getConstantOperandVal(1);
          if (VC1 == maskTrailingOnes<uint32_t>(VC2)) {
            RS1 = Shl.getOperand(0);
            Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
                                              Shl.getOperand(1).getValueType());
            return true;
          }
        }
      }
    }
  }
  return false;
}

// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
// We first check that it is the right node tree:
//
// (OR (SRL RS1, VC2), VC1)
//
// and then we check that VC1, the mask used to fill with ones, is compatible
// with VC2, the shamt:
//
// VC1 == maskLeadingOnes<uint32_t>(VC2)

bool RISCVDAGToDAGISel::SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
  if (N.getOpcode() == ISD::OR && Subtarget->getXLenVT() == MVT::i64) {
    SDValue Or = N;
    if (Or.getOperand(0).getOpcode() == ISD::SRL) {
      SDValue Srl = Or.getOperand(0);
      if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
          isa<ConstantSDNode>(Or.getOperand(1))) {
        uint32_t VC1 = Or.getConstantOperandVal(1);
        uint32_t VC2 = Srl.getConstantOperandVal(1);
        if (VC1 == maskLeadingOnes<uint32_t>(VC2)) {
          RS1 = Srl.getOperand(0);
          Shamt = CurDAG->getTargetConstant(VC2, SDLoc(N),
                                            Srl.getOperand(1).getValueType());
          return true;
        }
      }
    }
  }
  return false;
}

// Check that it is a RORIW (i32 Right Rotate Immediate on RV64).
// We first check that it is the right node tree:
//
// (SIGN_EXTEND_INREG (OR (SHL (AssertSext RS1, i32), VC2),
//                        (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
//
// Then we check that the constant operands respect these constraints:
//
// VC2 == 32 - VC1
// VC3 == maskLeadingOnes<uint32_t>(VC2)
//
// where VC1 is the Shamt we need, VC2 is the complement of Shamt with respect
// to 32 and VC3 is a 32-bit mask of (32 - VC1) leading ones.

bool RISCVDAGToDAGISel::SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      Subtarget->getXLenVT() == MVT::i64 &&
      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
    if (N.getOperand(0).getOpcode() == ISD::OR) {
      SDValue Or = N.getOperand(0);
      if (Or.getOperand(0).getOpcode() == ISD::SHL &&
          Or.getOperand(1).getOpcode() == ISD::SRL) {
        SDValue Shl = Or.getOperand(0);
        SDValue Srl = Or.getOperand(1);
        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
          SDValue And = Srl.getOperand(0);
          if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
              isa<ConstantSDNode>(Shl.getOperand(1)) &&
              isa<ConstantSDNode>(And.getOperand(1))) {
            uint32_t VC1 = Srl.getConstantOperandVal(1);
            uint32_t VC2 = Shl.getConstantOperandVal(1);
            uint32_t VC3 = And.getConstantOperandVal(1);
            if (VC2 == (32 - VC1) &&
                VC3 == maskLeadingOnes<uint32_t>(VC2)) {
              RS1 = Shl.getOperand(0);
              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
                                                Srl.getOperand(1).getValueType());
              return true;
            }
          }
        }
      }
    }
  }
  return false;
}
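
// [Editorial sketch, not part of this commit] When both shift inputs hold the
// same 32-bit value, the constraints above (VC2 == 32 - VC1 and
// VC3 == maskLeadingOnes<uint32_t>(VC2)) make the matched DAG compute a 32-bit
// rotate right by VC1 in the low word. A plain standalone C++ check
// (maskLeadingOnes32 is a hypothetical stand-in for
// llvm::maskLeadingOnes<uint32_t>):
#include <cassert>
#include <cstdint>

static uint32_t maskLeadingOnes32(unsigned N) { // N most-significant bits set
  return N == 0 ? 0 : ~0u << (32 - N);
}

int main() {
  const uint32_t X = 0x12345678u; // low 32 bits of the rotated register
  for (unsigned VC1 = 1; VC1 < 32; ++VC1) {
    unsigned VC2 = 32 - VC1;
    uint32_t VC3 = maskLeadingOnes32(VC2);
    uint32_t Dag = (X << VC2) | ((X & VC3) >> VC1); // matched shape
    uint32_t Ror = (X >> VC1) | (X << VC2);         // roriw semantics
    assert(Dag == Ror);
  }
  return 0;
}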

// Check that it is a FSRIW (i32 Funnel Shift Right Immediate on RV64).
// We first check that it is the right node tree:
//
// (SIGN_EXTEND_INREG (OR (SHL (AssertSext RS1, i32), VC2),
//                        (SRL (AND (AssertSext RS2, i32), VC3), VC1)))
//
// Then we check that the constant operands respect these constraints:
//
// VC2 == 32 - VC1
// VC3 == maskLeadingOnes<uint32_t>(VC2)
//
// where VC1 is the Shamt we need, VC2 is the complement of Shamt with respect
// to 32 and VC3 is a 32-bit mask of (32 - VC1) leading ones.

bool RISCVDAGToDAGISel::SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2,
                                    SDValue &Shamt) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      Subtarget->getXLenVT() == MVT::i64 &&
      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
    if (N.getOperand(0).getOpcode() == ISD::OR) {
      SDValue Or = N.getOperand(0);
      if (Or.getOperand(0).getOpcode() == ISD::SHL &&
          Or.getOperand(1).getOpcode() == ISD::SRL) {
        SDValue Shl = Or.getOperand(0);
        SDValue Srl = Or.getOperand(1);
        if (Srl.getOperand(0).getOpcode() == ISD::AND) {
          SDValue And = Srl.getOperand(0);
          if (isa<ConstantSDNode>(Srl.getOperand(1)) &&
              isa<ConstantSDNode>(Shl.getOperand(1)) &&
              isa<ConstantSDNode>(And.getOperand(1))) {
            uint32_t VC1 = Srl.getConstantOperandVal(1);
            uint32_t VC2 = Shl.getConstantOperandVal(1);
            uint32_t VC3 = And.getConstantOperandVal(1);
            if (VC2 == (32 - VC1) &&
                VC3 == maskLeadingOnes<uint32_t>(VC2)) {
              RS1 = Shl.getOperand(0);
              RS2 = And.getOperand(0);
              Shamt = CurDAG->getTargetConstant(VC1, SDLoc(N),
                                                Srl.getOperand(1).getValueType());
              return true;
            }
          }
        }
      }
    }
  }
  return false;
}
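
// [Editorial sketch, not part of this commit] FSRIW is the two-register
// generalization of the RORIW case: with VC2 == 32 - VC1 the matched DAG
// computes a 32-bit funnel shift right of the pair {RS1, RS2} by VC1. A plain
// standalone C++ check (maskLeadingOnes32 is a hypothetical stand-in for
// llvm::maskLeadingOnes<uint32_t>):
#include <cassert>
#include <cstdint>

static uint32_t maskLeadingOnes32(unsigned N) {
  return N == 0 ? 0 : ~0u << (32 - N);
}

// Funnel shift right: low 32 bits of (RS1:RS2) >> Shamt, for 0 < Shamt < 32.
static uint32_t fshr32(uint32_t RS1, uint32_t RS2, unsigned Shamt) {
  return (RS2 >> Shamt) | (RS1 << (32 - Shamt));
}

int main() {
  const uint32_t RS1 = 0x12345678u, RS2 = 0x9ABCDEF0u;
  for (unsigned VC1 = 1; VC1 < 32; ++VC1) {
    unsigned VC2 = 32 - VC1;
    uint32_t VC3 = maskLeadingOnes32(VC2);
    uint32_t Dag = (RS1 << VC2) | ((RS2 & VC3) >> VC1); // matched shape
    assert(Dag == fshr32(RS1, RS2, VC1));
  }
  return 0;
}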

// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)

@ -45,6 +45,15 @@ public:

  bool SelectAddrFI(SDValue Addr, SDValue &Base);

  bool SelectSLOI(SDValue N, SDValue &RS1, SDValue &Shamt);
  bool SelectSROI(SDValue N, SDValue &RS1, SDValue &Shamt);
  bool SelectRORI(SDValue N, SDValue &RS1, SDValue &Shamt);
  bool SelectSLLIUW(SDValue N, SDValue &RS1, SDValue &Shamt);
  bool SelectSLOIW(SDValue N, SDValue &RS1, SDValue &Shamt);
  bool SelectSROIW(SDValue N, SDValue &RS1, SDValue &Shamt);
  bool SelectRORIW(SDValue N, SDValue &RS1, SDValue &Shamt);
  bool SelectFSRIW(SDValue N, SDValue &RS1, SDValue &RS2, SDValue &Shamt);

  // Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"

@ -149,12 +149,27 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  setOperationAction(ISD::ROTL, XLenVT, Expand);
  setOperationAction(ISD::ROTR, XLenVT, Expand);
  setOperationAction(ISD::BSWAP, XLenVT, Expand);
  setOperationAction(ISD::CTTZ, XLenVT, Expand);
  setOperationAction(ISD::CTLZ, XLenVT, Expand);
  setOperationAction(ISD::CTPOP, XLenVT, Expand);
  if (!(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp())) {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (!Subtarget.hasStdExtZbp())
    setOperationAction(ISD::BSWAP, XLenVT, Expand);

  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp())
    setOperationAction(ISD::BITREVERSE, XLenVT, Legal);

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Legal);
    setOperationAction(ISD::FSHR, XLenVT, Legal);
  }

  ISD::CondCode FPCCToExtend[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,

@ -632,3 +632,432 @@ let Predicates = [HasStdExtZbproposedc, HasStdExtZbbOrZbp, HasStdExtC, IsRV64] i
def : CompressPat<(PACK GPRC:$rs1, GPRC:$rs1, X0),
                  (C_ZEXTW GPRC:$rs1)>;
} // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64]

//===----------------------------------------------------------------------===//
// Codegen patterns
//===----------------------------------------------------------------------===//
def SLOIPat : ComplexPattern<XLenVT, 2, "SelectSLOI", [or]>;
def SROIPat : ComplexPattern<XLenVT, 2, "SelectSROI", [or]>;
def RORIPat : ComplexPattern<XLenVT, 2, "SelectRORI", [rotl]>;
def SLLIUWPat : ComplexPattern<i64, 2, "SelectSLLIUW", [and]>;
def SLOIWPat : ComplexPattern<i64, 2, "SelectSLOIW", [sext_inreg]>;
def SROIWPat : ComplexPattern<i64, 2, "SelectSROIW", [or]>;
def RORIWPat : ComplexPattern<i64, 2, "SelectRORIW", [sext_inreg]>;
def FSRIWPat : ComplexPattern<i64, 3, "SelectFSRIW", [sext_inreg]>;

let Predicates = [HasStdExtZbbOrZbp] in {
def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbp]

let Predicates = [HasStdExtZbb] in {
def : Pat<(xor (shl (xor GPR:$rs1, -1), GPR:$rs2), -1),
          (SLO GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor (srl (xor GPR:$rs1, -1), GPR:$rs2), -1),
          (SRO GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbb]

let Predicates = [HasStdExtZbbOrZbp] in {
def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
def : Pat<(fshl GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
def : Pat<(fshr GPR:$rs1, GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbp]

let Predicates = [HasStdExtZbs, IsRV32] in
def : Pat<(and (xor (shl 1, (and GPR:$rs2, 31)), -1), GPR:$rs1),
          (SBCLR GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbs, IsRV64] in
def : Pat<(and (xor (shl 1, (and GPR:$rs2, 63)), -1), GPR:$rs1),
          (SBCLR GPR:$rs1, GPR:$rs2)>;

let Predicates = [HasStdExtZbs] in
def : Pat<(and (rotl -2, GPR:$rs2), GPR:$rs1), (SBCLR GPR:$rs1, GPR:$rs2)>;

let Predicates = [HasStdExtZbs, IsRV32] in
def : Pat<(or (shl 1, (and GPR:$rs2, 31)), GPR:$rs1),
          (SBSET GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbs, IsRV64] in
def : Pat<(or (shl 1, (and GPR:$rs2, 63)), GPR:$rs1),
          (SBSET GPR:$rs1, GPR:$rs2)>;

let Predicates = [HasStdExtZbs, IsRV32] in
def : Pat<(xor (shl 1, (and GPR:$rs2, 31)), GPR:$rs1),
          (SBINV GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbs, IsRV64] in
def : Pat<(xor (shl 1, (and GPR:$rs2, 63)), GPR:$rs1),
          (SBINV GPR:$rs1, GPR:$rs2)>;

let Predicates = [HasStdExtZbs, IsRV32] in
def : Pat<(and (srl GPR:$rs1, (and GPR:$rs2, 31)), 1),
          (SBEXT GPR:$rs1, GPR:$rs2)>;

let Predicates = [HasStdExtZbs, IsRV64] in
def : Pat<(and (srl GPR:$rs1, (and GPR:$rs2, 63)), 1),
          (SBEXT GPR:$rs1, GPR:$rs2)>;

let Predicates = [HasStdExtZbb] in {
def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
          (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
          (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
} // Predicates = [HasStdExtZbb]

// There's no encoding for roli in the current version of the 'B' extension
// (v0.92) as it can be implemented with rori by negating the immediate.
// For this reason we pattern-match only against rori[w].
let Predicates = [HasStdExtZbbOrZbp] in
def : Pat<(RORIPat GPR:$rs1, uimmlog2xlen:$shamt),
          (RORI GPR:$rs1, uimmlog2xlen:$shamt)>;

// We don't pattern-match sbclri[w], sbseti[w], sbinvi[w] because they are
// pattern-matched by simple andi, ori, and xori.
let Predicates = [HasStdExtZbs] in
def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
          (SBEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;

let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(or (or (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555)), GPR:$rs1),
              (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA))),
          (GORCI GPR:$rs1, (i32 1))>;
def : Pat<(or (or (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333)), GPR:$rs1),
              (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC))),
          (GORCI GPR:$rs1, (i32 2))>;
def : Pat<(or (or (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F)), GPR:$rs1),
              (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0))),
          (GORCI GPR:$rs1, (i32 4))>;
def : Pat<(or (or (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF)), GPR:$rs1),
              (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00))),
          (GORCI GPR:$rs1, (i32 8))>;
def : Pat<(or (or (srl GPR:$rs1, (i32 16)), GPR:$rs1),
              (shl GPR:$rs1, (i32 16))),
          (GORCI GPR:$rs1, (i32 16))>;
} // Predicates = [HasStdExtZbp, IsRV32]
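
// [Editorial sketch, not part of this commit] Each GORCI pattern above matches
// one "generalized OR-combine" stage: every bit is OR-ed with its partner at
// distance K. A minimal standalone check of the K = 1 stage on RV32-sized
// values (plain C++, no LLVM dependencies assumed):
#include <cassert>
#include <cstdint>

// Reference semantics: result bit I is X[I] | X[I ^ K].
static uint32_t gorcRef32(uint32_t X, unsigned K) {
  uint32_t R = 0;
  for (unsigned I = 0; I < 32; ++I)
    R |= (((X >> I) | (X >> (I ^ K))) & 1u) << I;
  return R;
}

int main() {
  const uint32_t X = 0x9E37A5C3u;
  // The shape matched by the first RV32 pattern above, written as plain C++.
  uint32_t Matched = (((X >> 1) & 0x55555555u) | X) | ((X << 1) & 0xAAAAAAAAu);
  assert(Matched == gorcRef32(X, 1));
  return 0;
}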

let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555)),
                  GPR:$rs1),
              (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA))),
          (GORCI GPR:$rs1, (i64 1))>;
def : Pat<(or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333)),
                  GPR:$rs1),
              (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC))),
          (GORCI GPR:$rs1, (i64 2))>;
def : Pat<(or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F)),
                  GPR:$rs1),
              (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0))),
          (GORCI GPR:$rs1, (i64 4))>;
def : Pat<(or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF)),
                  GPR:$rs1),
              (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00))),
          (GORCI GPR:$rs1, (i64 8))>;
def : Pat<(or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF)),
                  GPR:$rs1),
              (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000))),
          (GORCI GPR:$rs1, (i64 16))>;
def : Pat<(or (or (srl GPR:$rs1, (i64 32)), GPR:$rs1),
              (shl GPR:$rs1, (i64 32))),
          (GORCI GPR:$rs1, (i64 32))>;
} // Predicates = [HasStdExtZbp, IsRV64]

let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(or (and (shl GPR:$rs1, (i32 1)), (i32 0xAAAAAAAA)),
              (and (srl GPR:$rs1, (i32 1)), (i32 0x55555555))),
          (GREVI GPR:$rs1, (i32 1))>;
def : Pat<(or (and (shl GPR:$rs1, (i32 2)), (i32 0xCCCCCCCC)),
              (and (srl GPR:$rs1, (i32 2)), (i32 0x33333333))),
          (GREVI GPR:$rs1, (i32 2))>;
def : Pat<(or (and (shl GPR:$rs1, (i32 4)), (i32 0xF0F0F0F0)),
              (and (srl GPR:$rs1, (i32 4)), (i32 0x0F0F0F0F))),
          (GREVI GPR:$rs1, (i32 4))>;
def : Pat<(or (and (shl GPR:$rs1, (i32 8)), (i32 0xFF00FF00)),
              (and (srl GPR:$rs1, (i32 8)), (i32 0x00FF00FF))),
          (GREVI GPR:$rs1, (i32 8))>;
def : Pat<(rotr (bswap GPR:$rs1), (i32 16)), (GREVI GPR:$rs1, (i32 8))>;
def : Pat<(or (shl GPR:$rs1, (i32 16)), (srl GPR:$rs1, (i32 16))),
          (GREVI GPR:$rs1, (i32 16))>;
def : Pat<(rotl GPR:$rs1, (i32 16)), (GREVI GPR:$rs1, (i32 16))>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i32 24))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i32 31))>;
} // Predicates = [HasStdExtZbp, IsRV32]
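
// [Editorial sketch, not part of this commit] GREVI performs a "generalized
// bit reverse": bit I moves to position I ^ imm. That is why grevi with
// imm 24 is a byte swap and imm 31 a full bit reverse on RV32, as the last
// two patterns above encode. A standalone check (plain C++, no LLVM assumed):
#include <cassert>
#include <cstdint>

static uint32_t grevRef32(uint32_t X, unsigned Imm) {
  uint32_t R = 0;
  for (unsigned I = 0; I < 32; ++I)
    R |= ((X >> I) & 1u) << (I ^ Imm); // bit I ends up at position I ^ Imm
  return R;
}

static uint32_t bswap32(uint32_t X) {
  return (X << 24) | ((X & 0xFF00u) << 8) | ((X >> 8) & 0xFF00u) | (X >> 24);
}

static uint32_t bitreverse32(uint32_t X) {
  uint32_t R = 0;
  for (unsigned I = 0; I < 32; ++I)
    R |= ((X >> I) & 1u) << (31 - I);
  return R;
}

int main() {
  const uint32_t X = 0x12345678u;
  assert(grevRef32(X, 24) == bswap32(X));      // (bswap ...)      -> GREVI 24
  assert(grevRef32(X, 31) == bitreverse32(X)); // (bitreverse ...) -> GREVI 31
  return 0;
}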

let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAAAAAAAAAA)),
              (and (srl GPR:$rs1, (i64 1)), (i64 0x5555555555555555))),
          (GREVI GPR:$rs1, (i64 1))>;
def : Pat<(or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCCCCCCCCCC)),
              (and (srl GPR:$rs1, (i64 2)), (i64 0x3333333333333333))),
          (GREVI GPR:$rs1, (i64 2))>;
def : Pat<(or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0F0F0F0F0)),
              (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F0F0F0F0F))),
          (GREVI GPR:$rs1, (i64 4))>;
def : Pat<(or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00FF00FF00)),
              (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF00FF00FF))),
          (GREVI GPR:$rs1, (i64 8))>;
def : Pat<(or (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000FFFF0000)),
              (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF0000FFFF))),
          (GREVI GPR:$rs1, (i64 16))>;
def : Pat<(or (shl GPR:$rs1, (i64 32)), (srl GPR:$rs1, (i64 32))),
          (GREVI GPR:$rs1, (i64 32))>;
def : Pat<(rotl GPR:$rs1, (i64 32)), (GREVI GPR:$rs1, (i64 32))>;
def : Pat<(bswap GPR:$rs1), (GREVI GPR:$rs1, (i64 56))>;
def : Pat<(bitreverse GPR:$rs1), (GREVI GPR:$rs1, (i64 63))>;
} // Predicates = [HasStdExtZbp, IsRV64]

let Predicates = [HasStdExtZbt] in {
def : Pat<(or (and (xor GPR:$rs2, -1), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
          (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
def : Pat<(riscv_selectcc GPR:$rs2, (XLenVT 0), (XLenVT 17), GPR:$rs3, GPR:$rs1),
          (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
def : Pat<(fshl GPR:$rs1, GPR:$rs2, GPR:$rs3),
          (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
def : Pat<(fshr GPR:$rs1, GPR:$rs2, GPR:$rs3),
          (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
def : Pat<(fshr GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
          (FSRI GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
} // Predicates = [HasStdExtZbt]

let Predicates = [HasStdExtZbb] in {
def : Pat<(ctlz GPR:$rs1), (CLZ GPR:$rs1)>;
def : Pat<(cttz GPR:$rs1), (CTZ GPR:$rs1)>;
def : Pat<(ctpop GPR:$rs1), (PCNT GPR:$rs1)>;
} // Predicates = [HasStdExtZbb]

let Predicates = [HasStdExtZbb, IsRV32] in
def : Pat<(sra (shl GPR:$rs1, (i32 24)), (i32 24)), (SEXTB GPR:$rs1)>;
let Predicates = [HasStdExtZbb, IsRV64] in
def : Pat<(sra (shl GPR:$rs1, (i64 56)), (i64 56)), (SEXTB GPR:$rs1)>;

let Predicates = [HasStdExtZbb, IsRV32] in
def : Pat<(sra (shl GPR:$rs1, (i32 16)), (i32 16)), (SEXTH GPR:$rs1)>;
let Predicates = [HasStdExtZbb, IsRV64] in
def : Pat<(sra (shl GPR:$rs1, (i64 48)), (i64 48)), (SEXTH GPR:$rs1)>;

let Predicates = [HasStdExtZbb] in {
def : Pat<(smin GPR:$rs1, GPR:$rs2), (MIN GPR:$rs1, GPR:$rs2)>;
def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 20), GPR:$rs1, GPR:$rs2),
          (MIN GPR:$rs1, GPR:$rs2)>;
def : Pat<(smax GPR:$rs1, GPR:$rs2), (MAX GPR:$rs1, GPR:$rs2)>;
def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 20), GPR:$rs1, GPR:$rs2),
          (MAX GPR:$rs1, GPR:$rs2)>;
def : Pat<(umin GPR:$rs1, GPR:$rs2), (MINU GPR:$rs1, GPR:$rs2)>;
def : Pat<(riscv_selectcc GPR:$rs1, GPR:$rs2, (XLenVT 12), GPR:$rs1, GPR:$rs2),
          (MINU GPR:$rs1, GPR:$rs2)>;
def : Pat<(umax GPR:$rs1, GPR:$rs2), (MAXU GPR:$rs1, GPR:$rs2)>;
def : Pat<(riscv_selectcc GPR:$rs2, GPR:$rs1, (XLenVT 12), GPR:$rs1, GPR:$rs2),
          (MAXU GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbb]

let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
def : Pat<(or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16))),
          (PACK GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def : Pat<(or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32))),
          (PACK GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
def : Pat<(or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16))),
          (PACKU GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def : Pat<(or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32))),
          (PACKU GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbbOrZbp] in
def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFF00),
              (and GPR:$rs1, 0x00FF)),
          (PACKH GPR:$rs1, GPR:$rs2)>;

let Predicates = [HasStdExtZbp, IsRV32] in {
def : Pat<(or (or (and (shl GPR:$rs1, (i32 8)), (i32 0x00FF0000)),
                  (and GPR:$rs1, (i32 0xFF0000FF))),
              (and (srl GPR:$rs1, (i32 8)), (i32 0x0000FF00))),
          (SHFLI GPR:$rs1, (i32 8))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i32 4)), (i32 0x0F000F00)),
                  (and GPR:$rs1, (i32 0xF00FF00F))),
              (and (srl GPR:$rs1, (i32 4)), (i32 0x00F000F0))),
          (SHFLI GPR:$rs1, (i32 4))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i32 2)), (i32 0x30303030)),
                  (and GPR:$rs1, (i32 0xC3C3C3C3))),
              (and (srl GPR:$rs1, (i32 2)), (i32 0x0C0C0C0C))),
          (SHFLI GPR:$rs1, (i32 2))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i32 1)), (i32 0x44444444)),
                  (and GPR:$rs1, (i32 0x99999999))),
              (and (srl GPR:$rs1, (i32 1)), (i32 0x22222222))),
          (SHFLI GPR:$rs1, (i32 1))>;
} // Predicates = [HasStdExtZbp, IsRV32]

let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(or (or (and (shl GPR:$rs1, (i64 16)), (i64 0x0000FFFF00000000)),
                  (and GPR:$rs1, (i64 0xFFFF00000000FFFF))),
              (and (srl GPR:$rs1, (i64 16)), (i64 0x00000000FFFF0000))),
          (SHFLI GPR:$rs1, (i64 16))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i64 8)), (i64 0x00FF000000FF0000)),
                  (and GPR:$rs1, (i64 0xFF0000FFFF0000FF))),
              (and (srl GPR:$rs1, (i64 8)), (i64 0x0000FF000000FF00))),
          (SHFLI GPR:$rs1, (i64 8))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i64 4)), (i64 0x0F000F000F000F00)),
                  (and GPR:$rs1, (i64 0xF00FF00FF00FF00F))),
              (and (srl GPR:$rs1, (i64 4)), (i64 0x00F000F000F000F0))),
          (SHFLI GPR:$rs1, (i64 4))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i64 2)), (i64 0x3030303030303030)),
                  (and GPR:$rs1, (i64 0xC3C3C3C3C3C3C3C3))),
              (and (srl GPR:$rs1, (i64 2)), (i64 0x0C0C0C0C0C0C0C0C))),
          (SHFLI GPR:$rs1, (i64 2))>;
def : Pat<(or (or (and (shl GPR:$rs1, (i64 1)), (i64 0x4444444444444444)),
                  (and GPR:$rs1, (i64 0x9999999999999999))),
              (and (srl GPR:$rs1, (i64 1)), (i64 0x2222222222222222))),
          (SHFLI GPR:$rs1, (i64 1))>;
} // Predicates = [HasStdExtZbp, IsRV64]

let Predicates = [HasStdExtZbb, IsRV64] in {
def : Pat<(and (add GPR:$rs, simm12:$simm12), (i64 0xFFFFFFFF)),
          (ADDIWU GPR:$rs, simm12:$simm12)>;
def : Pat<(SLLIUWPat GPR:$rs1, uimmlog2xlen:$shamt),
          (SLLIUW GPR:$rs1, uimmlog2xlen:$shamt)>;
def : Pat<(and (add GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
          (ADDWU GPR:$rs1, GPR:$rs2)>;
def : Pat<(and (sub GPR:$rs1, GPR:$rs2), (i64 0xFFFFFFFF)),
          (SUBWU GPR:$rs1, GPR:$rs2)>;
def : Pat<(add GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
          (ADDUW GPR:$rs1, GPR:$rs2)>;
def : Pat<(sub GPR:$rs1, (and GPR:$rs2, (i64 0xFFFFFFFF))),
          (SUBUW GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor (riscv_sllw (xor GPR:$rs1, -1), GPR:$rs2), -1),
          (SLOW GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor (riscv_srlw (xor GPR:$rs1, -1), GPR:$rs2), -1),
          (SROW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbb, IsRV64]

let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)),
              (riscv_srlw (assertsexti32 GPR:$rs1),
                          (sub (i64 0), (assertsexti32 GPR:$rs2)))),
          (ROLW GPR:$rs1, GPR:$rs2)>;
def : Pat<(or (riscv_sllw (assertsexti32 GPR:$rs1),
                          (sub (i64 0), (assertsexti32 GPR:$rs2))),
              (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2))),
          (RORW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]

let Predicates = [HasStdExtZbs, IsRV64] in {
def : Pat<(and (xor (riscv_sllw 1, (assertsexti32 GPR:$rs2)), -1),
               (assertsexti32 GPR:$rs1)),
          (SBCLRW GPR:$rs1, GPR:$rs2)>;
def : Pat<(or (riscv_sllw 1, (assertsexti32 GPR:$rs2)),
              (assertsexti32 GPR:$rs1)),
          (SBSETW GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor (riscv_sllw 1, (assertsexti32 GPR:$rs2)),
               (assertsexti32 GPR:$rs1)),
          (SBINVW GPR:$rs1, GPR:$rs2)>;
def : Pat<(and (riscv_srlw (assertsexti32 GPR:$rs1), (assertsexti32 GPR:$rs2)),
               1),
          (SBEXTW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbs, IsRV64]

let Predicates = [HasStdExtZbb, IsRV64] in {
def : Pat<(SLOIWPat GPR:$rs1, uimmlog2xlen:$shamt),
          (SLOIW GPR:$rs1, uimmlog2xlen:$shamt)>;
def : Pat<(SROIWPat GPR:$rs1, uimmlog2xlen:$shamt),
          (SROIW GPR:$rs1, uimmlog2xlen:$shamt)>;
} // Predicates = [HasStdExtZbb, IsRV64]

let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def : Pat<(RORIWPat GPR:$rs1, uimmlog2xlen:$shamt),
          (RORIW GPR:$rs1, uimmlog2xlen:$shamt)>;

let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555)),
                              GPR:$rs1),
                          (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA))),
                      i32),
          (GORCIW GPR:$rs1, (i64 1))>;
def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333)),
                              GPR:$rs1),
                          (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC))),
                      i32),
          (GORCIW GPR:$rs1, (i64 2))>;
def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F)),
                              GPR:$rs1),
                          (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0))),
                      i32),
          (GORCIW GPR:$rs1, (i64 4))>;
def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF)),
                              GPR:$rs1),
                          (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00))),
                      i32),
          (GORCIW GPR:$rs1, (i64 8))>;
def : Pat<(sext_inreg (or (or (and (srl GPR:$rs1, (i64 16)), (i64 0x0000FFFF)),
                              GPR:$rs1),
                          (and (shl GPR:$rs1, (i64 16)), (i64 0xFFFF0000))),
                      i32),
          (GORCIW GPR:$rs1, (i64 16))>;
def : Pat<(sext_inreg (or (or (srl (and GPR:$rs1, (i64 0xFFFF0000)), (i64 16)),
                              GPR:$rs1),
                          (shl GPR:$rs1, (i64 16))), i32),
          (GORCIW GPR:$rs1, (i64 16))>;

def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 1)), (i64 0xAAAAAAAA)),
                          (and (srl GPR:$rs1, (i64 1)), (i64 0x55555555))),
                      i32),
          (GREVIW GPR:$rs1, (i64 1))>;
def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 2)), (i64 0xCCCCCCCC)),
                          (and (srl GPR:$rs1, (i64 2)), (i64 0x33333333))),
                      i32),
          (GREVIW GPR:$rs1, (i64 2))>;
def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 4)), (i64 0xF0F0F0F0)),
                          (and (srl GPR:$rs1, (i64 4)), (i64 0x0F0F0F0F))),
                      i32),
          (GREVIW GPR:$rs1, (i64 4))>;
def : Pat<(sext_inreg (or (and (shl GPR:$rs1, (i64 8)), (i64 0xFF00FF00)),
                          (and (srl GPR:$rs1, (i64 8)), (i64 0x00FF00FF))),
                      i32),
          (GREVIW GPR:$rs1, (i64 8))>;
def : Pat<(sext_inreg (or (shl GPR:$rs1, (i64 16)),
                          (srl (and GPR:$rs1, 0xFFFF0000), (i64 16))), i32),
          (GREVIW GPR:$rs1, (i64 16))>;
def : Pat<(sra (bswap GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 24))>;
def : Pat<(sra (bitreverse GPR:$rs1), (i64 32)), (GREVIW GPR:$rs1, (i64 31))>;
} // Predicates = [HasStdExtZbp, IsRV64]

let Predicates = [HasStdExtZbt, IsRV64] in {
def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
                          (i64 0),
                          (i64 17),
                          (assertsexti32 GPR:$rs1),
                          (or (riscv_sllw (assertsexti32 GPR:$rs1),
                                          (and (assertsexti32 GPR:$rs3), 31)),
                              (riscv_srlw (assertsexti32 GPR:$rs2),
                                          (sub (i64 32),
                                               (assertsexti32 GPR:$rs3))))),
          (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
def : Pat<(riscv_selectcc (and (assertsexti32 GPR:$rs3), 31),
                          (i64 0),
                          (i64 17),
                          (assertsexti32 GPR:$rs2),
                          (or (riscv_sllw (assertsexti32 GPR:$rs1),
                                          (sub (i64 32),
                                               (assertsexti32 GPR:$rs3))),
                              (riscv_srlw (assertsexti32 GPR:$rs2),
                                          (and (assertsexti32 GPR:$rs3), 31)))),
          (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
def : Pat<(FSRIWPat GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt),
          (FSRIW GPR:$rs1, GPR:$rs2, uimmlog2xlen:$shamt)>;
} // Predicates = [HasStdExtZbt, IsRV64]

let Predicates = [HasStdExtZbb, IsRV64] in {
def : Pat<(add (ctlz (and GPR:$rs1, (i64 0xFFFFFFFF))), (i64 -32)),
          (CLZW GPR:$rs1)>;
// We don't pattern-match CTZW here as it has the same pattern and result as
// RV64 CTZ
def : Pat<(ctpop (and GPR:$rs1, (i64 0xFFFFFFFF))), (PCNTW GPR:$rs1)>;
} // Predicates = [HasStdExtZbb, IsRV64]

let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
def : Pat<(sext_inreg (or (shl (assertsexti32 GPR:$rs2), (i64 16)),
                          (and (assertsexti32 GPR:$rs1), 0x000000000000FFFF)),
                      i32),
          (PACKW GPR:$rs1, GPR:$rs2)>;
def : Pat<(or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
              (srl (and (assertsexti32 GPR:$rs1), 0x00000000FFFF0000),
                   (i64 16))),
          (PACKUW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]

@ -30953,6 +30953,34 @@ bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

// Returns true if EFLAGS is consumed after this iterator in the rest of the
// basic block or any successors of the basic block.
static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
                              MachineBasicBlock *BB) {
  // Scan forward through BB for a use/def of EFLAGS.
  for (MachineBasicBlock::iterator miI = std::next(Itr), miE = BB->end();
       miI != miE; ++miI) {
    const MachineInstr& mi = *miI;
    if (mi.readsRegister(X86::EFLAGS))
      return true;
    // If we found a def, we can stop searching.
    if (mi.definesRegister(X86::EFLAGS))
      return false;
  }

  // If we hit the end of the block, check whether EFLAGS is live into a
  // successor.
  for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
                                        sEnd = BB->succ_end();
       sItr != sEnd; ++sItr) {
    MachineBasicBlock* succ = *sItr;
    if (succ->isLiveIn(X86::EFLAGS))
      return true;
  }

  return false;
}

/// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
                                     const TargetInstrInfo *TII) {

@ -30985,6 +31013,12 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
  MF->insert(I, fallMBB);
  MF->insert(I, sinkMBB);

  if (isEFLAGSLiveAfter(MI, MBB)) {
    mainMBB->addLiveIn(X86::EFLAGS);
    fallMBB->addLiveIn(X86::EFLAGS);
    sinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());

@ -31373,27 +31407,8 @@ MachineBasicBlock *X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
                                     MachineBasicBlock* BB,
                                     const TargetRegisterInfo* TRI) {
  // Scan forward through BB for a use/def of EFLAGS.
  MachineBasicBlock::iterator miI(std::next(SelectItr));
  for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
    const MachineInstr& mi = *miI;
    if (mi.readsRegister(X86::EFLAGS))
      return false;
    if (mi.definesRegister(X86::EFLAGS))
      break; // Should have kill-flag - update below.
  }

  // If we hit the end of the block, check whether EFLAGS is live into a
  // successor.
  if (miI == BB->end()) {
    for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
                                          sEnd = BB->succ_end();
         sItr != sEnd; ++sItr) {
      MachineBasicBlock* succ = *sItr;
      if (succ->isLiveIn(X86::EFLAGS))
        return false;
    }
  }
  if (isEFLAGSLiveAfter(SelectItr, BB))
    return false;

  // We found a def, or hit the end of the basic block and EFLAGS wasn't live
  // out. SelectMI should have a kill flag on EFLAGS.

@ -44349,8 +44364,8 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
/// A horizontal-op B, for some already available A and B, and if so then LHS is
/// set to A, RHS to B, and the routine returns 'true'.
static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
                              const X86Subtarget &Subtarget,
                              bool IsCommutative) {
                              const X86Subtarget &Subtarget, bool IsCommutative,
                              SmallVectorImpl<int> &PostShuffleMask) {
  // If either operand is undef, bail out. The binop should be simplified.
  if (LHS.isUndef() || RHS.isUndef())
    return false;

@ -44443,6 +44458,12 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
      RMask.push_back(i);
  }

  // Avoid 128-bit lane crossing if pre-AVX2 and FP (integer will split).
  if (!Subtarget.hasAVX2() && VT.isFloatingPoint() &&
      (isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), LMask) ||
       isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), RMask)))
    return false;

  // If A and B occur in reverse order in RHS, then canonicalize by commuting
  // RHS operands and shuffle mask.
  if (A != C) {

@ -44453,6 +44474,9 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
  if (!(A == C && B == D))
    return false;

  PostShuffleMask.clear();
  PostShuffleMask.append(NumElts, SM_SentinelUndef);

  // LHS and RHS are now:
  //   LHS = shuffle A, B, LMask
  //   RHS = shuffle A, B, RMask

@ -44461,6 +44485,7 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
  // so we just repeat the inner loop if this is a 256-bit op.
  unsigned Num128BitChunks = VT.getSizeInBits() / 128;
  unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
  unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
  assert((NumEltsPer128BitChunk % 2 == 0) &&
         "Vector type should have an even number of elements in each lane");
  for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {

@ -44472,25 +44497,40 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
          (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
        continue;

      // Check that successive odd/even elements are being operated on. If not,
      // this is not a horizontal operation.
      if (!((RIdx & 1) == 1 && (LIdx + 1) == RIdx) &&
          !((LIdx & 1) == 1 && (RIdx + 1) == LIdx && IsCommutative))
        return false;

      // Compute the post-shuffle mask index based on where the element
      // is stored in the HOP result, and where it needs to be moved to.
      int Base = LIdx & ~1u;
      int Index = ((Base % NumEltsPer128BitChunk) / 2) +
                  ((Base % NumElts) & ~(NumEltsPer128BitChunk - 1));

      // The low half of the 128-bit result must choose from A.
      // The high half of the 128-bit result must choose from B,
      // unless B is undef. In that case, we are always choosing from A.
      unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
      unsigned Src = B.getNode() ? i >= NumEltsPer64BitChunk : 0;

      // Check that successive elements are being operated on. If not, this is
      // not a horizontal operation.
      int Index = 2 * (i % NumEltsPer64BitChunk) + NumElts * Src + j;
      if (!(LIdx == Index && RIdx == Index + 1) &&
          !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
        return false;
      if ((B && Base >= (int)NumElts) || (!B && i >= NumEltsPer64BitChunk))
        Index += NumEltsPer64BitChunk;
      PostShuffleMask[i + j] = Index;
    }
  }

  LHS = A.getNode() ? A : B; // If A is 'UNDEF', use B for it.
  RHS = B.getNode() ? B : A; // If B is 'UNDEF', use A for it.

  if (!shouldUseHorizontalOp(LHS == RHS && NumShuffles < 2, DAG, Subtarget))
  bool IsIdentityPostShuffle =
      isSequentialOrUndefInRange(PostShuffleMask, 0, NumElts, 0);
  if (IsIdentityPostShuffle)
    PostShuffleMask.clear();

  // Assume a SingleSource HOP if we only shuffle one input and don't need to
  // shuffle the result.
  if (!shouldUseHorizontalOp(LHS == RHS &&
                                 (NumShuffles < 2 || !IsIdentityPostShuffle),
                             DAG, Subtarget))
    return false;

  LHS = DAG.getBitcast(VT, LHS);

@ -44509,10 +44549,16 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
  assert((IsFadd || N->getOpcode() == ISD::FSUB) && "Wrong opcode");

  // Try to synthesize horizontal add/sub from adds/subs of shuffles.
  SmallVector<int, 8> PostShuffleMask;
  if (((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
       (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
      isHorizontalBinOp(LHS, RHS, DAG, Subtarget, IsFadd))
    return DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
      isHorizontalBinOp(LHS, RHS, DAG, Subtarget, IsFadd, PostShuffleMask)) {
    SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
    if (!PostShuffleMask.empty())
      HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
                                        DAG.getUNDEF(VT), PostShuffleMask);
    return HorizBinOp;
  }

  // NOTE: isHorizontalBinOp may have changed LHS/RHS variables.

@ -47605,17 +47651,22 @@ static SDValue combineAddOrSubToHADDorHSUB(SDNode *N, SelectionDAG &DAG,
  bool IsAdd = N->getOpcode() == ISD::ADD;
  assert((IsAdd || N->getOpcode() == ISD::SUB) && "Wrong opcode");

  SmallVector<int, 8> PostShuffleMask;
  if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
       VT == MVT::v8i32) &&
      Subtarget.hasSSSE3() &&
      isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsAdd)) {
      isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsAdd, PostShuffleMask)) {
    auto HOpBuilder = [IsAdd](SelectionDAG &DAG, const SDLoc &DL,
                              ArrayRef<SDValue> Ops) {
      return DAG.getNode(IsAdd ? X86ISD::HADD : X86ISD::HSUB,
                         DL, Ops[0].getValueType(), Ops);
      return DAG.getNode(IsAdd ? X86ISD::HADD : X86ISD::HSUB, DL,
                         Ops[0].getValueType(), Ops);
    };
    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
                            HOpBuilder);
    SDValue HorizBinOp =
        SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, HOpBuilder);
    if (!PostShuffleMask.empty())
      HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
                                        DAG.getUNDEF(VT), PostShuffleMask);
    return HorizBinOp;
  }

  return SDValue();

@ -191,9 +191,11 @@ static void appendFile(std::vector<NewArchiveMember> &Members,
  file_magic Magic = identify_magic(MB.getBuffer());

  if (Magic != file_magic::coff_object && Magic != file_magic::bitcode &&
      Magic != file_magic::archive && Magic != file_magic::windows_resource) {
      Magic != file_magic::archive && Magic != file_magic::windows_resource &&
      Magic != file_magic::coff_import_library) {
    llvm::errs() << MB.getBufferIdentifier()
                 << ": not a COFF object, bitcode, archive or resource file\n";
                 << ": not a COFF object, bitcode, archive, import library or "
                    "resource file\n";
    exit(1);
  }

@ -1030,12 +1030,6 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
                                                            APInt DemandedElts,
                                                            int DMaskIdx) {

  // FIXME: Allow v3i16/v3f16 in buffer intrinsics when the types are fully supported.
  if (DMaskIdx < 0 &&
      II->getType()->getScalarSizeInBits() != 32 &&
      DemandedElts.getActiveBits() == 3)
    return nullptr;

  auto *IIVTy = cast<VectorType>(II->getType());
  unsigned VWidth = IIVTy->getNumElements();
  if (VWidth == 1)

@ -1124,6 +1118,11 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
  if (!NewNumElts)
    return UndefValue::get(II->getType());

  // FIXME: Allow v3i16/v3f16 in buffer and image intrinsics when the types are
  // fully supported.
  if (II->getType()->getScalarSizeInBits() == 16 && NewNumElts == 3)
    return nullptr;

  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (DMaskIdx >= 0)
      II->setArgOperand(DMaskIdx, Args[DMaskIdx]);

@ -1543,7 +1543,7 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
  Constant *C;
  if (match(&Inst,
            m_c_BinOp(m_OneUse(m_Shuffle(m_Value(V1), m_Undef(), m_Mask(Mask))),
                      m_Constant(C))) &&
                      m_Constant(C))) && !isa<ConstantExpr>(C) &&
      cast<FixedVectorType>(V1->getType())->getNumElements() <= NumElts) {
    assert(Inst.getType()->getScalarType() == V1->getType()->getScalarType() &&
           "Shuffle should not change scalar type");

@ -1859,6 +1859,14 @@ bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
    return true;
  }

  // If any of the predecessors ends with an indirect goto, we can't change
  // its destination. The same applies to CallBr.
  if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
        return isa<IndirectBrInst>(Pred->getTerminator()) ||
               isa<CallBrInst>(Pred->getTerminator());
      }))
    return false;

  // Try to duplicate BB into PredBB.
  return DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto);
}

@ -7397,8 +7397,17 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
    // Look for the next elements with the same type.
    SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
    Type *EltTy = (*IncIt)->getType();
    unsigned EltSize = EltTy->isSized() ? DL->getTypeSizeInBits(EltTy)
                                        : MaxVecRegSize;

    assert(EltTy->isSized() &&
           "Instructions should all be sized at this point");
    TypeSize EltTS = DL->getTypeSizeInBits(EltTy);
    if (EltTS.isScalable()) {
      // For now, just ignore vectorizing scalable types.
      ++IncIt;
      continue;
    }

    unsigned EltSize = EltTS.getFixedSize();
    unsigned MaxNumElts = MaxVecRegSize / EltSize;
    if (MaxNumElts < 2) {
      ++IncIt;

@ -939,7 +939,7 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_DEFAULT_DEVICE)(int KMP_DEREF arg) {

// Get number of NON-HOST devices.
// libomptarget, if loaded, provides this function in api.cpp.
int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) KMP_WEAK_ATTRIBUTE;
int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) {
#if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB)
  return 0;

@ -957,13 +957,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) {

// This function always returns true when called on host device.
// Compiler/libomptarget should handle when it is called inside target region.
int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) KMP_WEAK_ATTRIBUTE;
int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) {
  return 1; // This is the host
}

// libomptarget, if loaded, provides this function
int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) KMP_WEAK_ATTRIBUTE;
int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) {
#if KMP_MIC || KMP_OS_DARWIN || KMP_OS_WINDOWS || defined(KMP_STUB)
  return KMP_HOST_DEVICE;

@ -1318,7 +1318,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_TASK_PRIORITY)(void) {
// This function will be defined in libomptarget. When libomptarget is not
// loaded, we assume we are on the host and return KMP_HOST_DEVICE.
// Compiler/libomptarget will handle this if called inside target.
int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE;
int FTN_STDCALL FTN_GET_DEVICE_NUM(void) KMP_WEAK_ATTRIBUTE_EXTERNAL;
int FTN_STDCALL FTN_GET_DEVICE_NUM(void) { return KMP_HOST_DEVICE; }

// Compiler will ensure that this is only called from host in sequential region

@ -338,10 +338,16 @@ extern "C" {
#define KMP_ALIAS(alias_of) __attribute__((alias(alias_of)))
#endif

#if KMP_HAVE_WEAK_ATTRIBUTE
#define KMP_WEAK_ATTRIBUTE __attribute__((weak))
#if KMP_HAVE_WEAK_ATTRIBUTE && !KMP_DYNAMIC_LIB
#define KMP_WEAK_ATTRIBUTE_EXTERNAL __attribute__((weak))
#else
#define KMP_WEAK_ATTRIBUTE /* Nothing */
#define KMP_WEAK_ATTRIBUTE_EXTERNAL /* Nothing */
#endif

#if KMP_HAVE_WEAK_ATTRIBUTE
#define KMP_WEAK_ATTRIBUTE_INTERNAL __attribute__((weak))
#else
#define KMP_WEAK_ATTRIBUTE_INTERNAL /* Nothing */
#endif

// Define KMP_VERSION_SYMBOL and KMP_EXPAND_NAME

@ -27,7 +27,7 @@
#define THREAD_LOCAL __thread
#endif

#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE
#define OMPT_WEAK_ATTRIBUTE KMP_WEAK_ATTRIBUTE_INTERNAL

//******************************************************************************
// macros