Merge llvm, clang, lld, lldb, compiler-rt and libc++ trunk r321545,
update build glue and version numbers, add new intrinsics headers, and update OptionalObsoleteFiles.inc.
commit fe4fed2e4d
@@ -4551,6 +4551,20 @@ INTERCEPTOR(int, pthread_setname_np, uptr thread, const char *name, void *arg) {
 #define INIT_PTHREAD_SETNAME_NP
 #endif
 
+#if SANITIZER_INTERCEPT_PTHREAD_GETNAME_NP
+INTERCEPTOR(int, pthread_getname_np, uptr thread, char *name, SIZE_T len) {
+  void *ctx;
+  COMMON_INTERCEPTOR_ENTER(ctx, pthread_getname_np, thread, name, len);
+  int res = REAL(pthread_getname_np)(thread, name, len);
+  if (!res)
+    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strnlen(name, len) + 1);
+  return res;
+}
+#define INIT_PTHREAD_GETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_getname_np);
+#else
+#define INIT_PTHREAD_GETNAME_NP
+#endif
+
 #if SANITIZER_INTERCEPT_SINCOS
 INTERCEPTOR(void, sincos, double x, double *sin, double *cos) {
   void *ctx;

@@ -6541,6 +6555,7 @@ static void InitializeCommonInterceptors() {
   INIT_TTYNAME_R;
   INIT_TEMPNAM;
   INIT_PTHREAD_SETNAME_NP;
+  INIT_PTHREAD_GETNAME_NP;
   INIT_SINCOS;
   INIT_REMQUO;
   INIT_LGAMMA;

@@ -343,6 +343,8 @@
 #define SANITIZER_INTERCEPT_PHTREAD_MUTEX SI_POSIX
 #define SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP \
   (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
+#define SANITIZER_INTERCEPT_PTHREAD_GETNAME_NP \
+  (SI_FREEBSD || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
 
 #define SANITIZER_INTERCEPT_TLS_GET_ADDR \
   (SI_FREEBSD || SI_NETBSD || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
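The three hunks above add, register, and gate the new pthread_getname_np interceptor. A minimal sketch of the call being instrumented (an assumption: a glibc target, where g++ defines _GNU_SOURCE and declares pthread_getname_np; the 16-byte buffer is illustrative):

#include <pthread.h>
#include <cstdio>

int main() {
  pthread_setname_np(pthread_self(), "worker");  // setname already intercepted
  char name[16];                                 // glibc requires >= 16 bytes
  if (pthread_getname_np(pthread_self(), name, sizeof(name)) == 0)
    // On success the interceptor marks internal_strnlen(name, len) + 1 bytes
    // of 'name' as written -- the NUL-terminated string stored here.
    std::printf("thread name: %s\n", name);
  return 0;
}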
@@ -25,6 +25,11 @@
 #include <dispatch/dispatch.h>
 #include <pthread.h>
 
+// DISPATCH_NOESCAPE is not defined prior to XCode 8.
+#ifndef DISPATCH_NOESCAPE
+#define DISPATCH_NOESCAPE
+#endif
+
 typedef long long_t; // NOLINT
 
 namespace __tsan {
@@ -38,7 +38,8 @@ bool ignoreReport(SourceLocation SLoc, ReportOptions Opts, ErrorType ET) {
 const char *TypeCheckKinds[] = {
     "load of", "store to", "reference binding to", "member access within",
     "member call on", "constructor call on", "downcast of", "downcast of",
-    "upcast of", "cast to virtual base of", "_Nonnull binding to"};
+    "upcast of", "cast to virtual base of", "_Nonnull binding to",
+    "dynamic operation on"};
 }
 
 static void handleTypeMismatchImpl(TypeMismatchData *Data, ValueHandle Pointer,
@@ -512,6 +512,7 @@ using StatT = struct stat;
 
 #if defined(__APPLE__)
 TimeSpec extract_mtime(StatT const& st) { return st.st_mtimespec; }
+__attribute__((unused)) // Suppress warning
 TimeSpec extract_atime(StatT const& st) { return st.st_atimespec; }
 #else
 TimeSpec extract_mtime(StatT const& st) { return st.st_mtim; }
@@ -197,6 +197,9 @@ Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
                        FastMathFlags FMF, const SimplifyQuery &Q);
 
+/// Given a callsite, fold the result or return null.
+Value *SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q);
+
 /// Given a function and iterators over arguments, fold the result or return
 /// null.
 Value *SimplifyCall(ImmutableCallSite CS, Value *V, User::op_iterator ArgBegin,
@@ -407,12 +407,6 @@ class MemoryDependenceResults {
   void getNonLocalPointerDependency(Instruction *QueryInst,
                                     SmallVectorImpl<NonLocalDepResult> &Result);
 
-  /// Perform a dependency query specifically for QueryInst's access to Loc.
-  /// The other comments for getNonLocalPointerDependency apply here as well.
-  void getNonLocalPointerDependencyFrom(Instruction *QueryInst,
-                                        const MemoryLocation &Loc, bool isLoad,
-                                        SmallVectorImpl<NonLocalDepResult> &Result);
-
   /// Removes an instruction from the dependence analysis, updating the
   /// dependence of instructions that previously depended on it.
   void removeInstruction(Instruction *InstToRemove);
@@ -646,9 +646,6 @@ class TargetTransformInfo {
   /// \brief Additional properties of an operand's values.
   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
 
-  /// \return True if target can execute instructions out of order.
-  bool isOutOfOrder() const;
-
   /// \return The number of scalar or vector registers that the target has.
   /// If 'Vectors' is true, it returns the number of vector registers. If it is
   /// set to false, it returns the number of scalar registers.

@@ -1021,7 +1018,6 @@ class TargetTransformInfo::Concept {
                             Type *Ty) = 0;
   virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                             Type *Ty) = 0;
-  virtual bool isOutOfOrder() const = 0;
   virtual unsigned getNumberOfRegisters(bool Vector) = 0;
   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;

@@ -1299,9 +1295,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
             Type *Ty) override {
     return Impl.getIntImmCost(IID, Idx, Imm, Ty);
   }
-  bool isOutOfOrder() const override {
-    return Impl.isOutOfOrder();
-  }
   unsigned getNumberOfRegisters(bool Vector) override {
     return Impl.getNumberOfRegisters(Vector);
   }
@@ -337,8 +337,6 @@ class TargetTransformInfoImplBase {
     return TTI::TCC_Free;
   }
 
-  bool isOutOfOrder() const { return false; }
-
   unsigned getNumberOfRegisters(bool Vector) { return 8; }
 
   unsigned getRegisterBitWidth(bool Vector) const { return 32; }
@@ -95,7 +95,7 @@ enum MachineTypes : unsigned {
   MT_Invalid = 0xffff,
 
   IMAGE_FILE_MACHINE_UNKNOWN = 0x0,
-  IMAGE_FILE_MACHINE_AM33 = 0x13,
+  IMAGE_FILE_MACHINE_AM33 = 0x1D3,
   IMAGE_FILE_MACHINE_AMD64 = 0x8664,
   IMAGE_FILE_MACHINE_ARM = 0x1C0,
   IMAGE_FILE_MACHINE_ARMNT = 0x1C4,
@@ -402,10 +402,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return BaseT::getInstructionLatency(I);
   }
 
-  bool isOutOfOrder() const {
-    return getST()->getSchedModel().isOutOfOrder();
-  }
-
   /// @}
 
   /// \name Vector TTI Implementations
@@ -288,7 +288,8 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
    llvm_i32_ty, // ordering
    llvm_i32_ty, // scope
    llvm_i1_ty], // isVolatile
-  [IntrArgMemOnly, NoCapture<0>]
+  [IntrArgMemOnly, NoCapture<0>], "",
+  [SDNPMemOperand]
 >;
 
 def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
@@ -226,8 +226,8 @@ class FunctionSamples {
 
   sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
                                           uint32_t Discriminator,
-                                          const std::string &FName,
-                                          uint64_t Num, uint64_t Weight = 1) {
+                                          StringRef FName, uint64_t Num,
+                                          uint64_t Weight = 1) {
     return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
         FName, Num, Weight);
   }
@@ -100,13 +100,11 @@ struct KnownBits {
 
   /// Make this value negative.
   void makeNegative() {
-    assert(!isNonNegative() && "Can't make a non-negative value negative");
     One.setSignBit();
   }
 
   /// Make this value negative.
   void makeNonNegative() {
-    assert(!isNegative() && "Can't make a negative value non-negative");
     Zero.setSignBit();
   }
 
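For context, KnownBits tracks each bit of a value with two masks: a set bit in Zero means that bit is known to be 0, and a set bit in One means it is known to be 1, so the two helpers above record a known sign bit. A simplified 8-bit stand-in (not LLVM's class; names follow the hunk) showing the effect:

#include <cstdint>

struct KnownBitsSketch {
  // Bit set in Zero => bit known to be 0; bit set in One => bit known to be 1.
  uint8_t Zero = 0, One = 0;
  static constexpr uint8_t SignBit = 0x80;

  bool isNegative() const { return One & SignBit; }      // sign bit known 1
  bool isNonNegative() const { return Zero & SignBit; }  // sign bit known 0
  void makeNegative() { One |= SignBit; }       // record: value is negative
  void makeNonNegative() { Zero |= SignBit; }   // record: value is >= 0
};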
@@ -385,8 +385,8 @@ bool DemandedBits::isInstructionDead(Instruction *I) {
 void DemandedBits::print(raw_ostream &OS) {
   performAnalysis();
   for (auto &KV : AliveBits) {
-    OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for "
-       << *KV.first << "\n";
+    OS << "DemandedBits: 0x" << Twine::utohexstr(KV.second.getLimitedValue())
+       << " for " << *KV.first << '\n';
   }
 }
 
@@ -3897,8 +3897,9 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
   // If extracting a specified index from the vector, see if we can recursively
   // find a previously computed scalar that was inserted into the vector.
   if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
-    if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
-      return Elt;
+    if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements()))
+      if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
+        return Elt;
 
   // An undef extract index can be arbitrarily chosen to be an out-of-range
   // index value, which would result in the instruction being undef.

@@ -4494,6 +4495,22 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
       return *ArgBegin;
     return nullptr;
   }
+  case Intrinsic::bswap: {
+    Value *IIOperand = *ArgBegin;
+    Value *X = nullptr;
+    // bswap(bswap(x)) -> x
+    if (match(IIOperand, m_BSwap(m_Value(X))))
+      return X;
+    return nullptr;
+  }
+  case Intrinsic::bitreverse: {
+    Value *IIOperand = *ArgBegin;
+    Value *X = nullptr;
+    // bitreverse(bitreverse(x)) -> x
+    if (match(IIOperand, m_BitReverse(m_Value(X))))
+      return X;
+    return nullptr;
+  }
   default:
     return nullptr;
   }
@@ -4548,6 +4565,16 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
       return SimplifyRelativeLoad(C0, C1, Q.DL);
     return nullptr;
   }
+  case Intrinsic::powi:
+    if (ConstantInt *Power = dyn_cast<ConstantInt>(RHS)) {
+      // powi(x, 0) -> 1.0
+      if (Power->isZero())
+        return ConstantFP::get(LHS->getType(), 1.0);
+      // powi(x, 1) -> x
+      if (Power->isOne())
+        return LHS;
+    }
+    return nullptr;
   default:
     return nullptr;
   }
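The new powi case implements two identities: powi(x, 0) == 1.0 and powi(x, 1) == x. A quick sanity check against the GCC/Clang builtin that produces this intrinsic (an illustrative driver, not part of the patch):

#include <cassert>

int main() {
  double x = 3.5;
  assert(__builtin_powi(x, 0) == 1.0);  // powi(x, 0) -> 1.0
  assert(__builtin_powi(x, 1) == x);    // powi(x, 1) -> x
  return 0;
}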
@@ -4616,6 +4643,12 @@ Value *llvm::SimplifyCall(ImmutableCallSite CS, Value *V,
   return ::SimplifyCall(CS, V, Args.begin(), Args.end(), Q, RecursionLimit);
 }
 
+Value *llvm::SimplifyCall(ImmutableCallSite ICS, const SimplifyQuery &Q) {
+  CallSite CS(const_cast<Instruction*>(ICS.getInstruction()));
+  return ::SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
+                        Q, RecursionLimit);
+}
+
 /// See if we can compute a simplified version of this instruction.
 /// If not, this returns null.

@@ -4750,8 +4783,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
     break;
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
-    Result = SimplifyCall(CS, CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
-                          Q);
+    Result = SimplifyCall(CS, Q);
     break;
   }
 #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
@@ -265,13 +265,21 @@ void Lint::visitCallSite(CallSite CS) {
         // Check that noalias arguments don't alias other arguments. This is
         // not fully precise because we don't know the sizes of the dereferenced
         // memory regions.
-        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
-          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
+          AttributeList PAL = CS.getAttributes();
+          unsigned ArgNo = 0;
+          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+            // Skip ByVal arguments since they will be memcpy'd to the callee's
+            // stack so we're not really passing the pointer anyway.
+            if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+              continue;
             if (AI != BI && (*BI)->getType()->isPointerTy()) {
               AliasResult Result = AA->alias(*AI, *BI);
               Assert(Result != MustAlias && Result != PartialAlias,
                      "Unusual: noalias argument aliases another argument", &I);
             }
+          }
+        }
 
         // Check that an sret argument points to valid memory.
         if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
@@ -920,14 +920,6 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
     Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
   const MemoryLocation Loc = MemoryLocation::get(QueryInst);
   bool isLoad = isa<LoadInst>(QueryInst);
-  return getNonLocalPointerDependencyFrom(QueryInst, Loc, isLoad, Result);
-}
-
-void MemoryDependenceResults::getNonLocalPointerDependencyFrom(
-    Instruction *QueryInst,
-    const MemoryLocation &Loc,
-    bool isLoad,
-    SmallVectorImpl<NonLocalDepResult> &Result) {
   BasicBlock *FromBB = QueryInst->getParent();
   assert(FromBB);
 

@@ -1127,15 +1119,21 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
   // If we already have a cache entry for this CacheKey, we may need to do some
   // work to reconcile the cache entry and the current query.
   if (!Pair.second) {
-    if (CacheInfo->Size != Loc.Size) {
-      // The query's Size differs from the cached one. Throw out the
-      // cached data and proceed with the query at the new size.
+    if (CacheInfo->Size < Loc.Size) {
+      // The query's Size is greater than the cached one. Throw out the
+      // cached data and proceed with the query at the greater size.
       CacheInfo->Pair = BBSkipFirstBlockPair();
       CacheInfo->Size = Loc.Size;
       for (auto &Entry : CacheInfo->NonLocalDeps)
         if (Instruction *Inst = Entry.getResult().getInst())
           RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
       CacheInfo->NonLocalDeps.clear();
+    } else if (CacheInfo->Size > Loc.Size) {
+      // This query's Size is less than the cached one. Conservatively restart
+      // the query using the greater size.
+      return getNonLocalPointerDepFromBB(
+          QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
+          StartBB, Result, Visited, SkipFirstBlock);
     }
 
     // If the query's AATags are inconsistent with the cached one,
@@ -306,7 +306,9 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
       NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
       // Inliner doesn't handle variadic functions.
       // FIXME: refactor this to use the same code that inliner is using.
-      F.isVarArg();
+      F.isVarArg() ||
+      // Don't try to import functions with noinline attribute.
+      F.getAttributes().hasFnAttribute(Attribute::NoInline);
   GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
                                     /* Live = */ false, F.isDSOLocal());
   FunctionSummary::FFlags FunFlags{
@@ -1268,7 +1268,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
     }
     if (!hasTrunc)
       return getAddExpr(Operands);
-    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
+    // In spite we checked in the beginning that ID is not in the cache,
+    // it is possible that during recursion and different modification
+    // ID came to cache, so if we found it, just return it.
+    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+      return S;
   }
 
   // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can

@@ -1284,7 +1288,11 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
     }
     if (!hasTrunc)
       return getMulExpr(Operands);
-    UniqueSCEVs.FindNodeOrInsertPos(ID, IP);  // Mutates IP, returns NULL.
+    // In spite we checked in the beginning that ID is not in the cache,
+    // it is possible that during recursion and different modification
+    // ID came to cache, so if we found it, just return it.
+    if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+      return S;
   }
 
   // If the input value is a chrec scev, truncate the chrec's operands.
@@ -187,8 +187,21 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
     // generated code.
     if (isa<DbgInfoIntrinsic>(IP))
       ScanLimit++;
+
+    // Conservatively, do not use any instruction which has any of wrap/exact
+    // flags installed.
+    // TODO: Instead of simply disable poison instructions we can be clever
+    //       here and match SCEV to this instruction.
+    auto canGeneratePoison = [](Instruction *I) {
+      if (isa<OverflowingBinaryOperator>(I) &&
+          (I->hasNoSignedWrap() || I->hasNoUnsignedWrap()))
+        return true;
+      if (isa<PossiblyExactOperator>(I) && I->isExact())
+        return true;
+      return false;
+    };
     if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
-        IP->getOperand(1) == RHS)
+        IP->getOperand(1) == RHS && !canGeneratePoison(&*IP))
       return &*IP;
     if (IP == BlockBegin) break;
   }
@@ -314,10 +314,6 @@ int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
   return Cost;
 }
 
-bool TargetTransformInfo::isOutOfOrder() const {
-  return TTIImpl->isOutOfOrder();
-}
-
 unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
   return TTIImpl->getNumberOfRegisters(Vector);
 }
@@ -4238,14 +4238,14 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
   LHS = CmpLHS;
   RHS = CmpRHS;
 
-  // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may
-  // return inconsistent results between implementations.
-  //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
-  //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
-  // Therefore we behave conservatively and only proceed if at least one of the
-  // operands is known to not be zero, or if we don't care about signed zeroes.
+  // Signed zero may return inconsistent results between implementations.
+  //  (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+  //  minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+  // Therefore, we behave conservatively and only proceed if at least one of the
+  // operands is known to not be zero or if we don't care about signed zero.
   switch (Pred) {
   default: break;
+  // FIXME: Include OGT/OLT/UGT/ULT.
   case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
   case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
     if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&

@@ -4493,14 +4493,24 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
 
   // Deal with type mismatches.
   if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
-    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
+      // If this is a potential fmin/fmax with a cast to integer, then ignore
+      // -0.0 because there is no corresponding integer value.
+      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
+        FMF.setNoSignedZeros();
       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   cast<CastInst>(TrueVal)->getOperand(0), C,
                                   LHS, RHS);
-    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+    }
+    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
+      // If this is a potential fmin/fmax with a cast to integer, then ignore
+      // -0.0 because there is no corresponding integer value.
+      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
+        FMF.setNoSignedZeros();
       return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   C, cast<CastInst>(FalseVal)->getOperand(0),
                                   LHS, RHS);
+    }
   }
   return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                               LHS, RHS);
@@ -1922,14 +1922,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
   EVT VT = Sel.getValueType();
   SDLoc DL(Sel);
   SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
-  assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
-          isConstantFPBuildVectorOrConstantFP(NewCT)) &&
-         "Failed to constant fold a binop with constant operands");
+  if (!NewCT.isUndef() &&
+      !isConstantOrConstantVector(NewCT, true) &&
+      !isConstantFPBuildVectorOrConstantFP(NewCT))
+    return SDValue();
 
   SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
-  assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
-          isConstantFPBuildVectorOrConstantFP(NewCF)) &&
-         "Failed to constant fold a binop with constant operands");
+  if (!NewCF.isUndef() &&
+      !isConstantOrConstantVector(NewCF, true) &&
+      !isConstantFPBuildVectorOrConstantFP(NewCF))
+    return SDValue();
 
   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
 }

@@ -3577,7 +3579,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 
     // TODO: What is the 'or' equivalent of this fold?
     // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
-    if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
+    if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
+        IsInteger && CC0 == ISD::SETNE &&
        ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
         (isAllOnesConstant(LR) && isNullConstant(RR)))) {
       SDValue One = DAG.getConstant(1, DL, OpVT);

@@ -3641,15 +3644,18 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
       VT.getSizeInBits() <= 64) {
     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
-      APInt ADDC = ADDI->getAPIntValue();
-      if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
-        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
       // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
       // immediate for an add, but it is legal if its top c2 bits are set,
       // transform the ADD so the immediate doesn't need to be materialized
       // in a register.
+      if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
+        APInt ADDC = ADDI->getAPIntValue();
+        APInt SRLC = SRLI->getAPIntValue();
+        if (ADDC.getMinSignedBits() <= 64 &&
+            SRLC.ult(VT.getSizeInBits()) &&
+            !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
-                                             SRLI->getZExtValue());
+                                             SRLC.getZExtValue());
           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
             ADDC |= Mask;
             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {

@@ -3987,6 +3993,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   // reassociate and
   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
     return RAND;
 
+  // Try to convert a constant mask AND into a shuffle clear mask.
+  if (VT.isVector())
+    if (SDValue Shuffle = XformToShuffleWithZero(N))
+      return Shuffle;
+
   // fold (and (or x, C), D) -> D if (C & D) == D
   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());

@@ -16480,6 +16492,8 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
+
   EVT VT = N->getValueType(0);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = peekThroughBitcast(N->getOperand(1));

@@ -16490,9 +16504,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   if (LegalOperations)
     return SDValue();
 
-  if (N->getOpcode() != ISD::AND)
-    return SDValue();
-
   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
     return SDValue();
 

@@ -16581,10 +16592,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
     return Fold;
 
-  // Try to convert a constant mask AND into a shuffle clear mask.
-  if (SDValue Shuffle = XformToShuffleWithZero(N))
-    return Shuffle;
-
   // Type legalization might introduce new shuffles in the DAG.
   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
@@ -49,6 +49,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "legalizevectorops"
+
 namespace {
 
 class VectorLegalizer {

@@ -226,7 +228,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   if (Op.getOpcode() == ISD::LOAD) {
     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
     ISD::LoadExtType ExtType = LD->getExtensionType();
-    if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
+    if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+      DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG));
       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
                                    LD->getMemoryVT())) {
       default: llvm_unreachable("This action is not supported yet!");

@@ -252,11 +255,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
         Changed = true;
         return LegalizeOp(ExpandLoad(Op));
       }
+    }
   } else if (Op.getOpcode() == ISD::STORE) {
     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
     EVT StVT = ST->getMemoryVT();
     MVT ValVT = ST->getValue().getSimpleValueType();
-    if (StVT.isVector() && ST->isTruncatingStore())
+    if (StVT.isVector() && ST->isTruncatingStore()) {
+      DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
+            Node->dump(&DAG));
       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
       default: llvm_unreachable("This action is not supported yet!");
       case TargetLowering::Legal:

@@ -270,6 +276,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
         Changed = true;
         return LegalizeOp(ExpandStore(Op));
       }
+    }
   } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
     HasVectorValue = true;
 

@@ -376,6 +383,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     break;
   }
 
+  DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
+
   switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
   default: llvm_unreachable("This action is not supported yet!");
   case TargetLowering::Promote:

@@ -383,12 +392,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     Changed = true;
     break;
   case TargetLowering::Legal:
+    DEBUG(dbgs() << "Legal node: nothing to do\n");
     break;
   case TargetLowering::Custom: {
+    DEBUG(dbgs() << "Trying custom legalization\n");
     if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
+      DEBUG(dbgs() << "Successfully custom legalized node\n");
       Result = Tmp1;
       break;
     }
+    DEBUG(dbgs() << "Could not custom legalize node\n");
    LLVM_FALLTHROUGH;
   }
   case TargetLowering::Expand:
@@ -5943,7 +5943,9 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain,

@@ -6043,7 +6045,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,

@@ -6108,7 +6112,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,

@@ -6134,7 +6140,9 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,

@@ -6160,7 +6168,9 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,

@@ -6189,7 +6199,9 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,

@@ -6224,7 +6236,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,

@@ -6256,7 +6270,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
 
   CSEMap.InsertNode(N, IP);
   InsertNode(N);
-  return SDValue(N, 0);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
 }
 
 SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,

@@ -7112,6 +7128,8 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
 void SelectionDAG::salvageDebugInfo(SDNode &N) {
   if (!N.getHasDebugValue())
     return;
+
+  SmallVector<SDDbgValue *, 2> ClonedDVs;
   for (auto DV : GetDbgValues(&N)) {
     if (DV->isInvalidated())
       continue;

@@ -7135,13 +7153,16 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
       SDDbgValue *Clone =
           getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
                       DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+      ClonedDVs.push_back(Clone);
       DV->setIsInvalidated();
-      AddDbgValue(Clone, N0.getNode(), false);
+      DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
+            dbgs() << " into " << *DIExpr << '\n');
     }
   }
+
+  for (SDDbgValue *Dbg : ClonedDVs)
+    AddDbgValue(Dbg, Dbg->getSDNode(), false);
 }
 
 namespace {
@@ -237,6 +237,59 @@ class InstructionVerifier;
 /// Builds BasicBlockState for each BB of the function.
 /// It can traverse function for verification and provides all required
 /// information.
+///
+/// GC pointer may be in one of three states: relocated, unrelocated and
+/// poisoned.
+/// Relocated pointer may be used without any restrictions.
+/// Unrelocated pointer cannot be dereferenced, passed as argument to any call
+/// or returned. Unrelocated pointer may be safely compared against another
+/// unrelocated pointer or against a pointer exclusively derived from null.
+/// Poisoned pointers are produced when we somehow derive pointer from relocated
+/// and unrelocated pointers (e.g. phi, select). This pointers may be safely
+/// used in a very limited number of situations. Currently the only way to use
+/// it is comparison against constant exclusively derived from null. All
+/// limitations arise due to their undefined state: this pointers should be
+/// treated as relocated and unrelocated simultaneously.
+/// Rules of deriving:
+/// R + U = P - that's where the poisoned pointers come from
+/// P + X = P
+/// U + U = U
+/// R + R = R
+/// X + C = X
+/// Where "+" - any operation that somehow derive pointer, U - unrelocated,
+/// R - relocated and P - poisoned, C - constant, X - U or R or P or C or
+/// nothing (in case when "+" is unary operation).
+/// Deriving of pointers by itself is always safe.
+/// NOTE: when we are making decision on the status of instruction's result:
+/// a) for phi we need to check status of each input *at the end of
+///    corresponding predecessor BB*.
+/// b) for other instructions we need to check status of each input *at the
+///    current point*.
+///
+/// FIXME: This works fairly well except one case
+///   bb1:
+///   p = *some GC-ptr def*
+///   p1 = gep p, offset
+///            /        |
+///           /         |
+///     bb2:            |
+///     safepoint       |
+///           \         |
+///            \        |
+///   bb3:
+///   p2 = phi [p, bb2] [p1, bb1]
+///   p3 = phi [p, bb2] [p, bb1]
+///   here p and p1 is unrelocated
+///   p2 and p3 is poisoned (though they shouldn't be)
+///
+/// This leads to some weird results:
+///   cmp eq p, p2 - illegal instruction (false-positive)
+///   cmp eq p1, p2 - illegal instruction (false-positive)
+///   cmp eq p, p3 - illegal instruction (false-positive)
+///   cmp eq p, p1 - ok
+/// To fix this we need to introduce conception of generations and be able to
+/// check if two values belong to one generation or not. This way p2 will be
+/// considered to be unrelocated and no false alarm will happen.
 class GCPtrTracker {
   const Function &F;
   SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;

@@ -244,6 +297,9 @@ class GCPtrTracker {
   // This set contains defs of unrelocated pointers that are proved to be legal
   // and don't need verification.
   DenseSet<const Instruction *> ValidUnrelocatedDefs;
+  // This set contains poisoned defs. They can be safely ignored during
+  // verification too.
+  DenseSet<const Value *> PoisonedDefs;
 
 public:
   GCPtrTracker(const Function &F, const DominatorTree &DT);

@@ -251,6 +307,8 @@ class GCPtrTracker {
   BasicBlockState *getBasicBlockState(const BasicBlock *BB);
   const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
 
+  bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
+
   /// Traverse each BB of the function and call
   /// InstructionVerifier::verifyInstruction for each possibly invalid
   /// instruction.

@@ -349,7 +407,9 @@ const BasicBlockState *GCPtrTracker::getBasicBlockState(
 }
 
 bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
-  return ValidUnrelocatedDefs.count(I);
+  // Poisoned defs are skipped since they are always safe by itself by
+  // definition (for details see comment to this class).
+  return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
 }
 
 void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,

@@ -418,31 +478,78 @@ bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
          "Passed Contribution should be from the passed BasicBlockState!");
   AvailableValueSet AvailableSet = BBS->AvailableIn;
   bool ContributionChanged = false;
+  // For explanation why instructions are processed this way see
+  // "Rules of deriving" in the comment to this class.
   for (const Instruction &I : *BB) {
-    bool ProducesUnrelocatedPointer = false;
-    if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
-        containsGCPtrType(I.getType())) {
-      // GEP/bitcast of unrelocated pointer is legal by itself but this
-      // def shouldn't appear in any AvailableSet.
+    bool ValidUnrelocatedPointerDef = false;
+    bool PoisonedPointerDef = false;
+    // TODO: `select` instructions should be handled here too.
+    if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
+      if (containsGCPtrType(PN->getType())) {
+        // If both is true, output is poisoned.
+        bool HasRelocatedInputs = false;
+        bool HasUnrelocatedInputs = false;
+        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+          const BasicBlock *InBB = PN->getIncomingBlock(i);
+          const Value *InValue = PN->getIncomingValue(i);
+
+          if (isNotExclusivelyConstantDerived(InValue)) {
+            if (isValuePoisoned(InValue)) {
+              // If any of inputs is poisoned, output is always poisoned too.
+              HasRelocatedInputs = true;
+              HasUnrelocatedInputs = true;
+              break;
+            }
+            if (BlockMap[InBB]->AvailableOut.count(InValue))
+              HasRelocatedInputs = true;
+            else
+              HasUnrelocatedInputs = true;
+          }
+        }
+        if (HasUnrelocatedInputs) {
+          if (HasRelocatedInputs)
+            PoisonedPointerDef = true;
+          else
+            ValidUnrelocatedPointerDef = true;
+        }
+      }
+    } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
+               containsGCPtrType(I.getType())) {
+      // GEP/bitcast of unrelocated pointer is legal by itself but this def
+      // shouldn't appear in any AvailableSet.
       for (const Value *V : I.operands())
         if (containsGCPtrType(V->getType()) &&
            isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
-          ProducesUnrelocatedPointer = true;
+          if (isValuePoisoned(V))
+            PoisonedPointerDef = true;
+          else
+            ValidUnrelocatedPointerDef = true;
          break;
        }
    }
-    if (!ProducesUnrelocatedPointer) {
+    assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
+           "Value cannot be both unrelocated and poisoned!");
+    if (ValidUnrelocatedPointerDef) {
+      // Remove def of unrelocated pointer from Contribution of this BB and
+      // trigger update of all its successors.
+      Contribution.erase(&I);
+      PoisonedDefs.erase(&I);
+      ValidUnrelocatedDefs.insert(&I);
+      DEBUG(dbgs() << "Removing urelocated " << I << " from Contribution of "
+                   << BB->getName() << "\n");
+      ContributionChanged = true;
+    } else if (PoisonedPointerDef) {
+      // Mark pointer as poisoned, remove its def from Contribution and trigger
+      // update of all successors.
+      Contribution.erase(&I);
+      PoisonedDefs.insert(&I);
+      DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
+                   << BB->getName() << "\n");
+      ContributionChanged = true;
+    } else {
       bool Cleared = false;
       transferInstruction(I, Cleared, AvailableSet);
       (void)Cleared;
-    } else {
-      // Remove def of unrelocated pointer from Contribution of this BB
-      // and trigger update of all its successors.
-      Contribution.erase(&I);
-      ValidUnrelocatedDefs.insert(&I);
-      DEBUG(dbgs() << "Removing " << I << " from Contribution of "
-                   << BB->getName() << "\n");
-      ContributionChanged = true;
     }
   }
   return ContributionChanged;

@@ -524,8 +631,8 @@ void InstructionVerifier::verifyInstruction(
 
   // Returns true if LHS and RHS are unrelocated pointers and they are
   // valid unrelocated uses.
-  auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS,
-                                 &RHS] () {
+  auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
+                                 &LHS, &RHS] () {
     // A cmp instruction has valid unrelocated pointer operands only if
     // both operands are unrelocated pointers.
     // In the comparison between two pointers, if one is an unrelocated

@@ -545,12 +652,23 @@ void InstructionVerifier::verifyInstruction(
         (baseTyLHS == BaseType::NonConstant &&
          baseTyRHS == BaseType::ExclusivelySomeConstant))
       return false;
+
+    // If one of pointers is poisoned and other is not exclusively derived
+    // from null it is an invalid expression: it produces poisoned result
+    // and unless we want to track all defs (not only gc pointers) the only
+    // option is to prohibit such instructions.
+    if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
+        (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
+      return false;
+
     // All other cases are valid cases enumerated below:
-    // 1. Comparison between an exlusively derived null pointer and a
+    // 1. Comparison between an exclusively derived null pointer and a
     //    constant base pointer.
-    // 2. Comparison between an exlusively derived null pointer and a
+    // 2. Comparison between an exclusively derived null pointer and a
     //    non-constant unrelocated base pointer.
     // 3. Comparison between 2 unrelocated pointers.
+    // 4. Comparison between a pointer exclusively derived from null and a
+    //    non-constant poisoned pointer.
     return true;
   };
   if (!hasValidUnrelocatedUse()) {
@@ -388,24 +388,20 @@ void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {
   // from the ObjC data structures generated by the front end.
 
   // special case if this data blob is an ObjC class definition
-  std::string Section = v->getSection();
-  if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
-    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
-      addObjCClass(gv);
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(v)) {
+    StringRef Section = GV->getSection();
+    if (Section.startswith("__OBJC,__class,")) {
+      addObjCClass(GV);
     }
-  }
 
-  // special case if this data blob is an ObjC category definition
-  else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
-    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
-      addObjCCategory(gv);
+    // special case if this data blob is an ObjC category definition
+    else if (Section.startswith("__OBJC,__category,")) {
+      addObjCCategory(GV);
     }
-  }
 
-  // special case if this data blob is the list of referenced classes
-  else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
-    if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
-      addObjCClassRef(gv);
+    // special case if this data blob is the list of referenced classes
+    else if (Section.startswith("__OBJC,__cls_refs,")) {
+      addObjCClassRef(GV);
     }
   }
 }
@@ -76,7 +76,7 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
   if (TempDir.empty())
     return;
   // User asked to save temps, let dump the bitcode file after import.
-  std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
+  std::string SaveTempPath = (TempDir + llvm::Twine(count) + Suffix).str();
   std::error_code EC;
   raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
   if (EC)

@@ -1086,7 +1086,7 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
   return false;
 }
 
-Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
+static Triple::OSType getOSTypeFromPlatform(MachO::PlatformType Type) {
   switch (Type) {
   case MachO::PLATFORM_MACOS: return Triple::MacOSX;
   case MachO::PLATFORM_IOS: return Triple::IOS;
@@ -423,13 +423,17 @@ bool ELFAsmParser::parseGroup(StringRef &GroupName) {
   if (L.isNot(AsmToken::Comma))
     return TokError("expected group name");
   Lex();
-  if (getParser().parseIdentifier(GroupName))
-    return true;
+  if (L.is(AsmToken::Integer)) {
+    GroupName = getTok().getString();
+    Lex();
+  } else if (getParser().parseIdentifier(GroupName)) {
+    return TokError("invalid group name");
+  }
   if (L.is(AsmToken::Comma)) {
     Lex();
     StringRef Linkage;
     if (getParser().parseIdentifier(Linkage))
-      return true;
+      return TokError("invalid linkage");
     if (Linkage != "comdat")
       return TokError("Linkage must be 'comdat'");
   }
@@ -628,7 +628,7 @@ CoverageMapping::getInstantiationGroups(StringRef Filename) const {
   }
 
   std::vector<InstantiationGroup> Result;
-  for (const auto &InstantiationSet : InstantiationSetCollector) {
+  for (auto &InstantiationSet : InstantiationSetCollector) {
     InstantiationGroup IG{InstantiationSet.first.first,
                           InstantiationSet.first.second,
                           std::move(InstantiationSet.second)};

@@ -666,7 +666,7 @@ void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) {
     ParseIndexList(Data, Offset, Indicies);
     break;
   default:
-    errs() << "unrecognised tag: 0x" << utohexstr(Tag) << '\n';
+    errs() << "unrecognised tag: 0x" << Twine::utohexstr(Tag) << '\n';
     return;
   }
 
@@ -688,7 +688,9 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {
          O->getNumOccurrencesFlag() == cl::OneOrMore;
 }
 
-static bool isWhitespace(char C) { return strchr(" \t\n\r\f\v", C); }
+static bool isWhitespace(char C) {
+  return C == ' ' || C == '\t' || C == '\r' || C == '\n';
+}
 
 static bool isQuote(char C) { return C == '\"' || C == '\''; }
 
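Besides dropping '\f' and '\v' from the set of separators, the rewrite removes a subtle hazard of the old predicate: strchr counts the terminating NUL as part of the searched string, so the old isWhitespace('\0') returned true. A small demonstration (standalone sketch, not part of the patch):

#include <cassert>
#include <cstring>

static bool oldIsWhitespace(char C) { return std::strchr(" \t\n\r\f\v", C); }
static bool newIsWhitespace(char C) {
  return C == ' ' || C == '\t' || C == '\r' || C == '\n';
}

int main() {
  // strchr(s, '\0') returns a pointer to s's terminator, which is non-null.
  assert(oldIsWhitespace('\0'));
  assert(!newIsWhitespace('\0'));
  return 0;
}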
@@ -709,17 +711,19 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
       break;
     }
 
+    char C = Src[I];
+
     // Backslash escapes the next character.
-    if (I + 1 < E && Src[I] == '\\') {
+    if (I + 1 < E && C == '\\') {
       ++I; // Skip the escape.
       Token.push_back(Src[I]);
       continue;
     }
 
     // Consume a quoted string.
-    if (isQuote(Src[I])) {
-      char Quote = Src[I++];
-      while (I != E && Src[I] != Quote) {
+    if (isQuote(C)) {
+      ++I;
+      while (I != E && Src[I] != C) {
         // Backslash escapes the next character.
         if (Src[I] == '\\' && I + 1 != E)
           ++I;

@@ -732,7 +736,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     }
 
     // End the token if this is whitespace.
-    if (isWhitespace(Src[I])) {
+    if (isWhitespace(C)) {
       if (!Token.empty())
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
       Token.clear();

@@ -740,7 +744,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
     }
 
     // This is a normal character. Append it.
-    Token.push_back(Src[I]);
+    Token.push_back(C);
   }
 
   // Append the last token after hitting EOF with no whitespace.

@@ -798,25 +802,27 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
   // end of the source string.
   enum { INIT, UNQUOTED, QUOTED } State = INIT;
   for (size_t I = 0, E = Src.size(); I != E; ++I) {
+    char C = Src[I];
+
     // INIT state indicates that the current input index is at the start of
     // the string or between tokens.
     if (State == INIT) {
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
        State = QUOTED;
        continue;
      }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
        I = parseBackslash(Src, I, Token);
        State = UNQUOTED;
        continue;
      }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
      State = UNQUOTED;
      continue;
    }

@@ -825,38 +831,38 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
     // quotes.
     if (State == UNQUOTED) {
       // Whitespace means the end of the token.
-      if (isWhitespace(Src[I])) {
+      if (isWhitespace(C)) {
         NewArgv.push_back(Saver.save(StringRef(Token)).data());
         Token.clear();
         State = INIT;
         // Mark the end of lines in response files
-        if (MarkEOLs && Src[I] == '\n')
+        if (MarkEOLs && C == '\n')
           NewArgv.push_back(nullptr);
         continue;
       }
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = QUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
       continue;
     }
 
     // QUOTED state means that it's reading a token quoted by double quotes.
     if (State == QUOTED) {
-      if (Src[I] == '"') {
+      if (C == '"') {
         State = UNQUOTED;
         continue;
       }
-      if (Src[I] == '\\') {
+      if (C == '\\') {
         I = parseBackslash(Src, I, Token);
         continue;
       }
-      Token.push_back(Src[I]);
+      Token.push_back(C);
     }
   }
   // Append the last token after hitting EOF with no whitespace.
@@ -110,7 +110,7 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
     return 1;
 
   if (ErrorsPrinted > 0)
-    return reportError(argv0, utostr(ErrorsPrinted) + " errors.\n");
+    return reportError(argv0, Twine(ErrorsPrinted) + " errors.\n");
 
   // Declare success.
   Out.keep();
@@ -3673,15 +3673,6 @@ static bool getFMAPatterns(MachineInstr &Root,
     }
     break;
   case AArch64::FSUBv2f32:
-    if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv2i32_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
-      Found = true;
-    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv2f32)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
-      Found = true;
-    }
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv2i32_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);

@@ -3691,17 +3682,17 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
       Found = true;
     }
-    break;
-  case AArch64::FSUBv2f64:
     if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv2i64_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
+                           AArch64::FMULv2i32_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
       Found = true;
     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv2f64)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
+                                  AArch64::FMULv2f32)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
       Found = true;
     }
+    break;
+  case AArch64::FSUBv2f64:
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv2i64_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);

@@ -3711,17 +3702,17 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
       Found = true;
     }
-    break;
-  case AArch64::FSUBv4f32:
     if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                           AArch64::FMULv4i32_indexed)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+                           AArch64::FMULv2i64_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
       Found = true;
     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
-                                  AArch64::FMULv4f32)) {
-      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+                                  AArch64::FMULv2f64)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
       Found = true;
     }
+    break;
+  case AArch64::FSUBv4f32:
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
                            AArch64::FMULv4i32_indexed)) {
       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);

@@ -3731,6 +3722,15 @@ static bool getFMAPatterns(MachineInstr &Root,
       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
       Found = true;
     }
+    if (canCombineWithFMUL(MBB, Root.getOperand(1),
+                           AArch64::FMULv4i32_indexed)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
+      Found = true;
+    } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
+                                  AArch64::FMULv4f32)) {
+      Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
+      Found = true;
+    }
     break;
   }
   return Found;

@@ -5062,4 +5062,4 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
   It = MBB.insert(It, LDRXpost);
 
   return It;
-}
\ No newline at end of file
+}
@@ -27,6 +27,8 @@
 
 using namespace llvm;
 
+namespace {
+
 // --------------------------------------------------------------------
 // Implementation of permutation networks.
 
@@ -147,6 +149,7 @@ struct Coloring {
   void build();
   bool color();
 };
+} // namespace
 
 std::pair<bool,uint8_t> Coloring::getUniqueColor(const NodeSet &Nodes) {
   uint8_t Color = None;

@@ -300,6 +303,7 @@ void Coloring::dump() const {
   dbgs() << " }\n}\n";
 }
 
+namespace {
 // Base class of for reordering networks. They don't strictly need to be
 // permutations, as outputs with repeated occurrences of an input element
 // are allowed.

@@ -408,7 +412,7 @@ struct BenesNetwork : public PermNetwork {
 private:
   bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
 };
-
+} // namespace
 
 bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
                                 unsigned Step) {

@@ -602,6 +606,7 @@ bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
 // Support for building selection results (output instructions that are
 // parts of the final selection).
 
+namespace {
 struct OpRef {
   OpRef(SDValue V) : OpV(V) {}
   bool isValue() const { return OpV.getNode() != nullptr; }

@@ -689,6 +694,7 @@ struct ResultStack {
 
   void print(raw_ostream &OS, const SelectionDAG &G) const;
 };
+} // namespace
 
 void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
   if (isValue()) {

@@ -740,6 +746,7 @@ void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
   }
 }
 
+namespace {
 struct ShuffleMask {
   ShuffleMask(ArrayRef<int> M) : Mask(M) {
     for (unsigned I = 0, E = Mask.size(); I != E; ++I) {

@@ -763,6 +770,7 @@ struct ShuffleMask {
     return ShuffleMask(Mask.take_back(H));
   }
 };
+} // namespace
 
 // --------------------------------------------------------------------
 // The HvxSelector class.
@ -1797,11 +1797,7 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
llvm_unreachable("unsupported fp type");

APInt API = APF.bitcastToAPInt();
std::string hexstr(utohexstr(API.getZExtValue()));
O << lead;
if (hexstr.length() < numHex)
O << std::string(numHex - hexstr.length(), '0');
O << utohexstr(API.getZExtValue());
O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
}

void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {

@ -11,6 +11,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/Format.h"
using namespace llvm;

#define DEBUG_TYPE "nvptx-mcexpr"
@ -47,10 +48,7 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
}

APInt API = APF.bitcastToAPInt();
std::string HexStr(utohexstr(API.getZExtValue()));
if (HexStr.length() < NumHex)
OS << std::string(NumHex - HexStr.length(), '0');
OS << utohexstr(API.getZExtValue());
OS << format_hex_no_prefix(API.getZExtValue(), NumHex, /*Upper=*/true);
}

const NVPTXGenericMCSymbolRefExpr*
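
Both NVPTX hunks replace the same hand-rolled zero padding with one
formatter call. A hedged sketch of the idiom (the helper name is mine, not
LLVM's); format_hex_no_prefix pads the value to Width hex digits and can
uppercase it, which is exactly what the removed lines did by hand:

#include <cstdint>
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

static void printHexPadded(llvm::raw_ostream &OS, uint64_t V, unsigned Width) {
  // Replaces: utohexstr(V), then manually prepending '0' up to Width.
  OS << llvm::format_hex_no_prefix(V, Width, /*Upper=*/true);
}
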

@ -190,7 +190,7 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {

if (isVerbose()) {
OutStreamer->AddComment("fallthrough-return: $pop" +
utostr(MFI->getWARegStackId(
Twine(MFI->getWARegStackId(
MFI->getWAReg(MI->getOperand(0).getReg()))));
OutStreamer->AddBlankLine();
}
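
The WebAssembly fix swaps utostr for Twine, so the comment text is assembled
lazily rather than through an intermediate std::string. A hedged sketch of
the pattern (the function name is mine):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"

static void emitFallthroughComment(llvm::raw_ostream &OS, unsigned StackId) {
  // "literal" + Twine(N) builds a concatenation tree; nothing is
  // materialized until the stream consumes it.
  OS << ("fallthrough-return: $pop" + llvm::Twine(StackId));
}
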

@ -2377,10 +2377,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Flags |= Prefix;
Name = Parser.getTok().getString();
Parser.Lex(); // eat the prefix
// Hack: we could have something like
// Hack: we could have something like "rep # some comment" or
// "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
while (Name.startswith(";") || Name.startswith("\n") ||
Name.startswith("\t") || Name.startswith("/")) {
Name.startswith("#") || Name.startswith("\t") ||
Name.startswith("/")) {
Name = Parser.getTok().getString();
Parser.Lex(); // go to next prefix or instr
}

@ -739,7 +739,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [
FeatureVNNI,
FeatureVPCLMULQDQ,
FeatureVPOPCNTDQ,
FeatureGFNI
FeatureGFNI,
FeatureCLWB
]>;

class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel,

@ -1310,8 +1310,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);

setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}

if (Subtarget.hasCDI()) {
@ -1388,8 +1386,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, VT, Legal);
setOperationAction(ISD::FP_TO_SINT, VT, Legal);
setOperationAction(ISD::FP_TO_UINT, VT, Legal);

setOperationAction(ISD::MUL, VT, Legal);
}
}

@ -7108,8 +7104,8 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
return DAG.getConstant(Immediate, dl, VT);
}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {

MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) &&
@ -7131,8 +7127,8 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
// We have to manually lower both halves so getNode doesn't try to
// reassemble the build_vector.
Lower = LowerBUILD_VECTORvXi1(Lower, DAG);
Upper = LowerBUILD_VECTORvXi1(Upper, DAG);
Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget);
Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
}
SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
@ -7881,7 +7877,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {

// Generate vectors for predicate vectors.
if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
return LowerBUILD_VECTORvXi1(Op, DAG);
return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);

if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
return VectorConstant;
@ -15543,7 +15539,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);

const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
@ -15551,9 +15546,15 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(VT == MVT::v2f64 && "Unexpected type");
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
}

MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
@ -15653,8 +15654,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
}

/// 64-bit unsigned integer to double expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SelectionDAG &DAG) const {
static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// This algorithm is not obvious. Here is what we're trying to output:
/*
movq %rax, %xmm0
@ -15674,7 +15675,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
// Build some magic constants.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);

SmallVector<Constant*,2> CV1;
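
The 0x43300000/0x45300000 magic constants above implement the classic
u64-to-f64 exponent-injection trick. A hedged scalar sketch (plain C++, not
the vectorized DAG code) for checking that the constants are right:

#include <cstdint>
#include <cstring>

static double U64ToF64(uint64_t X) {
  // Exponent 0x433 makes the double equal 2^52 + lo exactly; exponent
  // 0x453 makes it equal 2^84 + hi * 2^32 exactly.
  uint64_t LoBits = (X & 0xFFFFFFFFull) | 0x4330000000000000ull;
  uint64_t HiBits = (X >> 32) | 0x4530000000000000ull;
  double Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof(double));
  std::memcpy(&Hi, &HiBits, sizeof(double));
  // Cancel the injected exponents, then add the halves. Both subtractions
  // are exact; only the final add rounds, as a real conversion would.
  return (Hi - 19342813113834066795298816.0 /* 2^84 */) +
         (Lo - 4503599627370496.0 /* 2^52 */);
}
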
@ -15721,8 +15722,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
}

/// 32-bit unsigned integer to float expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
SelectionDAG &DAG) const {
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(Op);
// FP constant to bias correct the final result.
SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
@ -15755,16 +15756,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);

// Handle final rounding.
MVT DestVT = Op.getSimpleValueType();

if (DestVT.bitsLT(MVT::f64))
return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
DAG.getIntPtrConstant(0, dl));
if (DestVT.bitsGT(MVT::f64))
return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);

// Handle final rounding.
return Sub;
return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType());
}

static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
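
The i32 path uses a simpler bias trick: place the 32-bit value in the low
mantissa bits of a double whose exponent encodes 2^52, then subtract 2^52.
A hedged scalar sketch:

#include <cstdint>
#include <cstring>

static double U32ToF64(uint32_t X) {
  // With exponent field 0x433, the double equals 2^52 + X exactly.
  uint64_t Bits = 0x4330000000000000ull | X;
  double D;
  std::memcpy(&D, &Bits, sizeof(double));
  return D - 4503599627370496.0; // subtract 2^52, leaving exactly X
}

The second hunk is a pure cleanup: getFPExtendOrRound collapses the manual
bitsLT/bitsGT branching into a single helper that extends or rounds to the
destination type as needed.
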
@ -15896,16 +15888,22 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh);
}

SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = Op.getOperand(0);
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);

if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
}

MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
@ -15930,7 +15928,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
auto PtrVT = getPointerTy(DAG.getDataLayout());

if (Op.getSimpleValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);

MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
@ -15943,9 +15941,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
}

if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 && DstVT == MVT::f32)
return SDValue();

@ -16283,7 +16281,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
// Truncate if we had to extend i16/i8 above.
if (VT != ExtVT) {
WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal);
SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal);
}

// Extract back to 128/256-bit if we widened.
@ -18428,7 +18426,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
// Truncate if we had to extend i16/i8 above.
if (VT != ExtVT) {
WideVT = MVT::getVectorVT(VTElt, NumElts);
V = DAG.getNode(X86ISD::VTRUNC, dl, WideVT, V);
V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V);
}

// Extract back to 128/256-bit if we widened.
@ -18681,6 +18679,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
if (Subtarget.hasVLX()) {
// Extract to v4i1/v2i1.
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load,
DAG.getIntPtrConstant(0, dl));
// Finally, do a normal sign-extend to the desired register.
return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
}

MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load);

@ -18700,22 +18706,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,

if (NumElts <= 8) {
// A subset, assume that we have only AVX-512F
unsigned NumBitsToLoad = 8;
MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getMemOperand());
// Replace chain users with the new chain.
assert(Load->getNumValues() == 2 && "Loads must carry a chain!");
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));

MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad);
SDValue BitVec = DAG.getBitcast(MaskVT, Load);
SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load);

if (NumElts == 8)
return DAG.getNode(ExtOpcode, dl, VT, BitVec);

// we should take care of v4i1 and v2i1
if (Subtarget.hasVLX()) {
// Extract to v4i1/v2i1.
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec,
DAG.getIntPtrConstant(0, dl));
// Finally, do a normal sign-extend to the desired register.
return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract);
}

MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8);
SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec);
@ -18730,13 +18739,12 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
Ld->getBasePtr(),
Ld->getMemOperand());

SDValue BasePtrHi =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(2, dl, BasePtr.getValueType()));
SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl);

SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(),
BasePtrHi,
Ld->getMemOperand());
SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), BasePtrHi,
Ld->getPointerInfo().getWithOffset(2),
MinAlign(Ld->getAlignment(), 2U),
Ld->getMemOperand()->getFlags());

SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
LoadLo.getValue(1), LoadHi.getValue(1));
@ -22086,7 +22094,14 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
if (VT == MVT::v4i32) {
assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
"Should not custom lower when pmuldq is available!");
"Should not custom lower when pmulld is available!");

// If the upper 17 bits of each element are zero then we can use PMADD.
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (DAG.MaskedValueIsZero(A, Mask17) && DAG.MaskedValueIsZero(B, Mask17))
return DAG.getNode(X86ISD::VPMADDWD, dl, VT,
DAG.getBitcast(MVT::v8i16, A),
DAG.getBitcast(MVT::v8i16, B));

// Extract the odd parts.
static const int UnpackMask[] = { 1, -1, 3, -1 };
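
Why the Mask17 check in the hunk above is safe: VPMADDWD multiplies the two
signed 16-bit halves of each 32-bit lane and adds the pair. With the top 17
bits of both operands known zero, the high halves are zero and the low
halves are non-negative 15-bit values, so the signed multiply-add cannot
overflow or see a sign bit. A scalar sketch of one lane:

#include <cstdint>

static int32_t PmaddwdLane(uint32_t A, uint32_t B) {
  int16_t ALo = int16_t(A & 0xFFFF), AHi = int16_t(A >> 16); // AHi == 0 here
  int16_t BLo = int16_t(B & 0xFFFF), BHi = int16_t(B >> 16); // BHi == 0 here
  // With bits 15..31 of A and B clear, this equals int32_t(A * B).
  return int32_t(ALo) * BLo + int32_t(AHi) * BHi;
}
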
@ -22138,6 +22153,11 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask);
bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask);

// If DQI is supported we can use MULLQ, but MULUDQ is still better if
// the high bits are known to be zero.
if (Subtarget.hasDQI() && (!AHiIsZero || !BHiIsZero))
return Op;

// Bit cast to 32-bit vectors for MULUDQ.
SDValue Alo = DAG.getBitcast(MulVT, A);
SDValue Blo = DAG.getBitcast(MulVT, B);
@ -31001,8 +31021,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
}

// The replacement was made in place; don't return anything.
return SDValue();
// The replacement was made in place; return N so it won't be revisited.
return SDValue(N, 0);
}

/// If a vector select has an operand that is -1 or 0, try to simplify the
@ -32256,6 +32276,13 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
if ((NumElts % 2) != 0)
return SDValue();

// If the upper 17 bits of each element are zero then we can use PMADD.
APInt Mask17 = APInt::getHighBitsSet(32, 17);
if (VT == MVT::v4i32 && DAG.MaskedValueIsZero(N0, Mask17) &&
DAG.MaskedValueIsZero(N1, Mask17))
return DAG.getNode(X86ISD::VPMADDWD, DL, VT, DAG.getBitcast(MVT::v8i16, N0),
DAG.getBitcast(MVT::v8i16, N1));

unsigned RegSize = 128;
MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
@ -33047,10 +33074,8 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// The right side has to be a 'trunc' or a constant vector.
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getValueType() == VT;
ConstantSDNode *RHSConstSplat = nullptr;
if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
RHSConstSplat = RHSBV->getConstantSplatNode();
if (!RHSTrunc && !RHSConstSplat)
if (!RHSTrunc &&
!ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
return SDValue();

const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@ -33060,13 +33085,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,

// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
if (RHSConstSplat) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT.getVectorElementType(),
SDValue(RHSConstSplat, 0));
N1 = DAG.getSplatBuildVector(VT, DL, N1);
} else if (RHSTrunc) {
if (RHSTrunc)
N1 = N1->getOperand(0);
}
else
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);

// Generate the wide operation.
SDValue Op = DAG.getNode(Narrow->getOpcode(), DL, VT, N0, N1);
@ -34039,15 +34061,14 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,

Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl);
SDValue Load2 =
DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
std::min(16U, Alignment), Ld->getMemOperand()->getFlags());
DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
Ld->getPointerInfo().getWithOffset(16),
MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags());
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Load1.getValue(1),
Load2.getValue(1));

SDValue NewVec = DAG.getUNDEF(RegVT);
NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl);
NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl);
SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
return DCI.CombineTo(N, NewVec, TF, true);
}

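
The combineLoad hunk above (and the matching combineStore hunk below) give
the second half of a split access its real pointer offset and alignment.
MinAlign is the key helper: the alignment that provably holds at
Ptr + Offset is the largest power of two dividing both the base alignment
and the offset. A minimal sketch of that computation:

#include <cstdint>

// Mirrors llvm::MinAlign: the lowest set bit of (A | B). For example,
// minAlign(32, 16) == 16: the upper 16-byte half of a 32-byte-aligned
// access is still 16-byte aligned.
static uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}
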
@ -34453,8 +34474,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(),
Alignment, St->getMemOperand()->getFlags());
SDValue Ch1 =
DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(),
std::min(16U, Alignment), St->getMemOperand()->getFlags());
DAG.getStore(St->getChain(), dl, Value1, Ptr1,
St->getPointerInfo().getWithOffset(16),
MinAlign(Alignment, 16U), St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
}

@ -34876,7 +34898,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
// better to truncate if we have the chance.
if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
!TLI.isOperationLegal(Opcode, SrcVT))
!Subtarget.hasDQI())
return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
LLVM_FALLTHROUGH;
case ISD::ADD: {

@ -1167,7 +1167,6 @@ namespace llvm {
bool isReplace) const;

SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
@ -1183,9 +1182,6 @@ namespace llvm {

SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;

@ -4420,12 +4420,12 @@ defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
SSE_INTALU_ITINS_P, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
SSE_INTALU_ITINS_P, HasBWI, 1>;
SSE_INTMUL_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
HasBWI, 1>;
@ -4454,7 +4454,7 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
}
}

defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
avx512vl_i32_info, avx512vl_i64_info,
X86pmuldq, HasAVX512, 1>,T8PD;
defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
@ -8704,17 +8704,6 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
}

// Use 512bit version to implement 128/256 bit in case NoVLX.
multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
X86VectorVTInfo _> {

def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
(X86Info.VT (EXTRACT_SUBREG
(_.VT (!cast<Instruction>(NAME#"Zrr")
(_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
X86Info.SubRegIdx))>;
}

multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
@ -8724,11 +8713,6 @@ let Predicates = [prd] in
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
let Predicates = [prd, NoVLX] in {
defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
}

}

defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;

@ -141,6 +141,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP,
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring,
bit Forward = 1> {
let mayLoad = 1, hasSideEffects = 1 in {
// ST(0) = ST(0) + [mem]
def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
@ -177,10 +178,8 @@ def _Fp80m64: FpI_<(outs RFP80:$dst),
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))),
(set RFP80:$dst,
(OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>;
let mayLoad = 1 in
def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
!strconcat("f", asmstring, "{s}\t$src")>;
let mayLoad = 1 in
def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
!strconcat("f", asmstring, "{l}\t$src")>;
// ST(0) = ST(0) + [memint]
@ -226,12 +225,11 @@ def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
(OpNode RFP80:$src1, (X86fild addr:$src2, i32))),
(set RFP80:$dst,
(OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>;
let mayLoad = 1 in
def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
!strconcat("fi", asmstring, "{s}\t$src")>;
let mayLoad = 1 in
def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
!strconcat("fi", asmstring, "{l}\t$src")>;
} // mayLoad = 1, hasSideEffects = 1
}

let Defs = [FPSW] in {

@ -832,9 +832,11 @@ def NoVLX : Predicate<"!Subtarget->hasVLX()">;
def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
def PKU : Predicate<"Subtarget->hasPKU()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">,
AssemblerPredicate<"FeatureVNNI", "AVX-512 VNNI ISA">;

def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
def HasBITALG : Predicate<"Subtarget->hasBITALG()">,
AssemblerPredicate<"FeatureBITALG", "AVX-512 BITALG ISA">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
def HasVAES : Predicate<"Subtarget->hasVAES()">;
@ -866,7 +868,8 @@ def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">;
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">;
def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">,
AssemblerPredicate<"FeatureVBMI2", "AVX-512 VBMI2 ISA">;
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;

@ -3734,7 +3734,7 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
@ -3742,8 +3742,8 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
(bitconvert (memop_frag addr:$src2)))))]>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
(bitconvert (memop_frag addr:$src2)))))],
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
} // ExeDomain = SSEPackedInt

@ -6313,7 +6313,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
@ -6321,8 +6321,8 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
(OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))],
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst
@ -6338,7 +6338,7 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
@ -6346,8 +6346,8 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
(bitconvert (memop_frag addr:$src2)))))]>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
(bitconvert (memop_frag addr:$src2)))))],
itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX, NoVLX] in {
@ -6924,14 +6924,15 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, Sched<[itins.Sched]>;
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))], itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

@ -149,6 +149,12 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {
}

bool WinEHStatePass::runOnFunction(Function &F) {
// Don't insert state stores or exception handler thunks for
// available_externally functions. The handler needs to reference the LSDA,
// which will not be emitted in this case.
if (F.hasAvailableExternallyLinkage())
return false;

// Check the personality. Do nothing if this personality doesn't use funclets.
if (!F.hasPersonalityFn())
return false;

@ -181,8 +181,9 @@ class SampleProfileLoader {
StringRef Name, bool IsThinLTOPreLink,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
: GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo),
Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {}
: GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
IsThinLTOPreLink(IsThinLTOPreLink) {}

bool doInitialization(Module &M);
bool runOnModule(Module &M, ModuleAnalysisManager *AM);
@ -1547,14 +1548,14 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM) {

// Populate the symbol map.
for (const auto &N_F : M.getValueSymbolTable()) {
std::string OrigName = N_F.getKey();
StringRef OrigName = N_F.getKey();
Function *F = dyn_cast<Function>(N_F.getValue());
if (F == nullptr)
continue;
SymbolMap[OrigName] = F;
auto pos = OrigName.find('.');
if (pos != std::string::npos) {
std::string NewName = OrigName.substr(0, pos);
if (pos != StringRef::npos) {
StringRef NewName = OrigName.substr(0, pos);
auto r = SymbolMap.insert(std::make_pair(NewName, F));
// Failing to insert means there is already an entry in SymbolMap,
// thus there are multiple functions that are mapped to the same
|
||||
if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
|
||||
Metadata *&GlobalMD = LocalToGlobal[MD];
|
||||
if (!GlobalMD) {
|
||||
std::string NewName =
|
||||
(to_string(LocalToGlobal.size()) + ModuleId).str();
|
||||
std::string NewName = (Twine(LocalToGlobal.size()) + ModuleId).str();
|
||||
GlobalMD = MDString::get(M.getContext(), NewName);
|
||||
}
|
||||
|
||||
|
@ -1802,9 +1802,7 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
|
||||
/// instructions. For normal calls, it allows visitCallSite to do the heavy
|
||||
/// lifting.
|
||||
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
auto Args = CI.arg_operands();
|
||||
if (Value *V = SimplifyCall(&CI, CI.getCalledValue(), Args.begin(),
|
||||
Args.end(), SQ.getWithInstruction(&CI)))
|
||||
if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
|
||||
return replaceInstUsesWith(CI, V);
|
||||
|
||||
if (isFreeCall(&CI, &TLI))
|
||||
@ -1903,16 +1901,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
|
||||
return replaceInstUsesWith(CI, N);
|
||||
return nullptr;
|
||||
|
||||
case Intrinsic::bswap: {
|
||||
Value *IIOperand = II->getArgOperand(0);
|
||||
Value *X = nullptr;
|
||||
|
||||
// TODO should this be in InstSimplify?
|
||||
// bswap(bswap(x)) -> x
|
||||
if (match(IIOperand, m_BSwap(m_Value(X))))
|
||||
return replaceInstUsesWith(CI, X);
|
||||
|
||||
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
|
||||
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
|
||||
unsigned C = X->getType()->getPrimitiveSizeInBits() -
|
||||
@ -1923,18 +1915,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::bitreverse: {
|
||||
Value *IIOperand = II->getArgOperand(0);
|
||||
Value *X = nullptr;
|
||||
|
||||
// TODO should this be in InstSimplify?
|
||||
// bitreverse(bitreverse(x)) -> x
|
||||
if (match(IIOperand, m_BitReverse(m_Value(X))))
|
||||
return replaceInstUsesWith(CI, X);
|
||||
break;
|
||||
}
|
||||
|
||||
case Intrinsic::masked_load:
|
||||
if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
|
||||
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
|
||||
@ -1948,16 +1928,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
|
||||
case Intrinsic::powi:
|
||||
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
|
||||
// powi(x, 0) -> 1.0
|
||||
if (Power->isZero())
|
||||
return replaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
|
||||
// powi(x, 1) -> x
|
||||
if (Power->isOne())
|
||||
return replaceInstUsesWith(CI, II->getArgOperand(0));
|
||||
// 0 and 1 are handled in instsimplify
|
||||
|
||||
// powi(x, -1) -> 1/x
|
||||
if (Power->isMinusOne())
|
||||
return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
|
||||
II->getArgOperand(0));
|
||||
// powi(x, 2) -> x*x
|
||||
if (Power->equalsInt(2))
|
||||
return BinaryOperator::CreateFMul(II->getArgOperand(0),
|
||||
II->getArgOperand(0));
|
||||
}
|
||||
break;
|
||||
|
||||
@ -2396,7 +2376,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
// The compare intrinsic uses the above assumptions and therefore
|
||||
// doesn't require additional flags.
|
||||
if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
|
||||
match(Arg1, m_Zero()) &&
|
||||
match(Arg1, m_Zero()) && isa<Instruction>(Arg0) &&
|
||||
cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
|
||||
if (Arg0IsZero)
|
||||
std::swap(A, B);
|
||||
|
@ -1631,9 +1631,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
|
||||
SQ.getWithInstruction(&I)))
|
||||
return replaceInstUsesWith(I, V);
|
||||
|
||||
// Handle cases involving: rem X, (select Cond, Y, Z)
|
||||
if (simplifyDivRemOfSelectWithZeroOp(I))
|
||||
return &I;
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -181,11 +181,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
|
||||
// If extracting a specified index from the vector, see if we can recursively
|
||||
// find a previously computed scalar that was inserted into the vector.
|
||||
if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
|
||||
unsigned IndexVal = IdxC->getZExtValue();
|
||||
unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
|
||||
|
||||
// InstSimplify handles cases where the index is invalid.
|
||||
assert(IndexVal < VectorWidth);
|
||||
// InstSimplify should handle cases where the index is invalid.
|
||||
if (!IdxC->getValue().ule(VectorWidth))
|
||||
return nullptr;
|
||||
|
||||
unsigned IndexVal = IdxC->getZExtValue();
|
||||
|
||||
// This instruction only demands the single element from the input vector.
|
||||
// If the input vector has a single use, simplify it based on this use
|
||||
|
@ -2702,9 +2702,10 @@ void FunctionStackPoisoner::copyArgsPassedByValToAllocas() {
|
||||
unsigned Align = Arg.getParamAlignment();
|
||||
if (Align == 0) Align = DL.getABITypeAlignment(Ty);
|
||||
|
||||
const std::string &Name = Arg.hasName() ? Arg.getName().str() :
|
||||
"Arg" + llvm::to_string(Arg.getArgNo());
|
||||
AllocaInst *AI = IRB.CreateAlloca(Ty, nullptr, Twine(Name) + ".byval");
|
||||
AllocaInst *AI = IRB.CreateAlloca(
|
||||
Ty, nullptr,
|
||||
(Arg.hasName() ? Arg.getName() : "Arg" + Twine(Arg.getArgNo())) +
|
||||
".byval");
|
||||
AI->setAlignment(Align);
|
||||
Arg.replaceAllUsesWith(AI);
|
||||
|
||||
|
@ -641,7 +641,7 @@ Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
|
||||
DenseMap<uint32_t, unsigned> VNums;
|
||||
for (auto *I : Insts) {
|
||||
uint32_t N = VN.lookupOrAdd(I);
|
||||
DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
|
||||
DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
|
||||
if (N == ~0U)
|
||||
return None;
|
||||
VNums[N]++;
|
||||
|
@ -476,33 +476,22 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
|
||||
Alignment = DL.getABITypeAlignment(EltType);
|
||||
}
|
||||
|
||||
// Remember the debug location.
|
||||
DebugLoc Loc;
|
||||
if (!Range.TheStores.empty())
|
||||
Loc = Range.TheStores[0]->getDebugLoc();
|
||||
AMemSet =
|
||||
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
|
||||
|
||||
DEBUG(dbgs() << "Replace stores:\n";
|
||||
for (Instruction *SI : Range.TheStores)
|
||||
dbgs() << *SI << '\n');
|
||||
dbgs() << *SI << '\n';
|
||||
dbgs() << "With: " << *AMemSet << '\n');
|
||||
|
||||
if (!Range.TheStores.empty())
|
||||
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
|
||||
|
||||
// Zap all the stores.
|
||||
for (Instruction *SI : Range.TheStores) {
|
||||
MD->removeInstruction(SI);
|
||||
SI->eraseFromParent();
|
||||
}
|
||||
|
||||
// Create the memset after removing the stores, so that if there any cached
|
||||
// non-local dependencies on the removed instructions in
|
||||
// MemoryDependenceAnalysis, the cache entries are updated to "dirty"
|
||||
// entries pointing below the memset, so subsequent queries include the
|
||||
// memset.
|
||||
AMemSet =
|
||||
Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
|
||||
if (!Range.TheStores.empty())
|
||||
AMemSet->setDebugLoc(Loc);
|
||||
|
||||
DEBUG(dbgs() << "With: " << *AMemSet << '\n');
|
||||
|
||||
++NumMemSetInfer;
|
||||
}
|
||||
|
||||
@ -1042,22 +1031,9 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
|
||||
//
|
||||
// NOTE: This is conservative, it will stop on any read from the source loc,
|
||||
// not just the defining memcpy.
|
||||
MemoryLocation SourceLoc = MemoryLocation::getForSource(MDep);
|
||||
MemDepResult SourceDep = MD->getPointerDependencyFrom(SourceLoc, false,
|
||||
M->getIterator(), M->getParent());
|
||||
|
||||
if (SourceDep.isNonLocal()) {
|
||||
SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
|
||||
MD->getNonLocalPointerDependencyFrom(M, SourceLoc, /*isLoad=*/false,
|
||||
NonLocalDepResults);
|
||||
if (NonLocalDepResults.size() == 1) {
|
||||
SourceDep = NonLocalDepResults[0].getResult();
|
||||
assert((!SourceDep.getInst() ||
|
||||
LookupDomTree().dominates(SourceDep.getInst(), M)) &&
|
||||
"when memdep returns exactly one result, it should dominate");
|
||||
}
|
||||
}
|
||||
|
||||
MemDepResult SourceDep =
|
||||
MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
|
||||
M->getIterator(), M->getParent());
|
||||
if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
|
||||
return false;
|
||||
|
||||
@ -1259,18 +1235,6 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
|
||||
MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
|
||||
SrcLoc, true, M->getIterator(), M->getParent());
|
||||
|
||||
if (SrcDepInfo.isNonLocal()) {
|
||||
SmallVector<NonLocalDepResult, 2> NonLocalDepResults;
|
||||
MD->getNonLocalPointerDependencyFrom(M, SrcLoc, /*isLoad=*/true,
|
||||
NonLocalDepResults);
|
||||
if (NonLocalDepResults.size() == 1) {
|
||||
SrcDepInfo = NonLocalDepResults[0].getResult();
|
||||
assert((!SrcDepInfo.getInst() ||
|
||||
LookupDomTree().dominates(SrcDepInfo.getInst(), M)) &&
|
||||
"when memdep returns exactly one result, it should dominate");
|
||||
}
|
||||
}
|
||||
|
||||
if (SrcDepInfo.isClobber()) {
|
||||
if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
|
||||
return processMemCpyMemCpyDependence(M, MDep);
|
||||
|
@ -2796,17 +2796,12 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
|
||||
StatepointLiveSetTy Updated;
|
||||
findLiveSetAtInst(Inst, RevisedLivenessData, Updated);
|
||||
|
||||
#ifndef NDEBUG
|
||||
DenseSet<Value *> Bases;
|
||||
for (auto KVPair : Info.PointerToBase)
|
||||
Bases.insert(KVPair.second);
|
||||
#endif
|
||||
|
||||
// We may have base pointers which are now live that weren't before. We need
|
||||
// to update the PointerToBase structure to reflect this.
|
||||
for (auto V : Updated)
|
||||
if (Info.PointerToBase.insert({V, V}).second) {
|
||||
assert(Bases.count(V) && "Can't find base for unexpected live value!");
|
||||
assert(isKnownBaseResult(V) &&
|
||||
"Can't find base for unexpected live value!");
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -648,8 +648,13 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
|
||||
SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
|
||||
NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
|
||||
DT, LI, PreserveLCSSA);
|
||||
// NewExit gets its DebugLoc from LatchExit, which is not part of the
|
||||
// original Loop.
|
||||
// Fix this by setting Loop's DebugLoc to NewExit.
|
||||
auto *NewExitTerminator = NewExit->getTerminator();
|
||||
NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc());
|
||||
// Split NewExit to insert epilog remainder loop.
|
||||
EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
|
||||
EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
|
||||
EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
|
||||
} else {
|
||||
// If prolog remainder
|
||||
|
@ -880,9 +880,10 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
|
||||
/// If we are able to find such sequence, we return the instructions
|
||||
/// we found, namely %casted_phi and the instructions on its use-def chain up
|
||||
/// to the phi (not including the phi).
|
||||
bool getCastsForInductionPHI(
|
||||
PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev,
|
||||
const SCEVAddRecExpr *AR, SmallVectorImpl<Instruction *> &CastInsts) {
|
||||
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
|
||||
const SCEVUnknown *PhiScev,
|
||||
const SCEVAddRecExpr *AR,
|
||||
SmallVectorImpl<Instruction *> &CastInsts) {
|
||||
|
||||
assert(CastInsts.empty() && "CastInsts is expected to be empty.");
|
||||
auto *PN = cast<PHINode>(PhiScev->getValue());
|
||||
|
@ -127,16 +127,6 @@ static cl::opt<unsigned> MaxSpeculationDepth(
|
||||
cl::desc("Limit maximum recursion depth when calculating costs of "
|
||||
"speculatively executed instructions"));
|
||||
|
||||
static cl::opt<unsigned> DependenceChainLatency(
|
||||
"dependence-chain-latency", cl::Hidden, cl::init(8),
|
||||
cl::desc("Limit the maximum latency of dependence chain containing cmp "
|
||||
"for if conversion"));
|
||||
|
||||
static cl::opt<unsigned> SmallBBSize(
|
||||
"small-bb-size", cl::Hidden, cl::init(40),
|
||||
cl::desc("Check dependence chain latency only in basic block smaller than "
|
||||
"this number"));
|
||||
|
||||
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
|
||||
STATISTIC(NumLinearMaps,
|
||||
"Number of switch instructions turned into linear mapping");
|
||||
@ -405,166 +395,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Estimate the code size of the specified BB.
|
||||
static unsigned CountBBCodeSize(BasicBlock *BB,
|
||||
const TargetTransformInfo &TTI) {
|
||||
unsigned Size = 0;
|
||||
for (auto II = BB->begin(); !isa<TerminatorInst>(II); ++II)
|
||||
Size += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_CodeSize);
|
||||
return Size;
|
||||
}
|
||||
|
||||
/// Find out the latency of the longest dependence chain in the BB if
|
||||
/// LongestChain is true, or the dependence chain containing the compare
|
||||
/// instruction feeding the block's conditional branch.
|
||||
static unsigned FindDependenceChainLatency(BasicBlock *BB,
|
||||
DenseMap<Instruction *, unsigned> &Instructions,
|
||||
const TargetTransformInfo &TTI,
|
||||
bool LongestChain) {
|
||||
unsigned MaxLatency = 0;
|
||||
|
||||
BasicBlock::iterator II;
|
||||
for (II = BB->begin(); !isa<TerminatorInst>(II); ++II) {
|
||||
unsigned Latency = 0;
|
||||
for (unsigned O = 0, E = II->getNumOperands(); O != E; ++O) {
|
||||
Instruction *Op = dyn_cast<Instruction>(II->getOperand(O));
|
||||
if (Op && Instructions.count(Op)) {
|
||||
auto OpLatency = Instructions[Op];
|
||||
if (OpLatency > Latency)
|
||||
Latency = OpLatency;
|
||||
}
|
||||
}
|
||||
Latency += TTI.getInstructionCost(&(*II), TargetTransformInfo::TCK_Latency);
|
||||
Instructions[&(*II)] = Latency;
|
||||
|
||||
if (Latency > MaxLatency)
|
||||
MaxLatency = Latency;
|
||||
}
|
||||
|
||||
if (LongestChain)
|
||||
return MaxLatency;
|
||||
|
||||
// The length of the dependence chain containing the compare instruction is
|
||||
// wanted, so the terminator must be a BranchInst.
|
||||
assert(isa<BranchInst>(II));
|
||||
BranchInst* Br = cast<BranchInst>(II);
|
||||
Instruction *Cmp = dyn_cast<Instruction>(Br->getCondition());
|
||||
if (Cmp && Instructions.count(Cmp))
|
||||
return Instructions[Cmp];
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Instructions in BB2 may depend on instructions in BB1, and instructions
|
||||
/// in BB1 may have users in BB2. If the last (in terms of latency) such kind
|
||||
/// of instruction in BB1 is I, then the instructions after I can be executed
|
||||
/// in parallel with instructions in BB2.
|
||||
/// This function returns the latency of I.
|
||||
static unsigned LatencyAdjustment(BasicBlock *BB1, BasicBlock *BB2,
|
||||
BasicBlock *IfBlock1, BasicBlock *IfBlock2,
|
||||
DenseMap<Instruction *, unsigned> &BB1Instructions) {
|
||||
unsigned LastLatency = 0;
|
||||
SmallVector<Instruction *, 16> Worklist;
|
||||
BasicBlock::iterator II;
|
||||
for (II = BB2->begin(); !isa<TerminatorInst>(II); ++II) {
|
||||
if (PHINode *PN = dyn_cast<PHINode>(II)) {
|
||||
// Look for users in BB2.
|
||||
bool InBBUser = false;
|
||||
for (User *U : PN->users()) {
|
||||
if (cast<Instruction>(U)->getParent() == BB2) {
|
||||
InBBUser = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// No such user, we don't care about this instruction and its operands.
|
||||
if (!InBBUser)
|
||||
break;
|
||||
}
|
||||
Worklist.push_back(&(*II));
|
||||
}
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *I = Worklist.pop_back_val();
|
||||
for (unsigned O = 0, E = I->getNumOperands(); O != E; ++O) {
|
||||
if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(O))) {
|
||||
if (Op->getParent() == IfBlock1 || Op->getParent() == IfBlock2)
|
||||
Worklist.push_back(Op);
|
||||
else if (Op->getParent() == BB1 && BB1Instructions.count(Op)) {
|
||||
if (BB1Instructions[Op] > LastLatency)
|
||||
LastLatency = BB1Instructions[Op];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return LastLatency;
|
||||
}
|
||||
|
||||
/// If after if conversion, most of the instructions in this new BB construct a
|
||||
/// long and slow dependence chain, it may be slower than cmp/branch, even
|
||||
/// if the branch has a high miss rate, because the control dependence is
|
||||
/// transformed into data dependence, and control dependence can be speculated,
|
||||
/// and thus, the second part can execute in parallel with the first part on
|
||||
/// modern OOO processor.
|
||||
///
|
||||
/// To check this condition, this function finds the length of the dependence
|
||||
/// chain in BB1 (only the part that can be executed in parallel with code after
|
||||
/// branch in BB2) containing cmp, and if the length is longer than a threshold,
|
||||
/// don't perform if conversion.
|
||||
///
|
||||
/// BB1, BB2, IfBlock1 and IfBlock2 are candidate BBs for if conversion.
|
||||
/// SpeculationSize contains the code size of IfBlock1 and IfBlock2.
|
||||
static bool FindLongDependenceChain(BasicBlock *BB1, BasicBlock *BB2,
|
||||
BasicBlock *IfBlock1, BasicBlock *IfBlock2,
|
||||
unsigned SpeculationSize,
|
||||
const TargetTransformInfo &TTI) {
|
||||
// Accumulated latency of each instruction in their BBs.
|
||||
DenseMap<Instruction *, unsigned> BB1Instructions;
|
||||
DenseMap<Instruction *, unsigned> BB2Instructions;
|
||||
|
||||
if (!TTI.isOutOfOrder())
|
||||
return false;
|
||||
|
||||
unsigned NewBBSize = CountBBCodeSize(BB1, TTI) + CountBBCodeSize(BB2, TTI)
|
||||
+ SpeculationSize;
|
||||
|
||||
// We check small BB only since it is more difficult to find unrelated
|
||||
// instructions to fill functional units in a small BB.
|
||||
if (NewBBSize > SmallBBSize)
|
||||
return false;
|
||||
|
||||
auto BB1Chain =
|
||||
FindDependenceChainLatency(BB1, BB1Instructions, TTI, false);
|
||||
auto BB2Chain =
|
||||
FindDependenceChainLatency(BB2, BB2Instructions, TTI, true);
|
||||
|
||||
// If there are many unrelated instructions in the new BB, there will be
|
||||
// other instructions for the processor to issue regardless of the length
|
||||
// of this new dependence chain.
|
||||
// Modern processors can issue 3 or more instructions in each cycle. But in
|
||||
// real world applications, an IPC of 2 is already very good for non-loop
|
||||
// code with small basic blocks. Higher IPC is usually found in programs with
|
||||
// small kernel. So IPC of 2 is more reasonable for most applications.
|
||||
if ((BB1Chain + BB2Chain) * 2 <= NewBBSize)
|
||||
return false;
|
||||
|
||||
// We only care about part of the dependence chain in BB1 that can be
|
||||
// executed in parallel with BB2, so adjust the latency.
|
||||
BB1Chain -=
|
||||
LatencyAdjustment(BB1, BB2, IfBlock1, IfBlock2, BB1Instructions);
|
||||
|
||||
// Correctly predicted branch instruction can skip the dependence chain in
|
||||
// BB1, but misprediction has a penalty, so only when the dependence chain is
|
||||
// longer than DependenceChainLatency, then branch is better than select.
|
||||
// Besides misprediction penalty, the threshold value DependenceChainLatency
|
||||
// also depends on branch misprediction rate, taken branch latency and cmov
|
||||
// latency.
|
||||
if (BB1Chain >= DependenceChainLatency)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}

/// Extract ConstantInt from value, looking through IntToPtr
/// and PointerNullValue. Return NULL if value is not a constant int.
static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
@ -2214,11 +2044,6 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
return false;

// Don't do if conversion for long dependence chain.
if (FindLongDependenceChain(BB, EndBB, ThenBB, nullptr,
CountBBCodeSize(ThenBB, TTI), TTI))
return false;

// If we get here, we can hoist the instruction and if-convert.
DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

@ -2526,10 +2351,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
}
}

if (FindLongDependenceChain(DomBlock, BB, IfBlock1, IfBlock2,
AggressiveInsts.size(), TTI))
return false;

DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: "
<< IfTrue->getName() << " F: " << IfFalse->getName() << "\n");

|
@ -592,15 +592,21 @@ class NullStmt : public Stmt {
};

/// CompoundStmt - This represents a group of statements like { stmt stmt }.
class CompoundStmt : public Stmt {
class CompoundStmt final : public Stmt,
private llvm::TrailingObjects<CompoundStmt, Stmt *> {
friend class ASTStmtReader;
friend TrailingObjects;

Stmt** Body = nullptr;
SourceLocation LBraceLoc, RBraceLoc;

CompoundStmt(ArrayRef<Stmt *> Stmts, SourceLocation LB, SourceLocation RB);
explicit CompoundStmt(EmptyShell Empty) : Stmt(CompoundStmtClass, Empty) {}

void setStmts(ArrayRef<Stmt *> Stmts);

public:
CompoundStmt(const ASTContext &C, ArrayRef<Stmt*> Stmts,
SourceLocation LB, SourceLocation RB);
static CompoundStmt *Create(const ASTContext &C, ArrayRef<Stmt *> Stmts,
SourceLocation LB, SourceLocation RB);

// \brief Build an empty compound statement with a location.
explicit CompoundStmt(SourceLocation Loc)
@ -609,11 +615,7 @@ class CompoundStmt : public Stmt {
}

// \brief Build an empty compound statement.
explicit CompoundStmt(EmptyShell Empty) : Stmt(CompoundStmtClass, Empty) {
CompoundStmtBits.NumStmts = 0;
}

void setStmts(const ASTContext &C, ArrayRef<Stmt *> Stmts);
static CompoundStmt *CreateEmpty(const ASTContext &C, unsigned NumStmts);

bool body_empty() const { return CompoundStmtBits.NumStmts == 0; }
unsigned size() const { return CompoundStmtBits.NumStmts; }
@ -622,14 +624,16 @@ class CompoundStmt : public Stmt {
using body_range = llvm::iterator_range<body_iterator>;

body_range body() { return body_range(body_begin(), body_end()); }
body_iterator body_begin() { return Body; }
body_iterator body_end() { return Body + size(); }
Stmt *body_front() { return !body_empty() ? Body[0] : nullptr; }
Stmt *body_back() { return !body_empty() ? Body[size()-1] : nullptr; }
body_iterator body_begin() { return getTrailingObjects<Stmt *>(); }
body_iterator body_end() { return body_begin() + size(); }
Stmt *body_front() { return !body_empty() ? body_begin()[0] : nullptr; }
Stmt *body_back() {
return !body_empty() ? body_begin()[size() - 1] : nullptr;
}

void setLastStmt(Stmt *S) {
assert(!body_empty() && "setLastStmt");
Body[size()-1] = S;
body_begin()[size() - 1] = S;
}

using const_body_iterator = Stmt* const *;
@ -639,15 +643,17 @@ class CompoundStmt : public Stmt {
return body_const_range(body_begin(), body_end());
}

const_body_iterator body_begin() const { return Body; }
const_body_iterator body_end() const { return Body + size(); }
const_body_iterator body_begin() const {
return getTrailingObjects<Stmt *>();
}
const_body_iterator body_end() const { return body_begin() + size(); }

const Stmt *body_front() const {
return !body_empty() ? Body[0] : nullptr;
return !body_empty() ? body_begin()[0] : nullptr;
}

const Stmt *body_back() const {
return !body_empty() ? Body[size() - 1] : nullptr;
return !body_empty() ? body_begin()[size() - 1] : nullptr;
}

using reverse_body_iterator = std::reverse_iterator<body_iterator>;
@ -682,13 +688,10 @@ class CompoundStmt : public Stmt {
}

// Iterators
child_range children() {
return child_range(Body, Body + CompoundStmtBits.NumStmts);
}
child_range children() { return child_range(body_begin(), body_end()); }

const_child_range children() const {
return const_child_range(child_iterator(Body),
child_iterator(Body + CompoundStmtBits.NumStmts));
return const_child_range(body_begin(), body_end());
}
};
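
CompoundStmt (and AttributedStmt just below) now tail-allocates its array with llvm::TrailingObjects instead of owning a separately allocated Body pointer: one allocation instead of two, and one pointer less per node. A condensed sketch of the pattern, using clang's Stmt and ASTContext types for illustration (simplified, not the actual class):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/TrailingObjects.h"
#include <algorithm>
#include <new>

// Sketch of the TrailingObjects idiom: the Stmt* array lives directly after
// the node in memory, so construction must go through a factory that
// allocates node + array in one block. Stmt/ASTContext stand in for the
// clang types.
class Node final : private llvm::TrailingObjects<Node, Stmt *> {
  friend TrailingObjects;
  unsigned NumStmts;

  explicit Node(llvm::ArrayRef<Stmt *> Stmts) : NumStmts(Stmts.size()) {
    std::copy(Stmts.begin(), Stmts.end(), getTrailingObjects<Stmt *>());
  }

public:
  static Node *Create(const ASTContext &C, llvm::ArrayRef<Stmt *> Stmts) {
    void *Mem = C.Allocate(totalSizeToAlloc<Stmt *>(Stmts.size()),
                           alignof(Node));
    return new (Mem) Node(Stmts);
  }

  Stmt *const *begin() const { return getTrailingObjects<Stmt *>(); }
  Stmt *const *end() const { return begin() + NumStmts; }
};

Because placement construction is private, every client goes through the factory; the call-site changes further down (ASTImporter's VisitCompoundStmt and BodyFarm's makeCompound) follow directly from that.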

@ -875,8 +878,11 @@ class LabelStmt : public Stmt {
///
/// Represents an attribute applied to a statement. For example:
/// [[omp::for(...)]] for (...) { ... }
class AttributedStmt : public Stmt {
class AttributedStmt final
: public Stmt,
private llvm::TrailingObjects<AttributedStmt, const Attr *> {
friend class ASTStmtReader;
friend TrailingObjects;

Stmt *SubStmt;
SourceLocation AttrLoc;
@ -894,11 +900,9 @@ class AttributedStmt : public Stmt {
}

const Attr *const *getAttrArrayPtr() const {
return reinterpret_cast<const Attr *const *>(this + 1);
}
const Attr **getAttrArrayPtr() {
return reinterpret_cast<const Attr **>(this + 1);
return getTrailingObjects<const Attr *>();
}
const Attr **getAttrArrayPtr() { return getTrailingObjects<const Attr *>(); }

public:
static AttributedStmt *Create(const ASTContext &C, SourceLocation Loc,
|
@ -429,9 +429,34 @@ TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "", "aes")
TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "", "aes")
TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "", "aes")

// VAES
TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "", "vaes")
TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "", "vaes")
TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "", "vaes")
TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")
TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "", "vaes")
TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes")

// GFNI
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "", "gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "", "gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "", "gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "", "avx,gfni")
TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "", "avx512bw,gfni")

// CLMUL
TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul")

// VPCLMULQDQ
TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "", "vpclmulqdq")
TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "", "avx512f,vpclmulqdq")
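
These VAES, GFNI and VPCLMULQDQ builtins back the new intrinsics headers this merge adds. A hedged usage sketch — intrinsic names are taken from Intel's documentation for these extensions; verify against the shipped vaesintrin.h and vpclmulqdqintrin.h:

#include <immintrin.h>

// One AES encryption round on four 128-bit lanes at once (VAES + AVX-512F);
// compile with e.g. -mavx512f -mvaes -mvpclmulqdq.
__m512i aes_round(__m512i State, __m512i RoundKey) {
  return _mm512_aesenc_epi128(State, RoundKey);
}

// Carry-less multiply of the low 64-bit halves of each 128-bit lane, the
// building block of GHASH/CRC-style reductions.
__m512i clmul_lo(__m512i A, __m512i B) {
  return _mm512_clmulepi64_epi128(A, B, 0x00);
}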

// AVX
TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "", "avx")
TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "", "avx")
@ -954,6 +979,31 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "",
TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f")

TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_mask, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpwssds512_maskz, "V16iV16iV16iV16iUs", "", "avx512vnni")
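
The AVX512_VNNI builtins above fuse a multiply-accumulate idiom into one instruction: vpdpbusd multiplies unsigned bytes of one source with the corresponding signed bytes of the other and adds each group of four products into a 32-bit accumulator lane (the wssd forms do the same with signed words, the *s forms saturate). A hedged sketch of the matching intrinsic — the name follows Intel's documentation for avx512vnniintrin.h; verify against the header added in this merge:

#include <immintrin.h>

// vpdpbusd: per 32-bit lane, Acc += sum of four u8 x s8 products.
// Compile with e.g. -mavx512vnni.
__m512i dot_accumulate(__m512i Acc, __m512i A, __m512i B) {
  return _mm512_dpbusd_epi32(Acc, A, B);
}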

TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi","","avx512vl")
@ -1067,6 +1117,17 @@ TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "", "avx512vpopcntdq,a
TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq")
TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq")

TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "", "avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "", "avx512bitalg")

TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "", "avx512vl,avx512bitalg")
TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "", "avx512bitalg")

TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw")
@ -1107,6 +1168,12 @@ TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "", "avx512vl")

TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc","","avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs","","avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs","","avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi","","avx512vl,avx512vbmi2")

TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "", "avx512vl")
@ -1115,6 +1182,12 @@ TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "", "avx512
TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "", "avx512vl")

TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "", "avx512vl,avx512vbmi2")

TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "", "avx512vl")
@ -1147,10 +1220,22 @@ TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "", "avx512vl")

TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "", "avx512vl,avx512vbmi2")

TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2d*V2dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4d*V4dUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLi*V2LLiUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLi*V4LLiUc", "", "avx512vl")

TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "", "avx512vl,avx512vbmi2")

TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4f*V4fUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8f*V8fUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4i*V4iUc", "", "avx512vl")
@ -1223,6 +1308,65 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "",
TARGET_BUILTIN(__builtin_ia32_vpermt2varq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermt2varq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermt2varq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl")

TARGET_BUILTIN(__builtin_ia32_vpshldd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldw128_mask, "V8sV8sV8sIiV8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldw256_mask, "V16sV16sV16sIiV16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldw512_mask, "V32sV32sV32sIiV32sUi", "", "avx512vbmi2")

TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2")

TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "", "avx512vbmi2")

TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iiV4iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iiV8iUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iiV16iUs", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiiV2LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiiV4LLiUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiiV8LLiUc", "", "avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8siV8sUc", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16siV16sUs", "", "avx512vl,avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32siV32sUi", "", "avx512vbmi2")

TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "", "avx512bw")
@ -1677,14 +1821,20 @@ TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi","","avx51
TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi","","avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi","","avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi","","avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi","","avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi","","avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi","","avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f")
@ -1692,6 +1842,8 @@ TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f")
TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi","","avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi","","avx512vbmi2")
TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc","","avx512vl")
|
@ -587,6 +587,7 @@ def ext_using_attribute_ns : ExtWarn<
def err_using_attribute_ns_conflict : Error<
"attribute with scope specifier cannot follow default scope specifier">;
def err_attributes_not_allowed : Error<"an attribute list cannot appear here">;
def err_attributes_misplaced : Error<"misplaced attributes; expected attributes here">;
def err_l_square_l_square_not_attribute : Error<
"C++11 only allows consecutive left square brackets when "
"introducing an attribute">;
|
@ -2467,6 +2467,8 @@ def mavx2 : Flag<["-"], "mavx2">, Group<m_x86_Features_Group>;
def mno_avx2 : Flag<["-"], "mno-avx2">, Group<m_x86_Features_Group>;
def mavx512f : Flag<["-"], "mavx512f">, Group<m_x86_Features_Group>;
def mno_avx512f : Flag<["-"], "mno-avx512f">, Group<m_x86_Features_Group>;
def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group<m_x86_Features_Group>;
def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group<m_x86_Features_Group>;
def mavx512bw : Flag<["-"], "mavx512bw">, Group<m_x86_Features_Group>;
def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group<m_x86_Features_Group>;
def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
@ -2481,8 +2483,12 @@ def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>;
def mno_avx512vbmi2 : Flag<["-"], "mno-avx512vbmi2">, Group<m_x86_Features_Group>;
def mavx512vl : Flag<["-"], "mavx512vl">, Group<m_x86_Features_Group>;
def mno_avx512vl : Flag<["-"], "mno-avx512vl">, Group<m_x86_Features_Group>;
def mavx512vnni : Flag<["-"], "mavx512vnni">, Group<m_x86_Features_Group>;
def mno_avx512vnni : Flag<["-"], "mno-avx512vnni">, Group<m_x86_Features_Group>;
def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group<m_x86_Features_Group>;
def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group<m_x86_Features_Group>;
def madx : Flag<["-"], "madx">, Group<m_x86_Features_Group>;
@ -2511,6 +2517,8 @@ def mfsgsbase : Flag<["-"], "mfsgsbase">, Group<m_x86_Features_Group>;
def mno_fsgsbase : Flag<["-"], "mno-fsgsbase">, Group<m_x86_Features_Group>;
def mfxsr : Flag<["-"], "mfxsr">, Group<m_x86_Features_Group>;
def mno_fxsr : Flag<["-"], "mno-fxsr">, Group<m_x86_Features_Group>;
def mgfni : Flag<["-"], "mgfni">, Group<m_x86_Features_Group>;
def mno_gfni : Flag<["-"], "mno-gfni">, Group<m_x86_Features_Group>;
def mlwp : Flag<["-"], "mlwp">, Group<m_x86_Features_Group>;
def mno_lwp : Flag<["-"], "mno-lwp">, Group<m_x86_Features_Group>;
def mlzcnt : Flag<["-"], "mlzcnt">, Group<m_x86_Features_Group>;
@ -2543,6 +2551,10 @@ def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>;
def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>;
def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
def mvaes : Flag<["-"], "mvaes">, Group<m_x86_Features_Group>;
def mno_vaes : Flag<["-"], "mno-vaes">, Group<m_x86_Features_Group>;
def mvpclmulqdq : Flag<["-"], "mvpclmulqdq">, Group<m_x86_Features_Group>;
def mno_vpclmulqdq : Flag<["-"], "mno-vpclmulqdq">, Group<m_x86_Features_Group>;
def mxop : Flag<["-"], "mxop">, Group<m_x86_Features_Group>;
def mno_xop : Flag<["-"], "mno-xop">, Group<m_x86_Features_Group>;
def mxsave : Flag<["-"], "mxsave">, Group<m_x86_Features_Group>;
|
@ -2200,13 +2200,16 @@ class Parser : public CodeCompletionHandler {

void stripTypeAttributesOffDeclSpec(ParsedAttributesWithRange &Attrs,
DeclSpec &DS, Sema::TagUseKind TUK);

void ProhibitAttributes(ParsedAttributesWithRange &attrs) {

// FixItLoc = possible correct location for the attributes
void ProhibitAttributes(ParsedAttributesWithRange &attrs,
SourceLocation FixItLoc = SourceLocation()) {
if (!attrs.Range.isValid()) return;
DiagnoseProhibitedAttributes(attrs);
DiagnoseProhibitedAttributes(attrs, FixItLoc);
attrs.clear();
}
void DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs);
void DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs,
SourceLocation FixItLoc);

// Forbid C++11 and C2x attributes that appear on certain syntactic locations
// which the standard permits but we don't support yet, for example, attributes
|
@ -2148,7 +2148,7 @@ static bool unionHasUniqueObjectRepresentations(const ASTContext &Context,
return true;
}

bool isStructEmpty(QualType Ty) {
static bool isStructEmpty(QualType Ty) {
const RecordDecl *RD = Ty->castAs<RecordType>()->getDecl();

if (!RD->field_empty())
|
@ -134,12 +134,17 @@ namespace clang {
bool ImportTemplateArguments(const TemplateArgument *FromArgs,
unsigned NumFromArgs,
SmallVectorImpl<TemplateArgument> &ToArgs);
template <typename InContainerTy>
bool ImportTemplateArgumentListInfo(const InContainerTy &Container,
TemplateArgumentListInfo &ToTAInfo);
bool IsStructuralMatch(RecordDecl *FromRecord, RecordDecl *ToRecord,
bool Complain = true);
bool IsStructuralMatch(VarDecl *FromVar, VarDecl *ToVar,
bool Complain = true);
bool IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToRecord);
bool IsStructuralMatch(EnumConstantDecl *FromEC, EnumConstantDecl *ToEC);
bool IsStructuralMatch(FunctionTemplateDecl *From,
FunctionTemplateDecl *To);
bool IsStructuralMatch(ClassTemplateDecl *From, ClassTemplateDecl *To);
bool IsStructuralMatch(VarTemplateDecl *From, VarTemplateDecl *To);
Decl *VisitDecl(Decl *D);
@ -195,6 +200,7 @@ namespace clang {
ClassTemplateSpecializationDecl *D);
Decl *VisitVarTemplateDecl(VarTemplateDecl *D);
Decl *VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D);
Decl *VisitFunctionTemplateDecl(FunctionTemplateDecl *D);

// Importing statements
DeclGroupRef ImportDeclGroup(DeclGroupRef DG);
@ -280,6 +286,7 @@ namespace clang {
Expr *VisitCXXDeleteExpr(CXXDeleteExpr *E);
Expr *VisitCXXConstructExpr(CXXConstructExpr *E);
Expr *VisitCXXMemberCallExpr(CXXMemberCallExpr *E);
Expr *VisitCXXDependentScopeMemberExpr(CXXDependentScopeMemberExpr *E);
Expr *VisitExprWithCleanups(ExprWithCleanups *EWC);
Expr *VisitCXXThisExpr(CXXThisExpr *E);
Expr *VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *E);
@ -1247,6 +1254,18 @@ bool ASTNodeImporter::ImportTemplateArguments(const TemplateArgument *FromArgs,
return false;
}

template <typename InContainerTy>
bool ASTNodeImporter::ImportTemplateArgumentListInfo(
const InContainerTy &Container, TemplateArgumentListInfo &ToTAInfo) {
for (const auto &FromLoc : Container) {
if (auto ToLoc = ImportTemplateArgumentLoc(FromLoc))
ToTAInfo.addArgument(*ToLoc);
else
return true;
}
return false;
}

bool ASTNodeImporter::IsStructuralMatch(RecordDecl *FromRecord,
RecordDecl *ToRecord, bool Complain) {
// Eliminate a potential failure point where we attempt to re-import
@ -1280,6 +1299,14 @@ bool ASTNodeImporter::IsStructuralMatch(EnumDecl *FromEnum, EnumDecl *ToEnum) {
return Ctx.IsStructurallyEquivalent(FromEnum, ToEnum);
}

bool ASTNodeImporter::IsStructuralMatch(FunctionTemplateDecl *From,
FunctionTemplateDecl *To) {
StructuralEquivalenceContext Ctx(
Importer.getFromContext(), Importer.getToContext(),
Importer.getNonEquivalentDecls(), false, false);
return Ctx.IsStructurallyEquivalent(From, To);
}

bool ASTNodeImporter::IsStructuralMatch(EnumConstantDecl *FromEC,
EnumConstantDecl *ToEC)
{
@ -4197,6 +4224,64 @@ Decl *ASTNodeImporter::VisitVarTemplateSpecializationDecl(
return D2;
}

Decl *ASTNodeImporter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
DeclContext *DC, *LexicalDC;
DeclarationName Name;
SourceLocation Loc;
NamedDecl *ToD;

if (ImportDeclParts(D, DC, LexicalDC, Name, ToD, Loc))
return nullptr;

if (ToD)
return ToD;

// Try to find a function in our own ("to") context with the same name, same
// type, and in the same context as the function we're importing.
if (!LexicalDC->isFunctionOrMethod()) {
unsigned IDNS = Decl::IDNS_Ordinary;
SmallVector<NamedDecl *, 2> FoundDecls;
DC->getRedeclContext()->localUncachedLookup(Name, FoundDecls);
for (unsigned I = 0, N = FoundDecls.size(); I != N; ++I) {
if (!FoundDecls[I]->isInIdentifierNamespace(IDNS))
continue;

if (FunctionTemplateDecl *FoundFunction =
dyn_cast<FunctionTemplateDecl>(FoundDecls[I])) {
if (FoundFunction->hasExternalFormalLinkage() &&
D->hasExternalFormalLinkage()) {
if (IsStructuralMatch(D, FoundFunction)) {
Importer.Imported(D, FoundFunction);
// FIXME: Actually try to merge the body and other attributes.
return FoundFunction;
}
}
}
}
}

TemplateParameterList *Params =
ImportTemplateParameterList(D->getTemplateParameters());
if (!Params)
return nullptr;

FunctionDecl *TemplatedFD =
cast_or_null<FunctionDecl>(Importer.Import(D->getTemplatedDecl()));
if (!TemplatedFD)
return nullptr;

FunctionTemplateDecl *ToFunc = FunctionTemplateDecl::Create(
Importer.getToContext(), DC, Loc, Name, Params, TemplatedFD);

TemplatedFD->setDescribedFunctionTemplate(ToFunc);
ToFunc->setAccess(D->getAccess());
ToFunc->setLexicalDeclContext(LexicalDC);
Importer.Imported(D, ToFunc);

LexicalDC->addDeclInternal(ToFunc);
return ToFunc;
}

//----------------------------------------------------------------------------
// Import Statements
//----------------------------------------------------------------------------
@ -4321,9 +4406,8 @@ Stmt *ASTNodeImporter::VisitCompoundStmt(CompoundStmt *S) {

SourceLocation ToLBraceLoc = Importer.Import(S->getLBracLoc());
SourceLocation ToRBraceLoc = Importer.Import(S->getRBracLoc());
return new (Importer.getToContext()) CompoundStmt(Importer.getToContext(),
ToStmts,
ToLBraceLoc, ToRBraceLoc);
return CompoundStmt::Create(Importer.getToContext(), ToStmts, ToLBraceLoc,
ToRBraceLoc);
}

Stmt *ASTNodeImporter::VisitCaseStmt(CaseStmt *S) {
@ -5759,6 +5843,47 @@ Expr *ASTNodeImporter::VisitCXXPseudoDestructorExpr(
Importer.Import(E->getTildeLoc()), Storage);
}

Expr *ASTNodeImporter::VisitCXXDependentScopeMemberExpr(
CXXDependentScopeMemberExpr *E) {
Expr *Base = nullptr;
if (!E->isImplicitAccess()) {
Base = Importer.Import(E->getBase());
if (!Base)
return nullptr;
}

QualType BaseType = Importer.Import(E->getBaseType());
if (BaseType.isNull())
return nullptr;

TemplateArgumentListInfo ToTAInfo(Importer.Import(E->getLAngleLoc()),
Importer.Import(E->getRAngleLoc()));
TemplateArgumentListInfo *ResInfo = nullptr;
if (E->hasExplicitTemplateArgs()) {
if (ImportTemplateArgumentListInfo(E->template_arguments(), ToTAInfo))
return nullptr;
ResInfo = &ToTAInfo;
}

DeclarationName Name = Importer.Import(E->getMember());
if (!E->getMember().isEmpty() && Name.isEmpty())
return nullptr;

DeclarationNameInfo MemberNameInfo(Name, Importer.Import(E->getMemberLoc()));
// Import additional name location/type info.
ImportDeclarationNameLoc(E->getMemberNameInfo(), MemberNameInfo);
auto ToFQ = Importer.Import(E->getFirstQualifierFoundInScope());
if (!ToFQ && E->getFirstQualifierFoundInScope())
return nullptr;

return CXXDependentScopeMemberExpr::Create(
Importer.getToContext(), Base, BaseType, E->isArrow(),
Importer.Import(E->getOperatorLoc()),
Importer.Import(E->getQualifierLoc()),
Importer.Import(E->getTemplateKeywordLoc()),
cast_or_null<NamedDecl>(ToFQ), MemberNameInfo, ResInfo);
}

Expr *ASTNodeImporter::VisitCallExpr(CallExpr *E) {
QualType T = Importer.Import(E->getType());
if (T.isNull())
|
@ -1550,7 +1550,7 @@ void NamedDecl::printQualifiedName(raw_ostream &OS,
// the enum-specifier. Each scoped enumerator is declared in the
// scope of the enumeration.
// For the case of unscoped enumerator, do not include in the qualified
// name any information about its enum enclosing scope, as is visibility
// name any information about its enum enclosing scope, as its visibility
// is global.
if (ED->isScoped())
OS << *ED;
|
@ -299,31 +299,34 @@ SourceLocation Stmt::getLocEnd() const {
llvm_unreachable("unknown statement kind");
}

CompoundStmt::CompoundStmt(const ASTContext &C, ArrayRef<Stmt*> Stmts,
SourceLocation LB, SourceLocation RB)
: Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) {
CompoundStmt::CompoundStmt(ArrayRef<Stmt *> Stmts, SourceLocation LB,
SourceLocation RB)
: Stmt(CompoundStmtClass), LBraceLoc(LB), RBraceLoc(RB) {
CompoundStmtBits.NumStmts = Stmts.size();
assert(CompoundStmtBits.NumStmts == Stmts.size() &&
"NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!");

if (Stmts.empty()) {
Body = nullptr;
return;
}

Body = new (C) Stmt*[Stmts.size()];
std::copy(Stmts.begin(), Stmts.end(), Body);
setStmts(Stmts);
}

void CompoundStmt::setStmts(const ASTContext &C, ArrayRef<Stmt *> Stmts) {
if (Body)
C.Deallocate(Body);
CompoundStmtBits.NumStmts = Stmts.size();
void CompoundStmt::setStmts(ArrayRef<Stmt *> Stmts) {
assert(CompoundStmtBits.NumStmts == Stmts.size() &&
"NumStmts doesn't fit in bits of CompoundStmtBits.NumStmts!");

Body = new (C) Stmt*[Stmts.size()];
std::copy(Stmts.begin(), Stmts.end(), Body);
std::copy(Stmts.begin(), Stmts.end(), body_begin());
}

CompoundStmt *CompoundStmt::Create(const ASTContext &C, ArrayRef<Stmt *> Stmts,
SourceLocation LB, SourceLocation RB) {
void *Mem =
C.Allocate(totalSizeToAlloc<Stmt *>(Stmts.size()), alignof(CompoundStmt));
return new (Mem) CompoundStmt(Stmts, LB, RB);
}

CompoundStmt *CompoundStmt::CreateEmpty(const ASTContext &C,
unsigned NumStmts) {
void *Mem =
C.Allocate(totalSizeToAlloc<Stmt *>(NumStmts), alignof(CompoundStmt));
CompoundStmt *New = new (Mem) CompoundStmt(EmptyShell());
New->CompoundStmtBits.NumStmts = NumStmts;
return New;
}

const char *LabelStmt::getName() const {
@ -334,7 +337,7 @@ AttributedStmt *AttributedStmt::Create(const ASTContext &C, SourceLocation Loc,
ArrayRef<const Attr*> Attrs,
Stmt *SubStmt) {
assert(!Attrs.empty() && "Attrs should not be empty");
void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * Attrs.size(),
void *Mem = C.Allocate(totalSizeToAlloc<const Attr *>(Attrs.size()),
alignof(AttributedStmt));
return new (Mem) AttributedStmt(Loc, Attrs, SubStmt);
}
@ -342,7 +345,7 @@ AttributedStmt *AttributedStmt::Create(const ASTContext &C, SourceLocation Loc,
AttributedStmt *AttributedStmt::CreateEmpty(const ASTContext &C,
unsigned NumAttrs) {
assert(NumAttrs > 0 && "NumAttrs should be greater than zero");
void *Mem = C.Allocate(sizeof(AttributedStmt) + sizeof(Attr *) * NumAttrs,
void *Mem = C.Allocate(totalSizeToAlloc<const Attr *>(NumAttrs),
alignof(AttributedStmt));
return new (Mem) AttributedStmt(EmptyShell(), NumAttrs);
}
|
@ -133,7 +133,7 @@ BinaryOperator *ASTMaker::makeComparison(const Expr *LHS, const Expr *RHS,
}

CompoundStmt *ASTMaker::makeCompound(ArrayRef<Stmt *> Stmts) {
return new (C) CompoundStmt(C, Stmts, SourceLocation(), SourceLocation());
return CompoundStmt::Create(C, Stmts, SourceLocation(), SourceLocation());
}

DeclRefExpr *ASTMaker::makeDeclRefExpr(
|
@ -159,7 +159,7 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__ARM_FP_FAST", "1");

Builder.defineMacro("__ARM_SIZEOF_WCHAR_T",
llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4));
Twine(Opts.WCharSize ? Opts.WCharSize : 4));

Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4");

|
@ -582,7 +582,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,

// ACLE 6.4.4 LDREX/STREX
if (LDREX)
Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + llvm::utohexstr(LDREX));
Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + Twine::utohexstr(LDREX));

// ACLE 6.4.5 CLZ
if (ArchVersion == 5 || (ArchVersion == 6 && CPUProfile != "M") ||
@ -591,7 +591,7 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,

// ACLE 6.5.1 Hardware Floating Point
if (HW_FP)
Builder.defineMacro("__ARM_FP", "0x" + llvm::utohexstr(HW_FP));
Builder.defineMacro("__ARM_FP", "0x" + Twine::utohexstr(HW_FP));

// ACLE predefines.
Builder.defineMacro("__ARM_ACLE", "200");
@ -672,11 +672,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
// current AArch32 NEON implementations do not support double-precision
// floating-point even when it is present in VFP.
Builder.defineMacro("__ARM_NEON_FP",
"0x" + llvm::utohexstr(HW_FP & ~HW_FP_DP));
"0x" + Twine::utohexstr(HW_FP & ~HW_FP_DP));
}

Builder.defineMacro("__ARM_SIZEOF_WCHAR_T",
llvm::utostr(Opts.WCharSize ? Opts.WCharSize : 4));
Twine(Opts.WCharSize ? Opts.WCharSize : 4));

Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4");

|
@ -132,7 +132,14 @@ bool X86TargetInfo::initFeatureMap(
break;

case CK_Icelake:
// TODO: Add icelake features here.
setFeatureEnabledImpl(Features, "vaes", true);
setFeatureEnabledImpl(Features, "gfni", true);
setFeatureEnabledImpl(Features, "vpclmulqdq", true);
setFeatureEnabledImpl(Features, "avx512bitalg", true);
setFeatureEnabledImpl(Features, "avx512vnni", true);
setFeatureEnabledImpl(Features, "avx512vbmi2", true);
setFeatureEnabledImpl(Features, "avx512vpopcntdq", true);
setFeatureEnabledImpl(Features, "clwb", true);
LLVM_FALLTHROUGH;
case CK_Cannonlake:
setFeatureEnabledImpl(Features, "avx512ifma", true);
@ -145,8 +152,10 @@ bool X86TargetInfo::initFeatureMap(
setFeatureEnabledImpl(Features, "avx512dq", true);
setFeatureEnabledImpl(Features, "avx512bw", true);
setFeatureEnabledImpl(Features, "avx512vl", true);
setFeatureEnabledImpl(Features, "pku", true);
setFeatureEnabledImpl(Features, "clwb", true);
if (Kind == CK_SkylakeServer) {
setFeatureEnabledImpl(Features, "pku", true);
setFeatureEnabledImpl(Features, "clwb", true);
}
LLVM_FALLTHROUGH;
case CK_SkylakeClient:
setFeatureEnabledImpl(Features, "xsavec", true);
@ -443,7 +452,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
LLVM_FALLTHROUGH;
case SSE2:
Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] =
false;
Features["gfni"] = false;
LLVM_FALLTHROUGH;
case SSE3:
Features["sse3"] = false;
@ -460,7 +469,7 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
LLVM_FALLTHROUGH;
case AVX:
Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] =
Features["xsaveopt"] = false;
Features["xsaveopt"] = Features["vaes"] = Features["vpclmulqdq"] = false;
setXOPLevel(Features, FMA4, false);
LLVM_FALLTHROUGH;
case AVX2:
@ -470,7 +479,9 @@ void X86TargetInfo::setSSELevel(llvm::StringMap<bool> &Features,
Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] =
Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] =
Features["avx512vl"] = Features["avx512vbmi"] =
Features["avx512ifma"] = Features["avx512vpopcntdq"] = false;
Features["avx512ifma"] = Features["avx512vpopcntdq"] =
Features["avx512bitalg"] = Features["avx512vnni"] =
Features["avx512vbmi2"] = false;
break;
}
}
@ -572,9 +583,26 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
} else if (Name == "aes") {
if (Enabled)
setSSELevel(Features, SSE2, Enabled);
else
Features["vaes"] = false;
} else if (Name == "vaes") {
if (Enabled) {
setSSELevel(Features, AVX, Enabled);
Features["aes"] = true;
}
} else if (Name == "pclmul") {
if (Enabled)
setSSELevel(Features, SSE2, Enabled);
else
Features["vpclmulqdq"] = false;
} else if (Name == "vpclmulqdq") {
if (Enabled) {
setSSELevel(Features, AVX, Enabled);
Features["pclmul"] = true;
}
} else if (Name == "gfni") {
if (Enabled)
setSSELevel(Features, SSE2, Enabled);
} else if (Name == "avx") {
setSSELevel(Features, AVX, Enabled);
} else if (Name == "avx2") {
@ -584,15 +612,17 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap<bool> &Features,
} else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" ||
Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" ||
Name == "avx512vbmi" || Name == "avx512ifma" ||
Name == "avx512vpopcntdq") {
Name == "avx512vpopcntdq" || Name == "avx512bitalg" ||
Name == "avx512vnni" || Name == "avx512vbmi2") {
if (Enabled)
setSSELevel(Features, AVX512F, Enabled);
// Enable BWI instruction if VBMI is being enabled.
if (Name == "avx512vbmi" && Enabled)
// Enable BWI instruction if VBMI/VBMI2/BITALG is being enabled.
if ((Name.startswith("avx512vbmi") || Name == "avx512bitalg") && Enabled)
Features["avx512bw"] = true;
// Also disable VBMI if BWI is being disabled.
// Also disable VBMI/VBMI2/BITALG if BWI is being disabled.
if (Name == "avx512bw" && !Enabled)
Features["avx512vbmi"] = false;
Features["avx512vbmi"] = Features["avx512vbmi2"] =
Features["avx512bitalg"] = false;
} else if (Name == "fma") {
if (Enabled)
setSSELevel(Features, AVX, Enabled);
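
The chains above encode feature implications in both directions: enabling vaes raises the SSE level to AVX and switches on plain aes, while disabling avx512bw also strips vbmi/vbmi2/bitalg, which need its byte/word masking. The effect is observable from the predefined macros; a small sketch (exact macro set depends on the rest of the target):

// Checkable with e.g. `clang -mvaes -dM -E -x c /dev/null`, using the
// Options.td flags added above.
#if defined(__VAES__) && !defined(__AES__)
#error "unreachable: enabling vaes implies aes"
#endif
#if defined(__AVX512VBMI2__) && !defined(__AVX512BW__)
#error "unreachable: avx512vbmi2 is stripped whenever avx512bw is disabled"
#endif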
@ -636,8 +666,12 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,

if (Feature == "+aes") {
HasAES = true;
} else if (Feature == "+vaes") {
HasVAES = true;
} else if (Feature == "+pclmul") {
HasPCLMUL = true;
} else if (Feature == "+vpclmulqdq") {
HasVPCLMULQDQ = true;
} else if (Feature == "+lzcnt") {
HasLZCNT = true;
} else if (Feature == "+rdrnd") {
@ -666,22 +700,30 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasFMA = true;
} else if (Feature == "+f16c") {
HasF16C = true;
} else if (Feature == "+gfni") {
HasGFNI = true;
} else if (Feature == "+avx512cd") {
HasAVX512CD = true;
} else if (Feature == "+avx512vpopcntdq") {
HasAVX512VPOPCNTDQ = true;
} else if (Feature == "+avx512vnni") {
HasAVX512VNNI = true;
} else if (Feature == "+avx512er") {
HasAVX512ER = true;
} else if (Feature == "+avx512pf") {
HasAVX512PF = true;
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
HasAVX512BITALG = true;
} else if (Feature == "+avx512bw") {
HasAVX512BW = true;
} else if (Feature == "+avx512vl") {
HasAVX512VL = true;
} else if (Feature == "+avx512vbmi") {
HasAVX512VBMI = true;
} else if (Feature == "+avx512vbmi2") {
HasAVX512VBMI2 = true;
} else if (Feature == "+avx512ifma") {
HasAVX512IFMA = true;
} else if (Feature == "+sha") {
@ -934,9 +976,15 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasAES)
Builder.defineMacro("__AES__");

if (HasVAES)
Builder.defineMacro("__VAES__");

if (HasPCLMUL)
Builder.defineMacro("__PCLMUL__");

if (HasVPCLMULQDQ)
Builder.defineMacro("__VPCLMULQDQ__");

if (HasLZCNT)
Builder.defineMacro("__LZCNT__");

@ -996,22 +1044,31 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasF16C)
Builder.defineMacro("__F16C__");

if (HasGFNI)
Builder.defineMacro("__GFNI__");

if (HasAVX512CD)
Builder.defineMacro("__AVX512CD__");
if (HasAVX512VPOPCNTDQ)
Builder.defineMacro("__AVX512VPOPCNTDQ__");
if (HasAVX512VNNI)
Builder.defineMacro("__AVX512VNNI__");
if (HasAVX512ER)
Builder.defineMacro("__AVX512ER__");
if (HasAVX512PF)
Builder.defineMacro("__AVX512PF__");
if (HasAVX512DQ)
Builder.defineMacro("__AVX512DQ__");
if (HasAVX512BITALG)
Builder.defineMacro("__AVX512BITALG__");
if (HasAVX512BW)
Builder.defineMacro("__AVX512BW__");
if (HasAVX512VL)
Builder.defineMacro("__AVX512VL__");
if (HasAVX512VBMI)
Builder.defineMacro("__AVX512VBMI__");
if (HasAVX512VBMI2)
Builder.defineMacro("__AVX512VBMI2__");
if (HasAVX512IFMA)
Builder.defineMacro("__AVX512IFMA__");

@ -1141,12 +1198,15 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512f", true)
.Case("avx512cd", true)
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
.Case("avx512er", true)
.Case("avx512pf", true)
.Case("avx512dq", true)
.Case("avx512bitalg", true)
.Case("avx512bw", true)
.Case("avx512vl", true)
.Case("avx512vbmi", true)
.Case("avx512vbmi2", true)
.Case("avx512ifma", true)
.Case("bmi", true)
.Case("bmi2", true)
@ -1159,6 +1219,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("fma4", true)
.Case("fsgsbase", true)
.Case("fxsr", true)
.Case("gfni", true)
.Case("lwp", true)
.Case("lzcnt", true)
.Case("mmx", true)
@ -1185,6 +1246,8 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("sse4.2", true)
.Case("sse4a", true)
.Case("tbm", true)
.Case("vaes", true)
.Case("vpclmulqdq", true)
.Case("x87", true)
.Case("xop", true)
.Case("xsave", true)
@ -1203,12 +1266,15 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512f", SSELevel >= AVX512F)
.Case("avx512cd", HasAVX512CD)
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512er", HasAVX512ER)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bitalg", HasAVX512BITALG)
.Case("avx512bw", HasAVX512BW)
.Case("avx512vl", HasAVX512VL)
.Case("avx512vbmi", HasAVX512VBMI)
.Case("avx512vbmi2", HasAVX512VBMI2)
.Case("avx512ifma", HasAVX512IFMA)
.Case("bmi", HasBMI)
.Case("bmi2", HasBMI2)
@ -1221,6 +1287,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("fma4", XOPLevel >= FMA4)
.Case("fsgsbase", HasFSGSBASE)
.Case("fxsr", HasFXSR)
.Case("gfni", HasGFNI)
.Case("ibt", HasIBT)
.Case("lwp", HasLWP)
.Case("lzcnt", HasLZCNT)
@ -1249,6 +1316,8 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("sse4.2", SSELevel >= SSE42)
.Case("sse4a", XOPLevel >= SSE4A)
.Case("tbm", HasTBM)
.Case("vaes", HasVAES)
.Case("vpclmulqdq", HasVPCLMULQDQ)
.Case("x86", true)
.Case("x86_32", getTriple().getArch() == llvm::Triple::x86)
.Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64)
|
@ -48,7 +48,10 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP;

bool HasAES = false;
bool HasVAES = false;
bool HasPCLMUL = false;
bool HasVPCLMULQDQ = false;
bool HasGFNI = false;
bool HasLZCNT = false;
bool HasRDRND = false;
bool HasFSGSBASE = false;
@ -65,12 +68,15 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasF16C = false;
bool HasAVX512CD = false;
bool HasAVX512VPOPCNTDQ = false;
bool HasAVX512VNNI = false;
bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;
bool HasAVX512BITALG = false;
bool HasAVX512BW = false;
bool HasAVX512VL = false;
bool HasAVX512VBMI = false;
bool HasAVX512VBMI2 = false;
bool HasAVX512IFMA = false;
bool HasSHA = false;
bool HasMPX = false;
|
@ -8143,12 +8143,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_storesd128_mask: {
return EmitX86MaskedStore(*this, Ops, 16);
}
case X86::BI__builtin_ia32_vpopcntb_128:
case X86::BI__builtin_ia32_vpopcntd_128:
case X86::BI__builtin_ia32_vpopcntq_128:
case X86::BI__builtin_ia32_vpopcntw_128:
case X86::BI__builtin_ia32_vpopcntb_256:
case X86::BI__builtin_ia32_vpopcntd_256:
case X86::BI__builtin_ia32_vpopcntq_256:
case X86::BI__builtin_ia32_vpopcntw_256:
case X86::BI__builtin_ia32_vpopcntb_512:
case X86::BI__builtin_ia32_vpopcntd_512:
case X86::BI__builtin_ia32_vpopcntq_512: {
case X86::BI__builtin_ia32_vpopcntq_512:
case X86::BI__builtin_ia32_vpopcntw_512: {
llvm::Type *ResultType = ConvertType(E->getType());
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
return Builder.CreateCall(F, Ops);
|
@ -570,7 +570,7 @@ static llvm::Value *emitHash16Bytes(CGBuilderTy &Builder, llvm::Value *Low,

bool CodeGenFunction::isNullPointerAllowed(TypeCheckKind TCK) {
return TCK == TCK_DowncastPointer || TCK == TCK_Upcast ||
TCK == TCK_UpcastToVirtualBase;
TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation;
}

bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) {
@ -578,7 +578,7 @@ bool CodeGenFunction::isVptrCheckRequired(TypeCheckKind TCK, QualType Ty) {
return (RD && RD->hasDefinition() && RD->isDynamicClass()) &&
(TCK == TCK_MemberAccess || TCK == TCK_MemberCall ||
TCK == TCK_DowncastPointer || TCK == TCK_DowncastReference ||
TCK == TCK_UpcastToVirtualBase);
TCK == TCK_UpcastToVirtualBase || TCK == TCK_DynamicOperation);
}
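
TCK_DynamicOperation extends the -fsanitize=vptr type checks to typeid and dynamic_cast, per the [class.cdtor] quotes in the hunks below. A hedged example of the kind of code the new check is aimed at (illustrative; actual diagnostics depend on the sanitizer configuration and ABI):

struct Base { virtual ~Base() = default; };
struct Derived;
void observe(Base *B);

struct Derived : Base {
  Derived() { observe(this); } // *this is still under construction here
};

void observe(Base *B) {
  // typeid and dynamic_cast consult the vptr; on an object whose lifetime
  // has not begun (or has already ended) that is undefined behavior per
  // [class.cdtor], and -fsanitize=vptr now routes both operations through
  // the TCK_DynamicOperation check emitted below.
  (void)typeid(*B);
  (void)dynamic_cast<Derived *>(B);
}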
|
||||
|
||||
bool CodeGenFunction::sanitizePerformTypeCheck() const {
|
||||
|
@ -2056,6 +2056,15 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E,
// Get the vtable pointer.
Address ThisPtr = CGF.EmitLValue(E).getAddress();

QualType SrcRecordTy = E->getType();

// C++ [class.cdtor]p4:
//   If the operand of typeid refers to the object under construction or
//   destruction and the static type of the operand is neither the constructor
//   or destructor’s class nor one of its bases, the behavior is undefined.
CGF.EmitTypeCheck(CodeGenFunction::TCK_DynamicOperation, E->getExprLoc(),
ThisPtr.getPointer(), SrcRecordTy);

// C++ [expr.typeid]p2:
//   If the glvalue expression is obtained by applying the unary * operator to
//   a pointer and the pointer is a null pointer value, the typeid expression
@ -2064,7 +2073,6 @@ static llvm::Value *EmitTypeidFromVTable(CodeGenFunction &CGF, const Expr *E,
// However, this paragraph's intent is not clear. We choose a very generous
// interpretation which implores us to consider comma operators, conditional
// operators, parentheses and other such constructs.
QualType SrcRecordTy = E->getType();
if (CGF.CGM.getCXXABI().shouldTypeidBeNullChecked(
isGLValueFromPointerDeref(E), SrcRecordTy)) {
llvm::BasicBlock *BadTypeidBlock =
@ -2127,10 +2135,6 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,
CGM.EmitExplicitCastExprType(DCE, this);
QualType DestTy = DCE->getTypeAsWritten();

if (DCE->isAlwaysNull())
if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy))
return T;

QualType SrcTy = DCE->getSubExpr()->getType();

// C++ [expr.dynamic.cast]p7:
@ -2151,6 +2155,18 @@ llvm::Value *CodeGenFunction::EmitDynamicCast(Address ThisAddr,
DestRecordTy = DestTy->castAs<ReferenceType>()->getPointeeType();
}

// C++ [class.cdtor]p5:
//   If the operand of the dynamic_cast refers to the object under
//   construction or destruction and the static type of the operand is not a
//   pointer to or object of the constructor or destructor’s own class or one
//   of its bases, the dynamic_cast results in undefined behavior.
EmitTypeCheck(TCK_DynamicOperation, DCE->getExprLoc(), ThisAddr.getPointer(),
SrcRecordTy);

if (DCE->isAlwaysNull())
if (llvm::Value *T = EmitDynamicCastToNull(*this, DestTy))
return T;

assert(SrcRecordTy->isRecordType() && "source type must be a record type!");

// C++ [expr.dynamic.cast]p4:
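Both EmitTypeCheck calls above instrument the operand of a dynamic operation when the relevant sanitizer checks are enabled. A minimal C++ sketch of the kind of code the new TCK_DynamicOperation check covers; the types and the -fsanitize=vptr invocation are illustrative:

    struct Base { virtual ~Base() {} };
    struct Derived : Base {};

    void probe(Base *p) {
      // With -fsanitize=vptr, the operand's dynamic type is now verified
      // before the dynamic_cast and the typeid are evaluated, catching
      // objects used outside their lifetime (e.g. during destruction).
      Derived *d = dynamic_cast<Derived *>(p);
      (void)typeid(*p);
      (void)d;
    }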
@ -4175,14 +4175,23 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
LValue SrcBase;
if (!Data.FirstprivateVars.empty()) {
bool IsTargetTask =
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
isOpenMPTargetExecutionDirective(D.getDirectiveKind());
// For target-based directives skip 3 firstprivate arrays BasePointersArray,
// PointersArray and SizesArray. The original variables for these arrays are
// not captured and we get their addresses explicitly.
if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
(IsTargetTask && Data.FirstprivateVars.size() > 3)) {
SrcBase = CGF.MakeAddrLValue(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
SharedsTy);
}
CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
cast<CapturedStmt>(*D.getAssociatedStmt()));
OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
? OMPD_taskloop
: OMPD_task;
CodeGenFunction::CGCapturedStmtInfo CapturesInfo(*D.getCapturedStmt(Kind));
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
for (auto &&Pair : Privates) {
auto *VD = Pair.second.PrivateCopy;
@ -4192,14 +4201,27 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
if (auto *Elem = Pair.second.PrivateElemInit) {
auto *OriginalVD = Pair.second.Original;
auto *SharedField = CapturesInfo.lookup(OriginalVD);
auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
SharedRefLValue = CGF.MakeAddrLValue(
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
SharedRefLValue.getType(),
LValueBaseInfo(AlignmentSource::Decl),
SharedRefLValue.getTBAAInfo());
// Check if the variable is the target-based BasePointersArray,
// PointersArray or SizesArray.
LValue SharedRefLValue;
QualType Type = OriginalVD->getType();
if (IsTargetTask && isa<ImplicitParamDecl>(OriginalVD) &&
isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
cast<CapturedDecl>(OriginalVD->getDeclContext())->getNumParams() ==
0 &&
isa<TranslationUnitDecl>(
cast<CapturedDecl>(OriginalVD->getDeclContext())
->getDeclContext())) {
SharedRefLValue =
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
} else {
auto *SharedField = CapturesInfo.lookup(OriginalVD);
SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
SharedRefLValue = CGF.MakeAddrLValue(
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
SharedRefLValue.getTBAAInfo());
}
if (Type->isArrayType()) {
// Initialize firstprivate array.
if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
@ -4400,8 +4422,10 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
}
KmpTaskTQTy = SavedKmpTaskloopTQTy;
} else {
assert(D.getDirectiveKind() == OMPD_task &&
"Expected taskloop or task directive");
assert((D.getDirectiveKind() == OMPD_task ||
isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
"Expected taskloop, task or target directive");
if (SavedKmpTaskTQTy.isNull()) {
SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
@ -7417,8 +7441,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Generate the code for the opening of the data environment. Capture all the
// arguments of the runtime call by reference because they are used in the
// closing of the region.
auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF,
PrePostActionTy &) {
auto &&BeginThenGen = [this, &D, Device, &Info,
&CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
@ -7454,8 +7478,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg,
PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
auto &RT = CGF.CGM.getOpenMPRuntime();
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
OffloadingArgs);

// If device pointer privatization is required, emit the body of the region
@ -7465,7 +7488,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
};

// Generate code for the closing of the data region.
auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) {
auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
PrePostActionTy &) {
assert(Info.isValid() && "Invalid data environment closing arguments.");

llvm::Value *BasePointersArrayArg = nullptr;
@ -7490,8 +7514,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, BasePointersArrayArg,
PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
auto &RT = CGF.CGM.getOpenMPRuntime();
CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
OffloadingArgs);
};

@ -7543,25 +7566,11 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
isa<OMPTargetUpdateDirective>(D)) &&
"Expecting either target enter, exit data, or update directives.");

CodeGenFunction::OMPTargetDataInfo InputInfo;
llvm::Value *MapTypesArray = nullptr;
// Generate the code for the opening of the data environment.
auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
MappableExprsHandler::MapValuesArrayTy Sizes;
MappableExprsHandler::MapFlagsArrayTy MapTypes;

// Get map clause information.
MappableExprsHandler MEHandler(D, CGF);
MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

// Fill up the arrays and create the arguments.
TargetDataInfo Info;
emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
Info.MapTypesArray, Info);

auto &&ThenGen = [this, &D, Device, &InputInfo,
&MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
if (Device) {
@ -7572,13 +7581,16 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
}

// Emit the number of elements in the offloading arrays.
auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
llvm::Constant *PointerNum =
CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

llvm::Value *OffloadingArgs[] = {
DeviceID, PointerNum, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
llvm::Value *OffloadingArgs[] = {DeviceID,
PointerNum,
InputInfo.BasePointersArray.getPointer(),
InputInfo.PointersArray.getPointer(),
InputInfo.SizesArray.getPointer(),
MapTypesArray};

auto &RT = CGF.CGM.getOpenMPRuntime();
// Select the right runtime function call for each expected standalone
// directive.
const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
@ -7600,18 +7612,47 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
: OMPRTL__tgt_target_data_update;
break;
}
CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
};

// In the event we get an if clause, we don't have to take any action on the
// else side.
auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
MappableExprsHandler::MapValuesArrayTy Sizes;
MappableExprsHandler::MapFlagsArrayTy MapTypes;

if (IfCond) {
emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
} else {
RegionCodeGenTy ThenGenRCG(ThenGen);
ThenGenRCG(CGF);
// Get map clause information.
MappableExprsHandler MEHandler(D, CGF);
MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

TargetDataInfo Info;
// Fill up the arrays and create the arguments.
emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
Info.MapTypesArray, Info);
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray =
Address(Info.BasePointersArray, CGM.getPointerAlign());
InputInfo.PointersArray =
Address(Info.PointersArray, CGM.getPointerAlign());
InputInfo.SizesArray =
Address(Info.SizesArray, CGM.getPointerAlign());
MapTypesArray = Info.MapTypesArray;
if (D.hasClausesOfKind<OMPDependClause>())
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
else
emitInlinedDirective(CGF, OMPD_target_update, ThenGen);
};

if (IfCond)
emitOMPIfClause(CGF, IfCond, TargetThenGen,
[](CodeGenFunction &CGF, PrePostActionTy &) {});
else {
RegionCodeGenTy ThenRCG(TargetThenGen);
ThenRCG(CGF);
}
}
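The rewrite above routes a target data standalone directive carrying a depend clause through EmitOMPTargetTaskBasedDirective, deferring the data motion as a task whose offloading arrays become implicit firstprivates. A hedged OpenMP sketch of a directive that takes the new task-based path; the function and array names are illustrative:

    void push_async(double *a, int n) {
      // depend + nowait: the update is emitted as a task with dependences
      // instead of an immediately inlined region.
      #pragma omp target update to(a[0:n]) depend(inout: a[0]) nowait
    }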
@ -33,10 +33,11 @@ enum OpenMPRTLFunctionNVPTX {
/// \brief Call to void __kmpc_spmd_kernel_deinit();
OMPRTL_NVPTX__kmpc_spmd_kernel_deinit,
/// \brief Call to void __kmpc_kernel_prepare_parallel(void
/// *outlined_function, void ***args, kmp_int32 nArgs);
/// *outlined_function, void ***args, kmp_int32 nArgs, int16_t
/// IsOMPRuntimeInitialized);
OMPRTL_NVPTX__kmpc_kernel_prepare_parallel,
/// \brief Call to bool __kmpc_kernel_parallel(void **outlined_function, void
/// ***args);
/// ***args, int16_t IsOMPRuntimeInitialized);
OMPRTL_NVPTX__kmpc_kernel_parallel,
/// \brief Call to void __kmpc_kernel_end_parallel();
OMPRTL_NVPTX__kmpc_kernel_end_parallel,
@ -521,7 +522,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
// Set up shared arguments
Address SharedArgs =
CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrPtrTy, "shared_args");
llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer()};
// TODO: Optimize runtime initialization and pass in correct value.
llvm::Value *Args[] = {WorkFn.getPointer(), SharedArgs.getPointer(),
/*RequiresOMPRuntime=*/Bld.getInt16(1)};
llvm::Value *Ret = CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args);
Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
@ -637,18 +640,21 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
}
case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: {
/// Build void __kmpc_kernel_prepare_parallel(
/// void *outlined_function, void ***args, kmp_int32 nArgs);
/// void *outlined_function, void ***args, kmp_int32 nArgs, int16_t
/// IsOMPRuntimeInitialized);
llvm::Type *TypeParams[] = {CGM.Int8PtrTy,
CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty};
CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int32Ty,
CGM.Int16Ty};
llvm::FunctionType *FnTy =
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel");
break;
}
case OMPRTL_NVPTX__kmpc_kernel_parallel: {
/// Build bool __kmpc_kernel_parallel(void **outlined_function, void ***args);
/// Build bool __kmpc_kernel_parallel(void **outlined_function, void
/// ***args, int16_t IsOMPRuntimeInitialized);
llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy,
CGM.Int8PtrPtrTy->getPointerTo(0)};
CGM.Int8PtrPtrTy->getPointerTo(0), CGM.Int16Ty};
llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy);
llvm::FunctionType *FnTy =
llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false);
@ -949,8 +955,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
"shared_args");
llvm::Value *SharedArgsPtr = SharedArgs.getPointer();
// TODO: Optimize runtime initialization and pass in correct value.
llvm::Value *Args[] = {ID, SharedArgsPtr,
Bld.getInt32(CapturedVars.size())};
Bld.getInt32(CapturedVars.size()),
/*RequiresOMPRuntime=*/Bld.getInt16(1)};

CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
@ -970,9 +978,10 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
Idx++;
}
} else {
llvm::Value *Args[] = {ID,
llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)),
/*nArgs=*/Bld.getInt32(0)};
// TODO: Optimize runtime initialization and pass in correct value.
llvm::Value *Args[] = {
ID, llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy->getPointerTo(0)),
/*nArgs=*/Bld.getInt32(0), /*RequiresOMPRuntime=*/Bld.getInt16(1)};
CGF.EmitRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel),
Args);
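For reference, the NVPTX runtime entry points described in the doc comments above now take an extra int16_t flag. A declaration sketch assuming only what those comments state; parameter names are illustrative:

    void __kmpc_kernel_prepare_parallel(void *outlined_function, void ***args,
                                        kmp_int32 nArgs,
                                        int16_t IsOMPRuntimeInitialized);
    bool __kmpc_kernel_parallel(void **outlined_function, void ***args,
                                int16_t IsOMPRuntimeInitialized);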
@ -2907,6 +2907,151 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
TaskGen(*this, OutlinedFn, Data);
}

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
QualType Ty, CapturedDecl *CD) {
auto *OrigVD = ImplicitParamDecl::Create(
C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other);
auto *OrigRef =
DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
/*RefersToEnclosingVariableOrCapture=*/false,
SourceLocation(), Ty, VK_LValue);
auto *PrivateVD = ImplicitParamDecl::Create(
C, CD, SourceLocation(), /*Id=*/nullptr, Ty, ImplicitParamDecl::Other);
auto *PrivateRef = DeclRefExpr::Create(
C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
/*RefersToEnclosingVariableOrCapture=*/false, SourceLocation(), Ty,
VK_LValue);
QualType ElemType = C.getBaseElementType(Ty);
auto *InitVD =
ImplicitParamDecl::Create(C, CD, SourceLocation(), /*Id=*/nullptr,
ElemType, ImplicitParamDecl::Other);
auto *InitRef =
DeclRefExpr::Create(C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
/*RefersToEnclosingVariableOrCapture=*/false,
SourceLocation(), ElemType, VK_LValue);
PrivateVD->setInitStyle(VarDecl::CInit);
PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
InitRef, /*BasePath=*/nullptr,
VK_RValue));
Data.FirstprivateVars.emplace_back(OrigRef);
Data.FirstprivateCopies.emplace_back(PrivateRef);
Data.FirstprivateInits.emplace_back(InitRef);
return OrigVD;
}

void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
OMPTargetDataInfo &InputInfo) {
// Emit outlined function for task construct.
auto CS = S.getCapturedStmt(OMPD_task);
auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
auto *I = CS->getCapturedDecl()->param_begin();
auto *PartId = std::next(I);
auto *TaskT = std::next(I, 4);
OMPTaskDataTy Data;
// The task is not final.
Data.Final.setInt(/*IntVal=*/false);
// Get list of firstprivate variables.
for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
auto IRef = C->varlist_begin();
auto IElemInitRef = C->inits().begin();
for (auto *IInit : C->private_copies()) {
Data.FirstprivateVars.push_back(*IRef);
Data.FirstprivateCopies.push_back(IInit);
Data.FirstprivateInits.push_back(*IElemInitRef);
++IRef;
++IElemInitRef;
}
}
OMPPrivateScope TargetScope(*this);
VarDecl *BPVD = nullptr;
VarDecl *PVD = nullptr;
VarDecl *SVD = nullptr;
if (InputInfo.NumberOfTargetItems > 0) {
auto *CD = CapturedDecl::Create(
getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
QualType BaseAndPointersType = getContext().getConstantArrayType(
getContext().VoidPtrTy, ArrSize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
BPVD = createImplicitFirstprivateForType(getContext(), Data,
BaseAndPointersType, CD);
PVD = createImplicitFirstprivateForType(getContext(), Data,
BaseAndPointersType, CD);
QualType SizesType = getContext().getConstantArrayType(
getContext().getSizeType(), ArrSize, ArrayType::Normal,
/*IndexTypeQuals=*/0);
SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD);
TargetScope.addPrivate(
BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
TargetScope.addPrivate(PVD,
[&InputInfo]() { return InputInfo.PointersArray; });
TargetScope.addPrivate(SVD,
[&InputInfo]() { return InputInfo.SizesArray; });
}
(void)TargetScope.Privatize();
// Build list of dependences.
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
for (auto *IRef : C->varlists())
Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
&InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
if (!Data.FirstprivateVars.empty()) {
enum { PrivatesParam = 2, CopyFnParam = 3 };
auto *CopyFn = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
auto *PrivatesPtr = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
// Map privates.
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
CallArgs.push_back(PrivatesPtr);
for (auto *E : Data.FirstprivateVars) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
".firstpriv.ptr.addr");
PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
CallArgs.push_back(PrivatePtr.getPointer());
}
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getLocStart(),
CopyFn, CallArgs);
for (auto &&Pair : PrivatePtrs) {
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
}
}
// Privatize all private variables except for in_reduction items.
(void)Scope.Privatize();
InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0, CGF.getPointerSize());
InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
CGF.GetAddrOfLocalVar(PVD), /*Index=*/0, CGF.getPointerSize());
InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
CGF.GetAddrOfLocalVar(SVD), /*Index=*/0, CGF.getSizeSize());

Action.Enter(CGF);
OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true,
/*EmitPreInitStmt=*/false);
BodyGen(CGF);
};
auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
Data.NumberOfParts);
llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
IntegerLiteral IfCond(getContext(), TrueOrFalse,
getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
SourceLocation());

CGM.getOpenMPRuntime().emitTaskCall(*this, S.getLocStart(), S, OutlinedFn,
SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
// Emit outlined function for task construct.
auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
@ -4252,14 +4397,8 @@ void CodeGenFunction::EmitOMPTargetEnterDataDirective(
if (auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();

auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
PrePostActionTy &) {
CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
Device);
};
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_enter_data,
CodeGen);
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

void CodeGenFunction::EmitOMPTargetExitDataDirective(
@ -4279,14 +4418,8 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective(
if (auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();

auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
PrePostActionTy &) {
CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
Device);
};
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_exit_data,
CodeGen);
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}

static void emitTargetParallelRegion(CodeGenFunction &CGF,
@ -4585,12 +4718,6 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(
if (auto *C = S.getSingleClause<OMPDeviceClause>())
Device = C->getDevice();

auto &&CodeGen = [&S, IfCond, Device](CodeGenFunction &CGF,
PrePostActionTy &) {
CGF.CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(CGF, S, IfCond,
Device);
};
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_target_update,
CodeGen);
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
@ -2371,7 +2371,10 @@ class CodeGenFunction : public CodeGenTypeCache {
/// object within its lifetime.
TCK_UpcastToVirtualBase,
/// Checking the value assigned to a _Nonnull pointer. Must not be null.
TCK_NonnullAssign
TCK_NonnullAssign,
/// Checking the operand of a dynamic_cast or a typeid expression. Must be
/// null or an object within its lifetime.
TCK_DynamicOperation
};

/// Determine whether the pointer type check \p TCK permits null pointers.
@ -2820,6 +2823,20 @@ class CodeGenFunction : public CodeGenTypeCache {
void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
const RegionCodeGenTy &BodyGen,
const TaskGenTy &TaskGen, OMPTaskDataTy &Data);
struct OMPTargetDataInfo {
Address BasePointersArray = Address::invalid();
Address PointersArray = Address::invalid();
Address SizesArray = Address::invalid();
unsigned NumberOfTargetItems = 0;
explicit OMPTargetDataInfo() = default;
OMPTargetDataInfo(Address BasePointersArray, Address PointersArray,
Address SizesArray, unsigned NumberOfTargetItems)
: BasePointersArray(BasePointersArray), PointersArray(PointersArray),
SizesArray(SizesArray), NumberOfTargetItems(NumberOfTargetItems) {}
};
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S,
const RegionCodeGenTy &BodyGen,
OMPTargetDataInfo &InputInfo);

void EmitOMPParallelDirective(const OMPParallelDirective &S);
void EmitOMPSimdDirective(const OMPSimdDirective &S);
|

if (MsanTrackOrigins)
CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" +
llvm::utostr(MsanTrackOrigins)));
Twine(MsanTrackOrigins)));

if (MsanUseAfterDtor)
CmdArgs.push_back("-fsanitize-memory-use-after-dtor");
@ -829,7 +829,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,

if (AsanFieldPadding)
CmdArgs.push_back(Args.MakeArgString("-fsanitize-address-field-padding=" +
llvm::utostr(AsanFieldPadding)));
Twine(AsanFieldPadding)));

if (AsanUseAfterScope)
CmdArgs.push_back("-fsanitize-address-use-after-scope");
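This hunk and the driver hunks that follow replace llvm::utostr concatenation with Twine. The practical difference is that MakeArgString renders the whole flag in one pass instead of first materializing a temporary std::string. A sketch of the pattern with an illustrative flag name:

    // Before: builds a std::string, then copies it into the arg allocator.
    CmdArgs.push_back(Args.MakeArgString("-some-flag=" + llvm::utostr(N)));
    // After: the Twine is rendered directly by MakeArgString.
    CmdArgs.push_back(Args.MakeArgString("-some-flag=" + Twine(N)));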
@ -1738,10 +1738,9 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args,
CmdArgs.push_back("-Wreturn-type");

if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
std::string N = llvm::utostr(G.getValue());
std::string Opt = std::string("-hexagon-small-data-threshold=") + N;
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString(Opt));
CmdArgs.push_back(Args.MakeArgString("-hexagon-small-data-threshold=" +
Twine(G.getValue())));
}

if (!Args.hasArg(options::OPT_fno_short_enums))
@ -419,8 +419,8 @@ void tools::AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
CmdArgs.push_back("-plugin-opt=thinlto");

if (unsigned Parallelism = getLTOParallelism(Args, D))
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=jobs=") +
llvm::to_string(Parallelism)));
CmdArgs.push_back(
Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism)));

// If an explicit debugger tuning argument appeared, pass it along.
if (Arg *A = Args.getLastArg(options::OPT_gTune_Group,
@ -545,8 +545,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (unsigned Parallelism =
getLTOParallelism(Args, getToolChain().getDriver())) {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(
Args.MakeArgString(Twine("-threads=") + llvm::to_string(Parallelism)));
CmdArgs.push_back(Args.MakeArgString("-threads=" + Twine(Parallelism)));
}

if (getToolChain().ShouldLinkCXXStdlib(Args))
@ -138,16 +138,15 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const Driver &D = HTC.getDriver();
ArgStringList CmdArgs;

std::string MArchString = "-march=hexagon";
CmdArgs.push_back(Args.MakeArgString(MArchString));
CmdArgs.push_back("-march=hexagon");

RenderExtraToolArgs(JA, CmdArgs);

std::string AsName = "hexagon-llvm-mc";
std::string MCpuString = "-mcpu=hexagon" +
toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
const char *AsName = "hexagon-llvm-mc";
CmdArgs.push_back("-filetype=obj");
CmdArgs.push_back(Args.MakeArgString(MCpuString));
CmdArgs.push_back(Args.MakeArgString(
"-mcpu=hexagon" +
toolchains::HexagonToolChain::GetTargetCPUVersion(Args)));

if (Output.isFilename()) {
CmdArgs.push_back("-o");
@ -158,8 +157,7 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
}

if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
std::string N = llvm::utostr(G.getValue());
CmdArgs.push_back(Args.MakeArgString(std::string("-gpsize=") + N));
CmdArgs.push_back(Args.MakeArgString("-gpsize=" + Twine(G.getValue())));
}

Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
@ -192,7 +190,7 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
II.getInputArg().render(Args, CmdArgs);
}

auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName.c_str()));
auto *Exec = Args.MakeArgString(HTC.GetProgramPath(AsName));
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}

@ -243,10 +241,8 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Opt.c_str());

CmdArgs.push_back("-march=hexagon");
std::string CpuVer =
toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
std::string MCpuString = "-mcpu=hexagon" + CpuVer;
CmdArgs.push_back(Args.MakeArgString(MCpuString));
StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args);
CmdArgs.push_back(Args.MakeArgString("-mcpu=hexagon" + CpuVer));

if (IsShared) {
CmdArgs.push_back("-shared");
@ -261,8 +257,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-pie");

if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) {
std::string N = llvm::utostr(G.getValue());
CmdArgs.push_back(Args.MakeArgString(std::string("-G") + N));
CmdArgs.push_back(Args.MakeArgString("-G" + Twine(G.getValue())));
UseG0 = G.getValue() == 0;
}

@ -291,7 +286,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
//----------------------------------------------------------------------------
// Start Files
//----------------------------------------------------------------------------
const std::string MCpuSuffix = "/" + CpuVer;
const std::string MCpuSuffix = "/" + CpuVer.str();
const std::string MCpuG0Suffix = MCpuSuffix + "/G0";
const std::string RootDir =
HTC.getHexagonTargetDir(D.InstalledDir, D.PrefixDirs) + "/";
@ -351,7 +346,7 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
CmdArgs.push_back("--start-group");

if (!IsShared) {
for (const std::string &Lib : OsLibs)
for (StringRef Lib : OsLibs)
CmdArgs.push_back(Args.MakeArgString("-l" + Lib));
CmdArgs.push_back("-lc");
}
97
contrib/llvm/tools/clang/lib/Headers/avx512bitalgintrin.h
Normal file
@ -0,0 +1,97 @@
/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512bitalgintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512BITALGINTRIN_H
#define __AVX512BITALGINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi16(__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U,
(__v32hi) _mm512_popcnt_epi16(__B),
(__v32hi) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B)
{
return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(),
__U,
__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_popcnt_epi8(__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U,
(__v64qi) _mm512_popcnt_epi8(__B),
(__v64qi) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B)
{
return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(),
__U,
__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B)
{
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A,
(__v64qi) __B,
__U);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B)
{
return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1,
__A,
__B);
}


#undef __DEFAULT_FN_ATTRS

#endif
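A hedged usage sketch for the BITALG header above; the wrapper function is illustrative and must be compiled with AVX512BITALG enabled (e.g. -mavx512bitalg):

    #include <immintrin.h>

    /* Per-lane population count of 'v'; lanes of 'src' are kept where the
       corresponding bit of 'mask' is clear, per the definitions above. */
    static __m512i lane_popcount(__m512i src, __mmask32 mask, __m512i v) {
      return _mm512_mask_popcnt_epi16(src, mask, v);
    }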
391
contrib/llvm/tools/clang/lib/Headers/avx512vbmi2intrin.h
Normal file
@ -0,0 +1,391 @@
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VBMI2INTRIN_H
#define __AVX512VBMI2INTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2")))


static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
{
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
(__v32hi) __S,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
{
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
(__v32hi) _mm512_setzero_hi(),
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
{
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
(__v64qi) __S,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
{
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
(__v64qi) _mm512_setzero_qi(),
__U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
{
__builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
__U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
{
__builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
{
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
(__v32hi) __S,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
{
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
(__v32hi) _mm512_setzero_hi(),
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
{
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
(__v64qi) __S,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
{
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
(__v64qi) _mm512_setzero_qi(),
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
{
return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
(__v32hi) __S,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
{
return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
(__v32hi) _mm512_setzero_hi(),
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
{
return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
(__v64qi) __S,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
{
return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
(__v64qi) _mm512_setzero_qi(),
__U);
}

#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \
(__v8di)(B), \
(int)(I), \
(__v8di)(S), \
(__mmask8)(U)); })

#define _mm512_maskz_shldi_epi64(U, A, B, I) \
_mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I))

#define _mm512_shldi_epi64(A, B, I) \
_mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))

#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \
(__v16si)(B), \
(int)(I), \
(__v16si)(S), \
(__mmask16)(U)); })

#define _mm512_maskz_shldi_epi32(U, A, B, I) \
_mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I))

#define _mm512_shldi_epi32(A, B, I) \
_mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))

#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \
(__v32hi)(B), \
(int)(I), \
(__v32hi)(S), \
(__mmask32)(U)); })

#define _mm512_maskz_shldi_epi16(U, A, B, I) \
_mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))

#define _mm512_shldi_epi16(A, B, I) \
_mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))

#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \
(__v8di)(B), \
(int)(I), \
(__v8di)(S), \
(__mmask8)(U)); })

#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
_mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I))

#define _mm512_shrdi_epi64(A, B, I) \
_mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))

#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \
(__v16si)(B), \
(int)(I), \
(__v16si)(S), \
(__mmask16)(U)); })

#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
_mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I))

#define _mm512_shrdi_epi32(A, B, I) \
_mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))

#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
(__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \
(__v32hi)(B), \
(int)(I), \
(__v32hi)(S), \
(__mmask32)(U)); })

#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
_mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))

#define _mm512_shrdi_epi16(A, B, I) \
_mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
(__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}


static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
(__mmask32) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
(__v8di) __A,
(__v8di) __B,
(__mmask8) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
(__v16si) __A,
(__v16si) __B,
(__mmask16) -1);
}


static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
__U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
(__v32hi) __A,
(__v32hi) __B,
(__mmask32) -1);
}


#undef __DEFAULT_FN_ATTRS

#endif

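A hedged usage sketch for the VBMI2 header above; the wrapper function is illustrative and assumes AVX512VBMI2 support (e.g. -mavx512vbmi2):

    #include <immintrin.h>

    /* Pack the 16-bit lanes selected by 'mask' toward the low end and
       zero the rest, per _mm512_maskz_compress_epi16 above. */
    static __m512i pack_selected(__mmask32 mask, __m512i v) {
      return _mm512_maskz_compress_epi16(mask, v);
    }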
157
contrib/llvm/tools/clang/lib/Headers/avx512vlbitalgintrin.h
Normal file
@ -0,0 +1,157 @@
|
||||
/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------===
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512vlbitalgintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VLBITALGINTRIN_H
|
||||
#define __AVX512VLBITALGINTRIN_H
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg")))
|
||||
|
||||
static __inline__ __m256i __DEFAULT_FN_ATTRS
|
||||
_mm256_popcnt_epi16(__m256i __A)
|
||||
{
|
||||
  return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
{
  return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
                                              (__v16hi) _mm256_popcnt_epi16(__B),
                                              (__v16hi) __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
{
  return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
                                  __U,
                                  __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_popcnt_epi16(__m128i __A)
{
  return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
{
  return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
                                              (__v8hi) _mm128_popcnt_epi16(__B),
                                              (__v8hi) __A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
{
  return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
                                  __U,
                                  __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_popcnt_epi8(__m256i __A)
{
  return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
{
  return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
                                              (__v32qi) _mm256_popcnt_epi8(__B),
                                              (__v32qi) __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
{
  return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
                                 __U,
                                 __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_popcnt_epi8(__m128i __A)
{
  return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
{
  return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
                                              (__v16qi) _mm128_popcnt_epi8(__B),
                                              (__v16qi) __A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
{
  return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
                                 __U,
                                 __B);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B)
{
  return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
                                                         (__v32qi) __B,
                                                         __U);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS
_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B)
{
  return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1,
                                           __A,
                                           __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B)
{
  return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
                                                         (__v16qi) __B,
                                                         __U);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B)
{
  return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1,
                                           __A,
                                           __B);
}


#undef __DEFAULT_FN_ATTRS

#endif
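The block above closes the AVX512VL+BITALG header: per-lane population counts for 16-bit and 8-bit elements, plus the bit-shuffle mask forms. A minimal usage sketch (not part of the diff), assuming a toolchain and CPU with both features and compilation with clang -mavx512vl -mavx512bitalg; the expected output follows from the documented per-lane semantics:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  __m256i v = _mm256_set1_epi16(0x00FF);   /* 8 set bits in every 16-bit lane */
  __m256i c = _mm256_popcnt_epi16(v);      /* per-lane population count */
  short out[16];
  _mm256_storeu_si256((__m256i *)out, c);
  printf("lane 0 popcount: %d\n", out[0]); /* expected: 8 */
  return 0;
}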
748
contrib/llvm/tools/clang/lib/Headers/avx512vlvbmi2intrin.h
Normal file
@ -0,0 +1,748 @@
/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLVBMI2INTRIN_H
#define __AVX512VLVBMI2INTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2")))

static __inline __m128i __DEFAULT_FN_ATTRS
_mm128_setzero_hi(void) {
  return (__m128i)(__v8hi){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
{
  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
                                                      (__v8hi) __S,
                                                      __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_compress_epi16(__mmask8 __U, __m128i __D)
{
  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
                                                      (__v8hi) _mm128_setzero_hi(),
                                                      __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
{
  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
                                                      (__v16qi) __S,
                                                      __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_compress_epi8(__mmask16 __U, __m128i __D)
{
  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
                                                      (__v16qi) _mm128_setzero_hi(),
                                                      __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm128_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
{
  __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
                                          __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm128_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
{
  __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
                                          __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
{
  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
                                                    (__v8hi) __S,
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_expand_epi16(__mmask8 __U, __m128i __D)
{
  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
                                                    (__v8hi) _mm128_setzero_hi(),
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
{
  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
                                                    (__v16qi) __S,
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_expand_epi8(__mmask16 __U, __m128i __D)
{
  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
                                                    (__v16qi) _mm128_setzero_hi(),
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
{
  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
                                                        (__v8hi) __S,
                                                        __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
{
  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
                                                        (__v8hi) _mm128_setzero_hi(),
                                                        __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
{
  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
                                                        (__v16qi) __S,
                                                        __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
{
  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
                                                        (__v16qi) _mm128_setzero_hi(),
                                                        __U);
}

static __inline __m256i __DEFAULT_FN_ATTRS
_mm256_setzero_hi(void) {
  return (__m256i)(__v16hi){ 0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0 };
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
{
  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
                                                      (__v16hi) __S,
                                                      __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
{
  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
                                                      (__v16hi) _mm256_setzero_hi(),
                                                      __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
{
  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
                                                      (__v32qi) __S,
                                                      __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
{
  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
                                                      (__v32qi) _mm256_setzero_hi(),
                                                      __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
{
  __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D,
                                          __U);
}

static __inline__ void __DEFAULT_FN_ATTRS
_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
{
  __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D,
                                          __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
{
  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
                                                    (__v16hi) __S,
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
{
  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
                                                    (__v16hi) _mm256_setzero_hi(),
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
{
  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
                                                    (__v32qi) __S,
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
{
  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
                                                    (__v32qi) _mm256_setzero_hi(),
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
{
  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
                                                        (__v16hi) __S,
                                                        __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
{
  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
                                                        (__v16hi) _mm256_setzero_hi(),
                                                        __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
{
  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
                                                        (__v32qi) __S,
                                                        __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
{
  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
                                                        (__v32qi) _mm256_setzero_hi(),
                                                        __U);
}

#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \
                                          (__v4di)(B), \
                                          (int)(I), \
                                          (__v4di)(S), \
                                          (__mmask8)(U)); })

#define _mm256_maskz_shldi_epi64(U, A, B, I) \
  _mm256_mask_shldi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))

#define _mm256_shldi_epi64(A, B, I) \
  _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))

#define _mm128_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \
                                          (__v2di)(B), \
                                          (int)(I), \
                                          (__v2di)(S), \
                                          (__mmask8)(U)); })

#define _mm128_maskz_shldi_epi64(U, A, B, I) \
  _mm128_mask_shldi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))

#define _mm128_shldi_epi64(A, B, I) \
  _mm128_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))

#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \
                                          (__v8si)(B), \
                                          (int)(I), \
                                          (__v8si)(S), \
                                          (__mmask8)(U)); })

#define _mm256_maskz_shldi_epi32(U, A, B, I) \
  _mm256_mask_shldi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))

#define _mm256_shldi_epi32(A, B, I) \
  _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))

#define _mm128_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \
                                          (__v4si)(B), \
                                          (int)(I), \
                                          (__v4si)(S), \
                                          (__mmask8)(U)); })

#define _mm128_maskz_shldi_epi32(U, A, B, I) \
  _mm128_mask_shldi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))

#define _mm128_shldi_epi32(A, B, I) \
  _mm128_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))

#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \
                                          (__v16hi)(B), \
                                          (int)(I), \
                                          (__v16hi)(S), \
                                          (__mmask16)(U)); })

#define _mm256_maskz_shldi_epi16(U, A, B, I) \
  _mm256_mask_shldi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))

#define _mm256_shldi_epi16(A, B, I) \
  _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))

#define _mm128_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \
                                          (__v8hi)(B), \
                                          (int)(I), \
                                          (__v8hi)(S), \
                                          (__mmask8)(U)); })

#define _mm128_maskz_shldi_epi16(U, A, B, I) \
  _mm128_mask_shldi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))

#define _mm128_shldi_epi16(A, B, I) \
  _mm128_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))

#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \
                                          (__v4di)(B), \
                                          (int)(I), \
                                          (__v4di)(S), \
                                          (__mmask8)(U)); })

#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
  _mm256_mask_shrdi_epi64(_mm256_setzero_hi(), (U), (A), (B), (I))

#define _mm256_shrdi_epi64(A, B, I) \
  _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))

#define _mm128_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \
                                          (__v2di)(B), \
                                          (int)(I), \
                                          (__v2di)(S), \
                                          (__mmask8)(U)); })

#define _mm128_maskz_shrdi_epi64(U, A, B, I) \
  _mm128_mask_shrdi_epi64(_mm128_setzero_hi(), (U), (A), (B), (I))

#define _mm128_shrdi_epi64(A, B, I) \
  _mm128_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))

#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \
                                          (__v8si)(B), \
                                          (int)(I), \
                                          (__v8si)(S), \
                                          (__mmask8)(U)); })

#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
  _mm256_mask_shrdi_epi32(_mm256_setzero_hi(), (U), (A), (B), (I))

#define _mm256_shrdi_epi32(A, B, I) \
  _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))

#define _mm128_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \
                                          (__v4si)(B), \
                                          (int)(I), \
                                          (__v4si)(S), \
                                          (__mmask8)(U)); })

#define _mm128_maskz_shrdi_epi32(U, A, B, I) \
  _mm128_mask_shrdi_epi32(_mm128_setzero_hi(), (U), (A), (B), (I))

#define _mm128_shrdi_epi32(A, B, I) \
  _mm128_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))

#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \
                                          (__v16hi)(B), \
                                          (int)(I), \
                                          (__v16hi)(S), \
                                          (__mmask16)(U)); })

#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
  _mm256_mask_shrdi_epi16(_mm256_setzero_hi(), (U), (A), (B), (I))

#define _mm256_shrdi_epi16(A, B, I) \
  _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))

#define _mm128_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \
                                          (__v8hi)(B), \
                                          (int)(I), \
                                          (__v8hi)(S), \
                                          (__mmask8)(U)); })

#define _mm128_maskz_shrdi_epi16(U, A, B, I) \
  _mm128_mask_shrdi_epi16(_mm128_setzero_hi(), (U), (A), (B), (I))

#define _mm128_shrdi_epi16(A, B, I) \
  _mm128_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
                                                    (__v4di) __A,
                                                    (__v4di) __B,
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
                                                     (__v4di) __A,
                                                     (__v4di) __B,
                                                     __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
                                                    (__v4di) __A,
                                                    (__v4di) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
                                                    (__v2di) __A,
                                                    (__v2di) __B,
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
                                                     (__v2di) __A,
                                                     (__v2di) __B,
                                                     __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
                                                    (__v2di) __A,
                                                    (__v2di) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
                                                    (__v16hi) __A,
                                                    (__v16hi) __B,
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
                                                     (__v16hi) __A,
                                                     (__v16hi) __B,
                                                     __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
                                                    (__v16hi) __A,
                                                    (__v16hi) __B,
                                                    (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
                                                    (__v8hi) __A,
                                                    (__v8hi) __B,
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
                                                     (__v8hi) __A,
                                                     (__v8hi) __B,
                                                     __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
                                                    (__v8hi) __A,
                                                    (__v8hi) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
                                                    (__v4di) __A,
                                                    (__v4di) __B,
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
                                                     (__v4di) __A,
                                                     (__v4di) __B,
                                                     __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
                                                    (__v4di) __A,
                                                    (__v4di) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
                                                    (__v2di) __A,
                                                    (__v2di) __B,
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
                                                     (__v2di) __A,
                                                     (__v2di) __B,
                                                     __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
                                                    (__v2di) __A,
                                                    (__v2di) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
                                                    (__v16hi) __A,
                                                    (__v16hi) __B,
                                                    __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
                                                     (__v16hi) __A,
                                                     (__v16hi) __B,
                                                     __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
                                                    (__v16hi) __A,
                                                    (__v16hi) __B,
                                                    (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
                                                    (__v8hi) __A,
                                                    (__v8hi) __B,
                                                    __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
                                                     (__v8hi) __A,
                                                     (__v8hi) __B,
                                                     __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
                                                    (__v8hi) __A,
                                                    (__v8hi) __B,
                                                    (__mmask8) -1);
}


#undef __DEFAULT_FN_ATTRS

#endif
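Usage sketch for the compress forms above (not part of the diff). Note that this revision spells the 128-bit intrinsics with an unusual _mm128_ prefix, so the sketch follows suit; it assumes AVX512VL+AVX512VBMI2 hardware and compilation with clang -mavx512vl -mavx512vbmi2:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  __m128i d = _mm_setr_epi16(10, 11, 12, 13, 14, 15, 16, 17);
  /* Mask 0x55 selects lanes 0, 2, 4, 6; selected values pack toward
     element 0 and the maskz form zeroes the remaining lanes. */
  __m128i r = _mm128_maskz_compress_epi16((__mmask8)0x55, d);
  short out[8];
  _mm_storeu_si128((__m128i *)out, r);
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 10 12 14 16 */
  return 0;
}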
254
contrib/llvm/tools/clang/lib/Headers/avx512vlvnniintrin.h
Normal file
@ -0,0 +1,254 @@
/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLVNNIINTRIN_H
#define __AVX512VLVNNIINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni")))


static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S,
                                                      (__v8si) __A,
                                                      (__v8si) __B,
                                                      (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
                                                    (__v8si) __A,
                                                    (__v8si) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S,
                                                      (__v8si) __A,
                                                      (__v8si) __B,
                                                      (__mmask8) __U);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
                                                     (__v8si) __A,
                                                     (__v8si) __B,
                                                     (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
                                                      (__v4si) __A,
                                                      (__v4si) __B,
                                                      (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
                                                    (__v4si) __A,
                                                    (__v4si) __B,
                                                    (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
                                                      (__v4si) __A,
                                                      (__v4si) __B,
                                                      (__mmask8) __U);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm128_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
                                                     (__v4si) __A,
                                                     (__v4si) __B,
                                                     (__mmask8) -1);
}


#undef __DEFAULT_FN_ATTRS

#endif
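Usage sketch for the VL VNNI forms above (not part of the diff): _mm256_dpbusd_epi32 accumulates, into each 32-bit lane, the sum of four products of unsigned bytes from the second operand with signed bytes from the third. Assumes AVX512VL+AVX512VNNI hardware and clang -mavx512vl -mavx512vnni:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  __m256i acc = _mm256_setzero_si256();
  __m256i a = _mm256_set1_epi8(2);            /* treated as unsigned bytes */
  __m256i b = _mm256_set1_epi8(3);            /* treated as signed bytes */
  __m256i r = _mm256_dpbusd_epi32(acc, a, b); /* each lane: 4 * (2*3) = 24 */
  int out[8];
  _mm256_storeu_si256((__m256i *)out, r);
  printf("lane 0: %d\n", out[0]);             /* expected: 24 */
  return 0;
}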
146
contrib/llvm/tools/clang/lib/Headers/avx512vnniintrin.h
Normal file
@ -0,0 +1,146 @@
/*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VNNIINTRIN_H
#define __AVX512VNNIINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni")))


static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
                                                    (__v16si) __A,
                                                    (__v16si) __B,
                                                    (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S,
                                                     (__v16si) __A,
                                                     (__v16si) __B,
                                                     (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
                                                    (__v16si) __A,
                                                    (__v16si) __B,
                                                    (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
                                                     (__v16si) __A,
                                                     (__v16si) __B,
                                                     (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S,
                                                      (__v16si) __A,
                                                      (__v16si) __B,
                                                      (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
                                                     (__v16si) __A,
                                                     (__v16si) __B,
                                                     (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
                                                    (__v16si) __A,
                                                    (__v16si) __B,
                                                    (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S,
                                                     (__v16si) __A,
                                                     (__v16si) __B,
                                                     (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
                                                    (__v16si) __A,
                                                    (__v16si) __B,
                                                    (__mmask16) -1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
                                                     (__v16si) __A,
                                                     (__v16si) __B,
                                                     (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S,
                                                      (__v16si) __A,
                                                      (__v16si) __B,
                                                      (__mmask16) __U);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
                                                     (__v16si) __A,
                                                     (__v16si) __B,
                                                     (__mmask16) -1);
}


#undef __DEFAULT_FN_ATTRS

#endif
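The 512-bit forms above follow the same per-lane rule. As a reference, here is a scalar model of one 32-bit lane of vpdpbusd, written from the instruction's documented semantics (the dpbusds variants additionally saturate the sum to the int32 range):

#include <stdint.h>

/* One 32-bit lane of vpdpbusd: accumulate the sum of four
   unsigned-byte * signed-byte products. */
static int32_t dpbusd_lane(int32_t acc, const uint8_t a[4], const int8_t b[4])
{
  int32_t sum = 0;
  for (int i = 0; i < 4; ++i)
    sum += (int32_t)a[i] * (int32_t)b[i];
  return acc + sum;
}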
202
contrib/llvm/tools/clang/lib/Headers/gfniintrin.h
Normal file
@ -0,0 +1,202 @@
/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __GFNIINTRIN_H
#define __GFNIINTRIN_H


#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
                                                  (__v16qi)(__m128i)(B), \
                                                  (char)(I)); })

#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
        (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \
        (__v16qi)(__m128i)(S)); })


#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
  (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \
        U, A, B, I); })


#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
                                                  (__v32qi)(__m256i)(B), \
                                                  (char)(I)); })

#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
        (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \
        (__v32qi)(__m256i)(S)); })

#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
  (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \
        U, A, B, I); })


#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \
                                                  (__v64qi)(__m512i)(B), \
                                                  (char)(I)); })

#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
        (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \
        (__v64qi)(__m512i)(S)); })

#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \
  (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(), \
        U, A, B, I); })

#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \
                                               (__v16qi)(__m128i)(B), \
                                               (char)(I)); })

#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \
        (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \
        (__v16qi)(__m128i)(S)); })


#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
  (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \
        U, A, B, I); })


#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \
                                               (__v32qi)(__m256i)(B), \
                                               (char)(I)); })

#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \
        (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \
        (__v32qi)(__m256i)(S)); })

#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
  (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \
        U, A, B, I); })


#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \
                                               (__v64qi)(__m512i)(B), \
                                               (char)(I)); })

#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \
        (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \
        (__v64qi)(__m512i)(S)); })

#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \
  (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(), \
        U, A, B, I); })

/* Default attributes for simple form (no masking). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni")))

/* Default attributes for ZMM forms. */
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni")))

/* Default attributes for VLX forms. */
#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni")))

static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
                                                   (__v16qi) __B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_selectb_128(__U,
        (__v16qi) _mm_gf2p8mul_epi8(__A, __B),
        (__v16qi) __S);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
{
  return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),
        __U, __A, __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A,
                                                   (__v32qi) __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_selectb_256(__U,
        (__v32qi) _mm256_gf2p8mul_epi8(__A, __B),
        (__v32qi) __S);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
{
  return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),
        __U, __A, __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A,
                                                   (__v64qi) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_selectb_512(__U,
        (__v64qi) _mm512_gf2p8mul_epi8(__A, __B),
        (__v64qi) __S);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(),
        __U, __A, __B);
}

#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_F
#undef __DEFAULT_FN_ATTRS_VL

#endif // __GFNIINTRIN_H
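Usage sketch for _mm_gf2p8mul_epi8 above (not part of the diff): it multiplies corresponding bytes in GF(2^8) reduced by the AES polynomial x^8 + x^4 + x^3 + x + 1, for which {57} * {83} = {c1} is the worked example in FIPS-197. Assumes GFNI hardware and clang -mgfni:

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  __m128i a = _mm_set1_epi8(0x57);
  __m128i b = _mm_set1_epi8((char)0x83);
  __m128i r = _mm_gf2p8mul_epi8(a, b);  /* byte-wise GF(2^8) multiply */
  unsigned char out[16];
  _mm_storeu_si128((__m128i *)out, r);
  printf("0x%02x\n", out[0]);           /* expected: 0xc1 */
  return 0;
}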
@ -118,6 +118,10 @@ _mm256_cvtph_ps(__m128i __a)
}
#endif /* __AVX2__ */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
#endif
@ -146,6 +150,10 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512bwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif
@ -159,10 +167,24 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512vpopcntdqvlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
@ -200,6 +222,15 @@ _mm256_cvtph_ps(__m128i __a)
#include <avx512vbmivlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif
@ -208,6 +239,14 @@ _mm256_cvtph_ps(__m128i __a)
#include <pkuintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
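The immintrin.h hunks above all use the same guard: each sub-header is visible when the matching feature macro is enabled by the target options, and unconditionally under MSVC or Clang modules, where everything must parse. A sketch of the effect on user code (hypothetical file, not part of the diff):

#include <immintrin.h>

/* Compiled plain, the VNNI names are not declared; compiled with
   clang -mavx512vnni, __AVX512VNNI__ is defined and <immintrin.h>
   pulls in avx512vnniintrin.h. */
#ifdef __AVX512VNNI__
__m512i fused_dot(__m512i acc, __m512i a, __m512i b)
{
  return _mm512_dpbusd_epi32(acc, a, b);
}
#endif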
98
contrib/llvm/tools/clang/lib/Headers/vaesintrin.h
Normal file
@ -0,0 +1,98 @@
/*===------------------ vaesintrin.h - VAES intrinsics ---------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <vaesintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __VAESINTRIN_H
#define __VAESINTRIN_H

/* Default attributes for YMM forms. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes")))

/* Default attributes for ZMM forms. */
#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes")))


static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesenc_epi128(__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_aesenc256((__v4di) __A,
                                            (__v4di) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesenc_epi128(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_aesenc512((__v8di) __A,
                                            (__v8di) __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesdec_epi128(__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_aesdec256((__v4di) __A,
                                            (__v4di) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesdec_epi128(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_aesdec512((__v8di) __A,
                                            (__v8di) __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesenclast_epi128(__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A,
                                                (__v4di) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesenclast_epi128(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A,
                                                (__v8di) __B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_aesdeclast_epi128(__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A,
                                                (__v4di) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_F
_mm512_aesdeclast_epi128(__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A,
                                                (__v8di) __B);
}


#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_F

#endif
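Sketch for the widened AES forms above (not part of the diff): each 256-bit intrinsic performs one AES round step on two independent 128-bit blocks per register (the 512-bit forms handle four). Key expansion and round count remain the caller's responsibility; assumes VAES hardware and clang -mvaes:

#include <immintrin.h>

/* One middle round plus the final round on two packed 128-bit states. */
static __m256i aes_two_blocks(__m256i state, __m256i rk, __m256i last_rk)
{
  state = _mm256_aesenc_epi128(state, rk);
  return _mm256_aesenclast_epi128(state, last_rk);
}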
42
contrib/llvm/tools/clang/lib/Headers/vpclmulqdqintrin.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <vpclmulqdqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __VPCLMULQDQINTRIN_H
#define __VPCLMULQDQINTRIN_H

#define _mm256_clmulepi64_epi128(A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_pclmulqdq256((__v4di)(__m256i)(A), \
                                       (__v4di)(__m256i)(B), \
                                       (char)(I)); })

#define _mm512_clmulepi64_epi128(A, B, I) __extension__ ({ \
  (__m512i)__builtin_ia32_pclmulqdq512((__v8di)(__m512i)(A), \
                                       (__v8di)(__m512i)(B), \
                                       (char)(I)); })

#endif // __VPCLMULQDQINTRIN_H
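
Similarly, a hedged usage sketch for the new carry-less multiply macro (assumed caller code, not part of this commit). The immediate must be a compile-time constant, which is why the intrinsic is macro-based rather than a function.

#include <immintrin.h>

// Assumed caller code, for illustration only: carry-less multiply of the
// low 64-bit qword of each 128-bit lane (immediate 0x00 selects low x low).
// Compile with e.g. -mvpclmulqdq.
__attribute__((target("vpclmulqdq")))
static __m256i clmul_low_halves(__m256i a, __m256i b) {
  return _mm256_clmulepi64_epi128(a, b, 0x00);
}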
|
@ -1548,15 +1548,21 @@ void Parser::DiagnoseMisplacedCXX11Attribute(ParsedAttributesWithRange &Attrs,
|
  SourceLocation Loc = Tok.getLocation();
  ParseCXX11Attributes(Attrs);
  CharSourceRange AttrRange(SourceRange(Loc, Attrs.Range.getEnd()), true);

  // FIXME: use err_attributes_misplaced
  Diag(Loc, diag::err_attributes_not_allowed)
      << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange)
      << FixItHint::CreateRemoval(AttrRange);
}

void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs) {
  Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed)
      << attrs.Range;
void Parser::DiagnoseProhibitedAttributes(ParsedAttributesWithRange &attrs,
                                          const SourceLocation CorrectLocation) {
  if (CorrectLocation.isValid()) {
    CharSourceRange AttrRange(attrs.Range, true);
    Diag(CorrectLocation, diag::err_attributes_misplaced)
        << FixItHint::CreateInsertionFromRange(CorrectLocation, AttrRange)
        << FixItHint::CreateRemoval(AttrRange);
  } else
    Diag(attrs.Range.getBegin(), diag::err_attributes_not_allowed) << attrs.Range;
}

void Parser::ProhibitCXX11Attributes(ParsedAttributesWithRange &Attrs,
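
A toy model of the control flow the new overload adds (assumed simplification, not Clang code): a valid CorrectLocation selects the relocation diagnostic with fix-it hints, while an invalid one falls back to the plain prohibition error, preserving the old behavior for existing callers.

#include <iostream>

struct Loc {
  bool valid = false;
  bool isValid() const { return valid; }
};

// Assumed sketch of the dispatch: valid location -> "misplaced" + fix-it,
// invalid location -> plain "not allowed" error.
void diagnoseProhibitedAttrs(Loc correctLocation) {
  if (correctLocation.isValid())
    std::cout << "error: misplaced attributes; fix-it moves them\n";
  else
    std::cout << "error: attributes are not allowed here\n";
}

int main() {
  diagnoseProhibitedAttrs(Loc{});      // old behavior: plain error
  diagnoseProhibitedAttrs(Loc{true});  // new path: relocation fix-it
}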
|
@ -930,7 +930,31 @@ Parser::ParseDeclOrFunctionDefInternal(ParsedAttributesWithRange &attrs,
|
  // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
  // declaration-specifiers init-declarator-list[opt] ';'
  if (Tok.is(tok::semi)) {
    ProhibitAttributes(attrs);
    auto LengthOfTSTToken = [](DeclSpec::TST TKind) {
      assert(DeclSpec::isDeclRep(TKind));
      switch(TKind) {
        case DeclSpec::TST_class:
          return 5;
        case DeclSpec::TST_struct:
          return 6;
        case DeclSpec::TST_union:
          return 5;
        case DeclSpec::TST_enum:
          return 4;
        case DeclSpec::TST_interface:
          return 9;
        default:
          llvm_unreachable("we only expect to get the length of the class/struct/union/enum");
      }
    };
    // Suggest correct location to fix '[[attrib]] struct' to 'struct [[attrib]]'
    SourceLocation CorrectLocationForAttributes =
        DeclSpec::isDeclRep(DS.getTypeSpecType())
            ? DS.getTypeSpecTypeLoc().getLocWithOffset(
                  LengthOfTSTToken(DS.getTypeSpecType()))
            : SourceLocation();
    ProhibitAttributes(attrs, CorrectLocationForAttributes);
    ConsumeToken();
    RecordDecl *AnonRecord = nullptr;
    Decl *TheDecl = Actions.ParsedFreeStandingDeclSpec(getCurScope(), AS_none,
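
At the source level, per the "Suggest correct location" comment above, the fix-it moves a misplaced attribute to just after the class-key (an assumed example of diagnosed and corrected code):

// Assumed example of the user-facing effect. Before the fix-it, the
// attribute is diagnosed as misplaced:
//   [[deprecated]] struct Widget;
// After applying the suggested fix-it, it appertains to the type:
struct [[deprecated]] Widget;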
|
@ -12265,11 +12265,10 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion(
|
  // Construct the body of the conversion function { return __invoke; }.
  Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(),
                                       VK_LValue, Conv->getLocation()).get();
  assert(FunctionRef && "Can't refer to __invoke function?");
  Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get();
  Conv->setBody(new (Context) CompoundStmt(Context, Return,
                                           Conv->getLocation(),
                                           Conv->getLocation()));
  assert(FunctionRef && "Can't refer to __invoke function?");
  Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).get();
  Conv->setBody(CompoundStmt::Create(Context, Return, Conv->getLocation(),
                                     Conv->getLocation()));

  Conv->markUsed(Context);
  Conv->setReferenced();
|
@ -12330,9 +12329,8 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion(
|

  // Set the body of the conversion function.
  Stmt *ReturnS = Return.get();
  Conv->setBody(new (Context) CompoundStmt(Context, ReturnS,
                                           Conv->getLocation(),
                                           Conv->getLocation()));
  Conv->setBody(CompoundStmt::Create(Context, ReturnS, Conv->getLocation(),
                                     Conv->getLocation()));
  Conv->markUsed(Context);

  // We're done; notify the mutation listener, if any.
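
Both hunks swap `new (Context) CompoundStmt(...)` for a `CompoundStmt::Create(...)` factory, the usual shape when a node's variable-length payload moves into the same allocation as the node itself. A simplified sketch of that trailing-objects pattern (assumed illustration, not Clang's actual class):

#include <cstddef>
#include <cstdlib>
#include <new>

struct Stmt {};

// Simplified illustration (assumed, not Clang's implementation) of why a
// Create() factory replaces a plain placement new: the node and its array
// of sub-statement pointers are sized and allocated as one block, with the
// array living immediately after the object.
class CompoundStmtSketch {
  std::size_t NumStmts;

  explicit CompoundStmtSketch(std::size_t N) : NumStmts(N) {}

  // The trailing array starts right past the end of the object.
  Stmt **body() { return reinterpret_cast<Stmt **>(this + 1); }

public:
  static CompoundStmtSketch *Create(Stmt *const *Stmts, std::size_t N) {
    // One allocation covers the node plus N trailing pointers.
    void *Mem = std::malloc(sizeof(CompoundStmtSketch) + N * sizeof(Stmt *));
    auto *CS = new (Mem) CompoundStmtSketch(N);
    for (std::size_t I = 0; I != N; ++I)
      CS->body()[I] = Stmts[I];
    return CS;
  }
};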