Merge llvm-project release/13.x llvmorg-13.0.0-rc3-8-g08642a395f23
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvmorg-13.0.0-rc3-8-g08642a395f23.

PR:		258209
MFC after:	2 weeks
commit 8c6f6c0c80
@@ -2120,11 +2120,12 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
// Ensure we do not inline the function. This is trivially true for the ones
// passed to __kmpc_fork_call but the ones calles in serialized regions
// passed to __kmpc_fork_call but the ones called in serialized regions
// could be inlined. This is not a perfect but it is closer to the invariant
// we want, namely, every data environment starts with a new function.
// TODO: We should pass the if condition to the runtime function and do the
// handling there. Much cleaner code.
OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

@@ -2398,7 +2398,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
static bool isFunctionDeclarationName(const FormatToken &Current,
static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
const AnnotatedLine &Line) {
auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
for (; Next; Next = Next->Next) {

@@ -2476,14 +2476,21 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
if (Next->MatchingParen->Next &&
Next->MatchingParen->Next->is(TT_PointerOrReference))
return true;
// Check for K&R C function definitions, e.g.:
// Check for K&R C function definitions (and C++ function definitions with
// unnamed parameters), e.g.:
// int f(i)
// {
// return i + 1;
// }
if (Next->Next && Next->Next->is(tok::identifier) &&
!(Next->MatchingParen->Next && Next->MatchingParen->Next->is(tok::semi)))
// bool g(size_t = 0, bool b = false)
// {
// return !b;
// }
if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
!Line.endsWith(tok::semi))
return true;
for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
Tok = Tok->Next) {
if (Tok->is(TT_TypeDeclarationParen))

@@ -2544,7 +2551,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
calculateArrayInitializerColumnList(Line);
while (Current) {
if (isFunctionDeclarationName(*Current, Line))
if (isFunctionDeclarationName(Style.isCpp(), *Current, Line))
Current->setType(TT_FunctionDeclarationName);
if (Current->is(TT_LineComment)) {
if (Current->Previous->is(BK_BracedInit) &&

@@ -995,6 +995,13 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
Keywords.kw_import, tok::kw_export);
}
// Checks whether a token is a type in K&R C (aka C78).
static bool isC78Type(const FormatToken &Tok) {
return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
tok::kw_unsigned, tok::kw_float, tok::kw_double,
tok::identifier);
}
// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
// int f(a, b)

@@ -1002,13 +1009,24 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
// {
// return a + b;
// }
static bool isC78ParameterDecl(const FormatToken *Tok) {
if (!Tok)
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
const FormatToken *FuncName) {
assert(Tok);
assert(Next);
assert(FuncName);
if (FuncName->isNot(tok::identifier))
return false;
if (!Tok->isOneOf(tok::kw_int, tok::kw_char, tok::kw_float, tok::kw_double,
tok::kw_struct, tok::kw_union, tok::kw_long, tok::kw_short,
tok::kw_unsigned, tok::kw_register, tok::identifier))
const FormatToken *Prev = FuncName->Previous;
if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
return false;
if (!isC78Type(*Tok) &&
!Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
return false;
if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
return false;
Tok = Tok->Previous;

@@ -1369,21 +1387,20 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
case tok::r_brace:
addUnwrappedLine();
return;
case tok::l_paren:
case tok::l_paren: {
parseParens();
// Break the unwrapped line if a K&R C function definition has a parameter
// declaration.
if (!IsTopLevel || !Style.isCpp())
if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
break;
if (!Previous || Previous->isNot(tok::identifier))
break;
if (Previous->Previous && Previous->Previous->is(tok::at))
break;
if (isC78ParameterDecl(FormatTok)) {
const unsigned Position = Tokens->getPosition() + 1;
assert(Position < AllTokens.size());
if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) {
addUnwrappedLine();
return;
}
break;
}
case tok::kw_operator:
nextToken();
if (FormatTok->isBinaryOperator())
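
For reference, the K&R C (C78) definition shape that the new isC78Type/isC78ParameterDecl heuristics and the unwrapped-line break above are aimed at looks roughly like the sketch below; the function and parameter names are illustrative only and not taken from the change:

    /* K&R (C78) definition: parameter declarations follow the parameter list. */
    int add(a, b)
    int a;
    register int b;
    {
        return a + b;
    }

As the comment in the hunk states, the intent is to break the unwrapped line after the parameter list when such a parameter declaration follows, so the declarations and the body keep their own lines.
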

@@ -36,7 +36,7 @@
#ifndef _LIBCPP_STD_VER
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, \
device = {arch(amdgcn, nvptx, nvptx64)}, \
implementation = {extension(match_any, allow_templates)})
#include <complex_cmath.h>

@@ -3481,7 +3481,8 @@ VerifyInitializationSequenceCXX98(const Sema &S,
ExprResult Sema::PerformMoveOrCopyInitialization(
const InitializedEntity &Entity, const NamedReturnInfo &NRInfo, Expr *Value,
bool SupressSimplerImplicitMoves) {
if ((!getLangOpts().CPlusPlus2b || SupressSimplerImplicitMoves) &&
if (getLangOpts().CPlusPlus &&
(!getLangOpts().CPlusPlus2b || SupressSimplerImplicitMoves) &&
NRInfo.isMoveEligible()) {
ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(),
CK_NoOp, Value, VK_XValue, FPOptionsOverride());

@@ -35,7 +35,7 @@ uintptr_t GetCurrentProcess(void);
#include <machine/sysarch.h>
#endif
#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__))
#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || defined(__riscv))
// clang-format off
#include <sys/types.h>
#include <machine/sysarch.h>

@@ -166,6 +166,13 @@ void __clear_cache(void *start, void *end) {
: "=r"(start_reg)
: "r"(start_reg), "r"(end_reg), "r"(flags), "r"(syscall_nr));
assert(start_reg == 0 && "Cache flush syscall failed.");
#elif defined(__riscv) && defined(__OpenBSD__)
struct riscv_sync_icache_args arg;
arg.addr = (uintptr_t)start;
arg.len = (uintptr_t)end - (uintptr_t)start;
sysarch(RISCV_SYNC_ICACHE, &arg);
#else
#if __APPLE__
// On Darwin, sys_icache_invalidate() provides this functionality

@@ -110,6 +110,7 @@ class RegisterInfoPOSIX_arm64
bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); }
bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); }
bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); }
bool IsSVEReg(unsigned reg) const;
bool IsSVEZReg(unsigned reg) const;

@@ -374,17 +374,17 @@ namespace llvm {
bool operator<(const ELFEntrySizeKey &Other) const {
if (SectionName != Other.SectionName)
return SectionName < Other.SectionName;
if (Flags != Other.Flags)
return Flags < Other.Flags;
if ((Flags & ELF::SHF_STRINGS) != (Other.Flags & ELF::SHF_STRINGS))
return Other.Flags & ELF::SHF_STRINGS;
return EntrySize < Other.EntrySize;
}
};
// Symbols must be assigned to a section with a compatible entry size and
// flags. This map is used to assign unique IDs to sections to distinguish
// between sections with identical names but incompatible entry sizes and/or
// flags. This can occur when a symbol is explicitly assigned to a section,
// e.g. via __attribute__((section("myname"))).
// Symbols must be assigned to a section with a compatible entry
// size. This map is used to assign unique IDs to sections to
// distinguish between sections with identical names but incompatible entry
// sizes. This can occur when a symbol is explicitly assigned to a
// section, e.g. via __attribute__((section("myname"))).
std::map<ELFEntrySizeKey, unsigned> ELFEntrySizeMap;
// This set is used to record the generic mergeable section names seen.

@@ -592,8 +592,6 @@ namespace llvm {
bool isELFGenericMergeableSection(StringRef Name);
/// Return the unique ID of the section with the given name, flags and entry
/// size, if it exists.
Optional<unsigned> getELFUniqueIDForEntsize(StringRef SectionName,
unsigned Flags,
unsigned EntrySize);
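
For context on the comment above, explicitly pinning symbols to a named section looks like the minimal C sketch below; the section and variable names are made up for illustration:

    /* Both globals land in a section named "myname"; if their entry sizes or
       flags are incompatible, MC assigns the sections distinct unique IDs
       instead of merging them. */
    const char greeting[] __attribute__((section("myname"))) = "hello";
    int counter __attribute__((section("myname")));
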

@@ -65,7 +65,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
Value *cpyDst, Value *cpySrc, uint64_t cpyLen,
Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
Align cpyAlign, CallInst *C);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet);

@@ -315,10 +315,12 @@ containsUndefinedElement(const Constant *C,
return false;
for (unsigned i = 0, e = cast<FixedVectorType>(VTy)->getNumElements();
i != e; ++i)
if (HasFn(C->getAggregateElement(i)))
i != e; ++i) {
if (Constant *Elem = C->getAggregateElement(i))
if (HasFn(Elem))
return true;
}
}
return false;
}

@@ -586,7 +586,7 @@ void MCContext::recordELFMergeableSectionInfo(StringRef SectionName,
unsigned Flags, unsigned UniqueID,
unsigned EntrySize) {
bool IsMergeable = Flags & ELF::SHF_MERGE;
if (UniqueID == GenericSectionID)
if (IsMergeable && (UniqueID == GenericSectionID))
ELFSeenGenericMergeableSections.insert(SectionName);
// For mergeable sections or non-mergeable sections with a generic mergeable

@@ -4161,7 +4161,8 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
if (VT.getVectorElementType() == MVT::i32 &&
VT.getVectorElementCount().getKnownMinValue() >= 4)
VT.getVectorElementCount().getKnownMinValue() >= 4 &&
!VT.isFixedLengthVector())
return true;
return false;

@@ -103,8 +103,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_BSWAP)
.legalFor({s32, s64, v4s32, v2s32, v2s64})
.clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
.customIf(typeIs(0, v2s16)); // custom lower as G_REV32 + G_LSHR
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})

@@ -799,8 +798,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
case TargetOpcode::G_BSWAP:
return legalizeBSwap(MI, MRI, MIRBuilder);
case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:

@@ -1015,46 +1012,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
return true;
}
bool AArch64LegalizerInfo::legalizeBSwap(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
assert(MI.getOpcode() == TargetOpcode::G_BSWAP);
// The <2 x half> case needs special lowering because there isn't an
// instruction that does that directly. Instead, we widen to <8 x i8>
// and emit a G_REV32 followed by a G_LSHR knowing that instruction selection
// will later match them as:
//
// rev32.8b v0, v0
// ushr.2s v0, v0, #16
//
// We could emit those here directly, but it seems better to keep things as
// generic as possible through legalization, and avoid committing layering
// violations by legalizing & selecting here at the same time.
Register ValReg = MI.getOperand(1).getReg();
assert(LLT::fixed_vector(2, 16) == MRI.getType(ValReg));
const LLT v2s32 = LLT::fixed_vector(2, 32);
const LLT v8s8 = LLT::fixed_vector(8, 8);
const LLT s32 = LLT::scalar(32);
auto Undef = MIRBuilder.buildUndef(v8s8);
auto Insert =
MIRBuilder
.buildInstr(TargetOpcode::INSERT_SUBREG, {v8s8}, {Undef, ValReg})
.addImm(AArch64::ssub);
auto Rev32 = MIRBuilder.buildInstr(AArch64::G_REV32, {v8s8}, {Insert});
auto Bitcast = MIRBuilder.buildBitcast(v2s32, Rev32);
auto Amt = MIRBuilder.buildConstant(v2s32, 16);
auto UShr =
MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {v2s32}, {Bitcast, Amt});
auto Zero = MIRBuilder.buildConstant(s32, 0);
auto Extract = MIRBuilder.buildExtractVectorElement(s32, UShr, Zero);
MIRBuilder.buildBitcast({MI.getOperand(0).getReg()}, Extract);
MI.eraseFromParent();
return true;
}
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {

@@ -35,8 +35,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
MachineInstr &MI) const override;
private:
bool legalizeBSwap(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,

@@ -671,7 +671,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MF.insert(MBBIter, testMBB);
MF.insert(MBBIter, tailMBB);
Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
: Is64Bit ? X86::R11D
: X86::EAX;
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);

@@ -1092,7 +1094,9 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MF.insert(MBBIter, bodyMBB);
MF.insert(MBBIter, footMBB);
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
: Is64Bit ? X86::R11D
: X86::EAX;
// Setup entry block
{

@@ -1996,7 +1996,8 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
UndefValue::get(Int8Ty), F->getName() + ".ID");
for (Use *U : ToBeReplacedStateMachineUses)
U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
ID, U->get()->getType()));
++NumOpenMPParallelRegionsReplacedInGPUStateMachine;

@@ -3183,10 +3184,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
IsWorker->setDebugLoc(DLoc);
BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);
Module &M = *Kernel->getParent();
// Create local storage for the work function pointer.
const DataLayout &DL = M.getDataLayout();
Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
AllocaInst *WorkFnAI = new AllocaInst(VoidPtrTy, 0, "worker.work_fn.addr",
&Kernel->getEntryBlock().front());
Instruction *WorkFnAI =
new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
"worker.work_fn.addr", &Kernel->getEntryBlock().front());
WorkFnAI->setDebugLoc(DLoc);
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

@@ -3199,13 +3204,23 @@ struct AAKernelInfoFunction : AAKernelInfo {
Value *Ident = KernelInitCB->getArgOperand(0);
Value *GTid = KernelInitCB;
Module &M = *Kernel->getParent();
FunctionCallee BarrierFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_barrier_simple_spmd);
CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
->setDebugLoc(DLoc);
if (WorkFnAI->getType()->getPointerAddressSpace() !=
(unsigned int)AddressSpace::Generic) {
WorkFnAI = new AddrSpaceCastInst(
WorkFnAI,
PointerType::getWithSamePointeeType(
cast<PointerType>(WorkFnAI->getType()),
(unsigned int)AddressSpace::Generic),
WorkFnAI->getName() + ".generic", StateMachineBeginBB);
WorkFnAI->setDebugLoc(DLoc);
}
FunctionCallee KernelParallelFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_kernel_parallel);

@@ -178,9 +178,9 @@ class MemsetRanges {
}
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
SI->getAlign().value(), SI);
}

@@ -371,6 +371,11 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
// We can't track scalable types
if (StoreInst *SI = dyn_cast<StoreInst>(StartInst))
if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
return nullptr;
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks

@@ -426,6 +431,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
break;
// We can't track ranges involving scalable types.
if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
break;
// Check to see if this stored value is of the same byte-splattable value.
Value *StoredByte = isBytewiseValue(StoredVal, DL);
if (isa<UndefValue>(ByteVal) && StoredByte)

@@ -859,7 +868,7 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
Value *cpySrc, uint64_t cpyLen,
Value *cpySrc, TypeSize cpySize,
Align cpyAlign, CallInst *C) {
// The general transformation to keep in mind is
//

@@ -875,6 +884,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// src only holds uninitialized values at the moment of the call, meaning that
// the memcpy can be discarded rather than moved.
// We can't optimize scalable types.
if (cpySize.isScalable())
return false;
// Lifetime marks shouldn't be operated on.
if (Function *F = C->getCalledFunction())
if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)

@@ -893,13 +906,13 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
if (cpyLen < srcSize)
if (cpySize < srcSize)
return false;
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
DL, C, DT))
return false;

@@ -1452,8 +1465,9 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
// of conservatively taking the minimum?
Align Alignment = std::min(M->getDestAlign().valueOrOne(),
M->getSourceAlign().valueOrOne());
if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
CopySize->getZExtValue(), Alignment,
if (performCallSlotOptzn(
M, M, M->getDest(), M->getSource(),
TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
C)) {
LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
<< " call: " << *C << "\n"

@@ -1509,7 +1523,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
Align Alignment = std::min(M->getDestAlign().valueOrOne(),
M->getSourceAlign().valueOrOne());
if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
CopySize->getZExtValue(), Alignment, C)) {
TypeSize::getFixed(CopySize->getZExtValue()),
Alignment, C)) {
eraseInstruction(M);
++NumMemCpyInstr;
return true;

@@ -1584,7 +1599,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// Find out what feeds this byval argument.
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = CB.getParamByValType(ArgNo);
uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
MemCpyInst *MDep = nullptr;
if (EnableMemorySSA) {

@@ -1612,7 +1627,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// The length of the memcpy must be larger or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
if (!C1 || C1->getValue().getZExtValue() < ByValSize)
if (!C1 || !TypeSize::isKnownGE(
TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
return false;
// Get the alignment of the byval. If the call doesn't specify the alignment,

@@ -1587,10 +1587,12 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
BB->eraseFromParent();
}
static void deleteDeadBlocksFromLoop(Loop &L,
static void
deleteDeadBlocksFromLoop(Loop &L,
SmallVectorImpl<BasicBlock *> &ExitBlocks,
DominatorTree &DT, LoopInfo &LI,
MemorySSAUpdater *MSSAU) {
MemorySSAUpdater *MSSAU,
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
SmallSetVector<BasicBlock *, 8> DeadBlockSet;

@@ -1640,6 +1642,7 @@ static void deleteDeadBlocksFromLoop(Loop &L,
}) &&
"If the child loop header is dead all blocks in the child loop must "
"be dead as well!");
DestroyLoopCB(*ChildL, ChildL->getName());
LI.destroy(ChildL);
return true;
});

@@ -1980,6 +1983,8 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
ParentL->removeChildLoop(llvm::find(*ParentL, &L));
else
LI.removeLoop(llvm::find(LI, &L));
// markLoopAsDeleted for L should be triggered by the caller (it is typically
// done by using the UnswitchCB callback).
LI.destroy(&L);
return false;
}

@@ -2019,7 +2024,8 @@ static void unswitchNontrivialInvariants(
SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);

@@ -2319,7 +2325,7 @@ static void unswitchNontrivialInvariants(
// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();

@@ -2670,7 +2676,8 @@ static bool unswitchBestCondition(
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>

@@ -2958,7 +2965,7 @@ static bool unswitchBestCondition(
<< "\n");
unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
ExitBlocks, PartialIVInfo, DT, LI, AC,
UnswitchCB, SE, MSSAU);
UnswitchCB, SE, MSSAU, DestroyLoopCB);
return true;
}

@@ -2988,7 +2995,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
bool NonTrivial,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");

@@ -3036,7 +3044,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// Try to unswitch the best invariant condition. We prefer this full unswitch to
// a partial unswitch when possible below the threshold.
if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU))
if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
DestroyLoopCB))
return true;
// No other opportunities to unswitch.

@@ -3083,6 +3092,10 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
U.markLoopAsDeleted(L, LoopName);
};
auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
U.markLoopAsDeleted(L, Name);
};
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
MSSAU = MemorySSAUpdater(AR.MSSA);

@@ -3091,7 +3104,8 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
UnswitchCB, &AR.SE,
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
DestroyLoopCB))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)

@@ -3179,12 +3193,17 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LPM.markLoopAsDeleted(*L);
};
auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
LPM.markLoopAsDeleted(L);
};
if (MSSA && VerifyMemorySSA)
MSSA->verifyMemorySSA();
bool Changed =
unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
DestroyLoopCB);
if (MSSA && VerifyMemorySSA)
MSSA->verifyMemorySSA();

@@ -1094,17 +1094,24 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Update (liveout) uses of bonus instructions,
// now that the bonus instruction has been cloned into predecessor.
SSAUpdater SSAUpdate;
SSAUpdate.Initialize(BonusInst.getType(),
(NewBonusInst->getName() + ".merge").str());
SSAUpdate.AddAvailableValue(BB, &BonusInst);
SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst);
// Note that we expect to be in a block-closed SSA form for this to work!
for (Use &U : make_early_inc_range(BonusInst.uses())) {
auto *UI = cast<Instruction>(U.getUser());
if (UI->getParent() != PredBlock)
SSAUpdate.RewriteUseAfterInsertions(U);
else // Use is in the same block as, and comes before, NewBonusInst.
SSAUpdate.RewriteUse(U);
auto *PN = dyn_cast<PHINode>(UI);
if (!PN) {
assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
"If the user is not a PHI node, then it should be in the same "
"block as, and come after, the original bonus instruction.");
continue; // Keep using the original bonus instruction.
}
// Is this the block-closed SSA form PHI node?
if (PN->getIncomingBlock(U) == BB)
continue; // Great, keep using the original bonus instruction.
// The only other alternative is an "use" when coming from
// the predecessor block - here we should refer to the cloned bonus instr.
assert(PN->getIncomingBlock(U) == PredBlock &&
"Not in block-closed SSA form?");
U.set(NewBonusInst);
}
}
}

@@ -3207,6 +3214,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Early exits once we reach the limit.
if (NumBonusInsts > BonusInstThreshold)
return false;
auto IsBCSSAUse = [BB, &I](Use &U) {
auto *UI = cast<Instruction>(U.getUser());
if (auto *PN = dyn_cast<PHINode>(UI))
return PN->getIncomingBlock(U) == BB;
return UI->getParent() == BB && I.comesBefore(UI);
};
// Does this instruction require rewriting of uses?
if (!all_of(I.uses(), IsBCSSAUse))
return false;
}
// Ok, we have the budget. Perform the transformation.

@@ -234,12 +234,15 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
VPValue *PredInst1 =
cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
for (VPUser *U : Phi1ToMove.getVPSingleValue()->users()) {
VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
SmallVector<VPUser *> Users(Phi1ToMoveV->user_begin(),
Phi1ToMoveV->user_end());
for (VPUser *U : Users) {
auto *UI = dyn_cast<VPRecipeBase>(U);
if (!UI || UI->getParent() != Then2)
continue;
for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
if (Phi1ToMove.getVPSingleValue() != U->getOperand(I))
if (Phi1ToMoveV != U->getOperand(I))
continue;
U->setOperand(I, PredInst1);
}

@@ -1,14 +1,14 @@
// $FreeBSD$
#define LLVM_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLVM_REVISION "llvmorg-13.0.0-rc3-8-g08642a395f23"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
#define CLANG_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define CLANG_REVISION "llvmorg-13.0.0-rc3-8-g08642a395f23"
#define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git"
// <Upstream revision at import>-<Local identifier in __FreeBSD_version style>
#define LLD_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5-1400002"
#define LLD_REVISION "llvmorg-13.0.0-rc3-8-g08642a395f23-1400002"
#define LLD_REPOSITORY "FreeBSD"
#define LLDB_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLDB_REVISION "llvmorg-13.0.0-rc3-8-g08642a395f23"
#define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git"

@@ -1,3 +1,3 @@
/* $FreeBSD$ */
#define LLVM_REVISION "llvmorg-13.0.0-rc2-43-gf56129fe78d5"
#define LLVM_REVISION "llvmorg-13.0.0-rc3-8-g08642a395f23"
#define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"