Vendor import of llvm-project branch release/13.x llvmorg-13.0.0-rc1-97-g23ba3732246a.
parent 9cb5bdb8b2
commit d545c2ce5a
@@ -243,6 +243,9 @@ TARGET_HEADER_BUILTIN(_ReadStatusReg, "LLii", "nh", "intrin.h", ALL_MS_LANGUAG
TARGET_HEADER_BUILTIN(_WriteStatusReg, "viLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_AddressOfReturnAddress, "v*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")

TARGET_HEADER_BUILTIN(__mulh, "SLLiSLLiSLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")

#undef BUILTIN
#undef LANGBUILTIN
#undef TARGET_HEADER_BUILTIN

@@ -354,6 +354,9 @@ public:
  /// A list of all -fno-builtin-* function names (e.g., memset).
  std::vector<std::string> NoBuiltinFuncs;

  /// A prefix map for __FILE__, __BASE_FILE__ and __builtin_FILE().
  std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap;

  /// Triples of the OpenMP targets that the host code codegen should
  /// take into account in order to generate accurate offloading descriptors.
  std::vector<llvm::Triple> OMPTargetTriples;
@@ -460,6 +463,9 @@ public:
  }

  bool isSYCL() const { return SYCLIsDevice || SYCLIsHost; }

  /// Remap path prefix according to the -fmacro-prefix-map option.
  void remapPathPrefix(SmallString<256> &Path) const;
};

/// Floating point control options

@@ -2825,10 +2825,10 @@ def fcoverage_prefix_map_EQ
  HelpText<"remap file source paths in coverage mapping">;
def ffile_prefix_map_EQ
  : Joined<["-"], "ffile-prefix-map=">, Group<f_Group>,
    HelpText<"remap file source paths in debug info and predefined preprocessor macros">;
    HelpText<"remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE()">;
def fmacro_prefix_map_EQ
  : Joined<["-"], "fmacro-prefix-map=">, Group<Preprocessor_Group>, Flags<[CC1Option]>,
    HelpText<"remap file source paths in predefined preprocessor macros">;
  : Joined<["-"], "fmacro-prefix-map=">, Group<f_Group>, Flags<[CC1Option]>,
    HelpText<"remap file source paths in predefined preprocessor macros and __builtin_FILE()">;
defm force_dwarf_frame : BoolFOption<"force-dwarf-frame",
  CodeGenOpts<"ForceDwarfFrameSection">, DefaultFalse,
  PosFlag<SetTrue, [CC1Option], "Always emit a debug frame section">, NegFlag<SetFalse>>;

@@ -199,9 +199,6 @@ public:
  /// build it again.
  std::shared_ptr<FailedModulesSet> FailedModules;

  /// A prefix map for __FILE__ and __BASE_FILE__.
  std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap;

  /// Contains the currently active skipped range mappings for skipping excluded
  /// conditional directives.
  ///

@@ -7828,8 +7828,7 @@ public:
                             TemplateArgumentLoc &Arg,
                             SmallVectorImpl<TemplateArgument> &Converted);

  bool CheckTemplateArgument(TemplateTypeParmDecl *Param,
                             TypeSourceInfo *Arg);
  bool CheckTemplateArgument(TypeSourceInfo *Arg);
  ExprResult CheckTemplateArgument(NonTypeTemplateParmDecl *Param,
                                   QualType InstantiatedParamType, Expr *Arg,
                                   TemplateArgument &Converted,

@@ -6066,9 +6066,11 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
                                      NNS->getAsNamespaceAlias()->getNamespace()
                                          ->getOriginalNamespace());

  // The difference between TypeSpec and TypeSpecWithTemplate is that the
  // latter will have the 'template' keyword when printed.
  case NestedNameSpecifier::TypeSpec:
  case NestedNameSpecifier::TypeSpecWithTemplate: {
    QualType T = getCanonicalType(QualType(NNS->getAsType(), 0));
    const Type *T = getCanonicalType(NNS->getAsType());

    // If we have some kind of dependent-named type (e.g., "typename T::type"),
    // break it apart into its prefix and identifier, then reconstitute those
@@ -6078,14 +6080,16 @@ ASTContext::getCanonicalNestedNameSpecifier(NestedNameSpecifier *NNS) const {
    //   typedef typename T::type T1;
    //   typedef typename T1::type T2;
    if (const auto *DNT = T->getAs<DependentNameType>())
      return NestedNameSpecifier::Create(*this, DNT->getQualifier(),
                                         const_cast<IdentifierInfo *>(DNT->getIdentifier()));
      return NestedNameSpecifier::Create(
          *this, DNT->getQualifier(),
          const_cast<IdentifierInfo *>(DNT->getIdentifier()));
    if (const auto *DTST = T->getAs<DependentTemplateSpecializationType>())
      return NestedNameSpecifier::Create(*this, DTST->getQualifier(), true,
                                         const_cast<Type *>(T));

    // Otherwise, just canonicalize the type, and force it to be a TypeSpec.
    // FIXME: Why are TypeSpec and TypeSpecWithTemplate distinct in the
    // first place?
    // TODO: Set 'Template' parameter to true for other template types.
    return NestedNameSpecifier::Create(*this, nullptr, false,
                                       const_cast<Type *>(T.getTypePtr()));
                                       const_cast<Type *>(T));
  }

  case NestedNameSpecifier::Global:

@@ -2233,8 +2233,11 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx,
  };

  switch (getIdentKind()) {
  case SourceLocExpr::File:
    return MakeStringLiteral(PLoc.getFilename());
  case SourceLocExpr::File: {
    SmallString<256> Path(PLoc.getFilename());
    Ctx.getLangOpts().remapPathPrefix(Path);
    return MakeStringLiteral(Path);
  }
  case SourceLocExpr::Function: {
    const Decl *CurDecl = dyn_cast_or_null<Decl>(Context);
    return MakeStringLiteral(

@@ -11,6 +11,8 @@
//===----------------------------------------------------------------------===//

#include "clang/Basic/LangOptions.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

using namespace clang;

@@ -48,6 +50,12 @@ VersionTuple LangOptions::getOpenCLVersionTuple() const {
  return VersionTuple(Ver / 100, (Ver % 100) / 10);
}

void LangOptions::remapPathPrefix(SmallString<256> &Path) const {
  for (const auto &Entry : MacroPrefixMap)
    if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second))
      break;
}

FPOptions FPOptions::defaultWithoutTrailingStorage(const LangOptions &LO) {
  FPOptions result(LO);
  return result;

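For illustration (not part of the imported diff): a minimal standalone sketch of the remapping remapPathPrefix performs. The stand-in below is hypothetical and uses plain string prefix matching where the real code uses the component-aware llvm::sys::path::replace_path_prefix; the map is initialized as if the driver had been given -fmacro-prefix-map=/usr/src=/src. Because the map is ordered with std::greater, longer prefixes are tried first, and only the first match is applied.

    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    // Hypothetical stand-in for LangOptions::remapPathPrefix: rewrite the
    // first matching prefix, then stop (mirroring the "break" above).
    static void remapPathPrefix(
        std::string &Path,
        const std::map<std::string, std::string, std::greater<std::string>> &Map) {
      for (const auto &Entry : Map)
        if (Path.rfind(Entry.first, 0) == 0) { // Path starts with the prefix
          Path = Entry.second + Path.substr(Entry.first.size());
          break;
        }
    }

    int main() {
      std::map<std::string, std::string, std::greater<std::string>> Map{
          {"/usr/src", "/src"}}; // as if -fmacro-prefix-map=/usr/src=/src
      std::string Path = "/usr/src/lib/libc/stdio/printf.c";
      remapPathPrefix(Path, Map);
      std::cout << Path << "\n"; // prints /src/lib/libc/stdio/printf.c
    }
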
@@ -9732,6 +9732,29 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
    return Builder.CreateCall(F);
  }

  if (BuiltinID == AArch64::BI__mulh || BuiltinID == AArch64::BI__umulh) {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = BuiltinID == AArch64::BI__mulh;
    Value *LHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);
    Value *RHS =
        Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    return HigherBits;
  }

  // Handle MSVC intrinsics before argument evaluation to prevent double
  // evaluation.
  if (Optional<MSVCIntrin> MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID))

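For illustration (not part of the imported diff): the IR built above widens both operands to i128, multiplies, and takes the upper 64 bits with an arithmetic or logical shift. A minimal C++ sketch of the same computation, assuming the Clang/GCC __int128 extension:

    #include <cstdint>

    // What __mulh computes: the signed high 64 bits of a 64x64->128 multiply.
    int64_t mulh(int64_t a, int64_t b) {
      return (int64_t)(((__int128)a * b) >> 64); // arithmetic shift, like AShr
    }

    // What __umulh computes: the unsigned variant.
    uint64_t umulh(uint64_t a, uint64_t b) {
      return (uint64_t)(((unsigned __int128)a * b) >> 64); // logical shift, like LShr
    }
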
@@ -555,7 +555,8 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
                                            PrioritizedCXXGlobalInits.size());
    PrioritizedCXXGlobalInits.push_back(std::make_pair(Key, Fn));
  } else if (isTemplateInstantiation(D->getTemplateSpecializationKind()) ||
             getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR) {
             getContext().GetGVALinkageForVariable(D) == GVA_DiscardableODR ||
             D->hasAttr<SelectAnyAttr>()) {
    // C++ [basic.start.init]p2:
    //   Definitions of explicitly specialized class template static data
    //   members have ordered initialization. Other class template static data
@@ -568,17 +569,18 @@ CodeGenModule::EmitCXXGlobalVarDeclInitFunc(const VarDecl *D,
    // group with the global being initialized. On most platforms, this is a
    // minor startup time optimization. In the MS C++ ABI, there are no guard
    // variables, so this COMDAT key is required for correctness.
    AddGlobalCtor(Fn, 65535, COMDATKey);
    if (getTarget().getCXXABI().isMicrosoft() && COMDATKey) {
      // In the MS C++ ABI, MS adds template static data members to the linker
      // directive.
      addUsedGlobal(COMDATKey);
    }
  } else if (D->hasAttr<SelectAnyAttr>()) {
    //
    // SelectAny globals will be comdat-folded. Put the initializer into a
    // COMDAT group associated with the global, so the initializers get folded
    // too.

    AddGlobalCtor(Fn, 65535, COMDATKey);
    if (COMDATKey && (getTriple().isOSBinFormatELF() ||
                      getTarget().getCXXABI().isMicrosoft())) {
      // When COMDAT is used on ELF or in the MS C++ ABI, the key must be in
      // llvm.used to prevent linker GC.
      addUsedGlobal(COMDATKey);
    }
  } else {
    I = DelayedCXXInitPosition.find(D); // Re-do lookup in case of re-hash.
    if (I == DelayedCXXInitPosition.end()) {

@@ -186,7 +186,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const HeaderSearchOptions &HSO,
      !getModule().getSourceFileName().empty()) {
    std::string Path = getModule().getSourceFileName();
    // Check if a path substitution is needed from the MacroPrefixMap.
    for (const auto &Entry : PPO.MacroPrefixMap)
    for (const auto &Entry : LangOpts.MacroPrefixMap)
      if (Path.rfind(Entry.first, 0) != std::string::npos) {
        Path = Entry.second + Path.substr(Entry.first.size());
        break;

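An aside on the idiom above (not part of the diff): std::string::rfind(Prefix, 0) searches backward starting at position 0, so it can only ever match at the beginning of the string. It returns 0 when Path begins with Entry.first and npos otherwise, which makes it a compact starts-with test:

    #include <cassert>
    #include <string>

    int main() {
      std::string Path = "/usr/src/file.c";
      assert(Path.rfind("/usr/src", 0) == 0);               // prefix matches at 0
      assert(Path.rfind("/other", 0) == std::string::npos); // no match
    }
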
@@ -2637,7 +2637,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,

  llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath;
  llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math;
  StringRef FPContract = "on";
  StringRef FPContract = "";
  bool StrictFPModel = false;

@@ -2662,7 +2662,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
      ReciprocalMath = false;
      SignedZeros = true;
      // -fno-fast-math restores default denormal and fpcontract handling
      FPContract = "on";
      FPContract = "";
      DenormalFPMath = llvm::DenormalMode::getIEEE();

      // FIXME: The target may have picked a non-IEEE default mode here based on
@@ -2682,18 +2682,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
      // ffp-model= is a Driver option, it is entirely rewritten into more
      // granular options before being passed into cc1.
      // Use the gcc option in the switch below.
      if (!FPModel.empty() && !FPModel.equals(Val))
      if (!FPModel.empty() && !FPModel.equals(Val)) {
        D.Diag(clang::diag::warn_drv_overriding_flag_option)
            << Args.MakeArgString("-ffp-model=" + FPModel)
            << Args.MakeArgString("-ffp-model=" + Val);
        FPContract = "";
      }
      if (Val.equals("fast")) {
        optID = options::OPT_ffast_math;
        FPModel = Val;
        FPContract = Val;
        FPContract = "fast";
      } else if (Val.equals("precise")) {
        optID = options::OPT_ffp_contract;
        FPModel = Val;
        FPContract = "on";
        FPContract = "fast";
        PreciseFPModel = true;
      } else if (Val.equals("strict")) {
        StrictFPModel = true;
@@ -2779,11 +2781,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
    case options::OPT_ffp_contract: {
      StringRef Val = A->getValue();
      if (PreciseFPModel) {
        // When -ffp-model=precise is seen on the command line,
        // the boolean PreciseFPModel is set to true which indicates
        // "the current option is actually PreciseFPModel". The optID
        // is changed to OPT_ffp_contract and FPContract is set to "on".
        // the argument Val string is "precise": it shouldn't be checked.
        // -ffp-model=precise enables ffp-contract=fast as a side effect
        // the FPContract value has already been set to a string literal
        // and the Val string isn't a pertinent value.
        ;
      } else if (Val.equals("fast") || Val.equals("on") || Val.equals("off"))
        FPContract = Val;
@@ -2881,17 +2881,18 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D,
      // -fno-fast-math restores default denormal and fpcontract handling
      DenormalFPMath = DefaultDenormalFPMath;
      DenormalFP32Math = llvm::DenormalMode::getIEEE();
      FPContract = "on";
      FPContract = "";
      break;
    }
    if (StrictFPModel) {
      // If -ffp-model=strict has been specified on command line but
      // subsequent options conflict then emit warning diagnostic.
      if (HonorINFs && HonorNaNs && !AssociativeMath && !ReciprocalMath &&
          SignedZeros && TrappingMath && RoundingFPMath &&
          DenormalFPMath == llvm::DenormalMode::getIEEE() &&
          DenormalFP32Math == llvm::DenormalMode::getIEEE() &&
          FPContract.equals("off"))
      if (HonorINFs && HonorNaNs &&
          !AssociativeMath && !ReciprocalMath &&
          SignedZeros && TrappingMath && RoundingFPMath &&
          (FPContract.equals("off") || FPContract.empty()) &&
          DenormalFPMath == llvm::DenormalMode::getIEEE() &&
          DenormalFP32Math == llvm::DenormalMode::getIEEE())
        // OK: Current Arg doesn't conflict with -ffp-model=strict
        ;
      else {

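For illustration (not part of the imported diff): FPContract controls whether the compiler may contract a multiply and an add into one fused multiply-add. After this change, -ffp-model=precise again selects ffp-contract=fast, while an empty FPContract leaves the target default in effect. A tiny C++ example of the kind of code this affects:

    // With -ffp-contract=fast (what -ffp-model=precise selects above), the
    // return expression may be emitted as a single fused multiply-add with
    // no intermediate rounding of a * x; with -ffp-contract=off it must stay
    // a separate multiply and add.
    double axpy(double a, double x, double y) {
      return a * x + y;
    }
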
@@ -7690,8 +7691,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
        assert(CurTC == nullptr && "Expected one dependence!");
        CurTC = TC;
      });
      UB += C.addTempFile(
          C.getArgs().MakeArgString(CurTC->getInputFilename(Inputs[I])));
    } else {
      UB += CurTC->getInputFilename(Inputs[I]);
    }
    UB += CurTC->getInputFilename(Inputs[I]);
  }
  CmdArgs.push_back(TCArgs.MakeArgString(UB));

@@ -588,21 +588,43 @@ void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs,

void HexagonToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
                                                 ArgStringList &CC1Args) const {
  if (DriverArgs.hasArg(options::OPT_nostdinc) ||
      DriverArgs.hasArg(options::OPT_nostdlibinc))
  if (DriverArgs.hasArg(options::OPT_nostdinc))
    return;

  const bool IsELF = !getTriple().isMusl() && !getTriple().isOSLinux();
  const bool IsLinuxMusl = getTriple().isMusl() && getTriple().isOSLinux();

  const Driver &D = getDriver();
  if (!D.SysRoot.empty()) {
  SmallString<128> ResourceDirInclude(D.ResourceDir);
  if (!IsELF) {
    llvm::sys::path::append(ResourceDirInclude, "include");
    if (!DriverArgs.hasArg(options::OPT_nobuiltininc) &&
        (!IsLinuxMusl || DriverArgs.hasArg(options::OPT_nostdlibinc)))
      addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude);
  }
  if (DriverArgs.hasArg(options::OPT_nostdlibinc))
    return;

  const bool HasSysRoot = !D.SysRoot.empty();
  if (HasSysRoot) {
    SmallString<128> P(D.SysRoot);
    if (getTriple().isMusl())
    if (IsLinuxMusl)
      llvm::sys::path::append(P, "usr/include");
    else
      llvm::sys::path::append(P, "include");

    addExternCSystemInclude(DriverArgs, CC1Args, P.str());
    return;
    // LOCAL_INCLUDE_DIR
    addSystemInclude(DriverArgs, CC1Args, P + "/usr/local/include");
    // TOOL_INCLUDE_DIR
    AddMultilibIncludeArgs(DriverArgs, CC1Args);
  }

  if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && IsLinuxMusl)
    addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude);

  if (HasSysRoot)
    return;
  std::string TargetDir = getHexagonTargetDir(D.getInstalledDir(),
                                              D.PrefixDirs);
  addExternCSystemInclude(DriverArgs, CC1Args, TargetDir + "/hexagon/include");

@@ -136,10 +136,13 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA,
    llvm_unreachable("Unsupported target architecture.");
  }

  if (Args.hasArg(options::OPT_mwindows)) {
  Arg *SubsysArg =
      Args.getLastArg(options::OPT_mwindows, options::OPT_mconsole);
  if (SubsysArg && SubsysArg->getOption().matches(options::OPT_mwindows)) {
    CmdArgs.push_back("--subsystem");
    CmdArgs.push_back("windows");
  } else if (Args.hasArg(options::OPT_mconsole)) {
  } else if (SubsysArg &&
             SubsysArg->getOption().matches(options::OPT_mconsole)) {
    CmdArgs.push_back("--subsystem");
    CmdArgs.push_back("console");
  }

@@ -3528,6 +3528,9 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
    GenerateArg(Args, OPT_fexperimental_relative_cxx_abi_vtables, SA);
  else
    GenerateArg(Args, OPT_fno_experimental_relative_cxx_abi_vtables, SA);

  for (const auto &MP : Opts.MacroPrefixMap)
    GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA);
}

bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
@@ -4037,6 +4040,12 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
                   options::OPT_fno_experimental_relative_cxx_abi_vtables,
                   TargetCXXABI::usesRelativeVTables(T));

  for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) {
    auto Split = StringRef(A).split('=');
    Opts.MacroPrefixMap.insert(
        {std::string(Split.first), std::string(Split.second)});
  }

  return Diags.getNumErrors() == NumErrorsBefore;
}

@@ -4109,9 +4118,6 @@ static void GeneratePreprocessorArgs(PreprocessorOptions &Opts,
  for (const auto &D : Opts.DeserializedPCHDeclsToErrorOn)
    GenerateArg(Args, OPT_error_on_deserialized_pch_decl, D, SA);

  for (const auto &MP : Opts.MacroPrefixMap)
    GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA);

  if (Opts.PrecompiledPreambleBytes != std::make_pair(0u, false))
    GenerateArg(Args, OPT_preamble_bytes_EQ,
                Twine(Opts.PrecompiledPreambleBytes.first) + "," +
@@ -4180,12 +4186,6 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
  for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl))
    Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue());

  for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) {
    auto Split = StringRef(A).split('=');
    Opts.MacroPrefixMap.insert(
        {std::string(Split.first), std::string(Split.second)});
  }

  if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) {
    StringRef Value(A->getValue());
    size_t Comma = Value.find(',');

@@ -574,6 +574,9 @@ void _WriteStatusReg(int, __int64);
unsigned short __cdecl _byteswap_ushort(unsigned short val);
unsigned long __cdecl _byteswap_ulong (unsigned long val);
unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val);

__int64 __mulh(__int64 __a, __int64 __b);
unsigned __int64 __umulh(unsigned __int64 __a, unsigned __int64 __b);
#endif

/*----------------------------------------------------------------------------*\

@@ -1453,15 +1453,6 @@ static bool isTargetEnvironment(const TargetInfo &TI,
  return TI.getTriple().getEnvironment() == Env.getEnvironment();
}

static void remapMacroPath(
    SmallString<256> &Path,
    const std::map<std::string, std::string, std::greater<std::string>>
        &MacroPrefixMap) {
  for (const auto &Entry : MacroPrefixMap)
    if (llvm::sys::path::replace_path_prefix(Path, Entry.first, Entry.second))
      break;
}

/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
/// as a builtin macro, handle it and return the next token as 'Tok'.
void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
@@ -1543,7 +1534,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
    } else {
      FN += PLoc.getFilename();
    }
    remapMacroPath(FN, PPOpts->MacroPrefixMap);
    getLangOpts().remapPathPrefix(FN);
    Lexer::Stringify(FN);
    OS << '"' << FN << '"';
  }

@@ -742,22 +742,15 @@ Optional<NormalizedConstraint>
NormalizedConstraint::fromConstraintExprs(Sema &S, NamedDecl *D,
                                          ArrayRef<const Expr *> E) {
  assert(E.size() != 0);
  auto First = fromConstraintExpr(S, D, E[0]);
  if (E.size() == 1)
    return First;
  auto Second = fromConstraintExpr(S, D, E[1]);
  if (!Second)
  auto Conjunction = fromConstraintExpr(S, D, E[0]);
  if (!Conjunction)
    return None;
  llvm::Optional<NormalizedConstraint> Conjunction;
  Conjunction.emplace(S.Context, std::move(*First), std::move(*Second),
                      CCK_Conjunction);
  for (unsigned I = 2; I < E.size(); ++I) {
  for (unsigned I = 1; I < E.size(); ++I) {
    auto Next = fromConstraintExpr(S, D, E[I]);
    if (!Next)
      return llvm::Optional<NormalizedConstraint>{};
    NormalizedConstraint NewConjunction(S.Context, std::move(*Conjunction),
      return None;
    *Conjunction = NormalizedConstraint(S.Context, std::move(*Conjunction),
                                        std::move(*Next), CCK_Conjunction);
    *Conjunction = std::move(NewConjunction);
  }
  return Conjunction;
}

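For illustration (not part of the imported diff): the rewritten loop is a plain left fold. It seeds the accumulator with the first element, folds each later element into it, and bails out on the first failure. The shape, sketched with hypothetical stand-in types:

    #include <optional>
    #include <utility>
    #include <vector>

    // Hypothetical stand-ins for NormalizedConstraint and fromConstraintExpr.
    struct Constraint { int v; };
    static std::optional<Constraint> normalize(int expr) {
      return Constraint{expr};
    }
    static Constraint conjoin(Constraint lhs, Constraint rhs) {
      return Constraint{lhs.v + rhs.v}; // stands in for CCK_Conjunction
    }

    std::optional<Constraint> foldConjunction(const std::vector<int> &exprs) {
      auto acc = normalize(exprs[0]); // seed with E[0]
      if (!acc)
        return std::nullopt;
      for (size_t i = 1; i < exprs.size(); ++i) { // fold E[1..N)
        auto next = normalize(exprs[i]);
        if (!next)
          return std::nullopt;
        *acc = conjoin(std::move(*acc), std::move(*next));
      }
      return acc;
    }
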
@@ -12472,6 +12472,8 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc,
    return false;
  }

  const NestedNameSpecifier *CNNS =
      Context.getCanonicalNestedNameSpecifier(Qual);
  for (LookupResult::iterator I = Prev.begin(), E = Prev.end(); I != E; ++I) {
    NamedDecl *D = *I;

@@ -12497,8 +12499,7 @@ bool Sema::CheckUsingDeclRedeclaration(SourceLocation UsingLoc,
    // using decls differ if they name different scopes (but note that
    // template instantiation can cause this check to trigger when it
    // didn't before instantiation).
    if (Context.getCanonicalNestedNameSpecifier(Qual) !=
        Context.getCanonicalNestedNameSpecifier(DQual))
    if (CNNS != Context.getCanonicalNestedNameSpecifier(DQual))
      continue;

    Diag(NameLoc, diag::err_using_decl_redeclaration) << SS.getRange();

@@ -1079,7 +1079,7 @@ NamedDecl *Sema::ActOnTypeParameter(Scope *S, bool Typename,
    return Param;

  // Check the template argument itself.
  if (CheckTemplateArgument(Param, DefaultTInfo)) {
  if (CheckTemplateArgument(DefaultTInfo)) {
    Param->setInvalidDecl();
    return Param;
  }
@@ -5042,7 +5042,7 @@ bool Sema::CheckTemplateTypeArgument(TemplateTypeParmDecl *Param,
    }
  }

  if (CheckTemplateArgument(Param, TSI))
  if (CheckTemplateArgument(TSI))
    return true;

  // Add the converted template type argument.
@@ -5661,7 +5661,7 @@ bool Sema::CheckTemplateArgumentList(
  TemplateArgumentListInfo NewArgs = TemplateArgs;

  // Make sure we get the template parameter list from the most
  // recentdeclaration, since that is the only one that has is guaranteed to
  // recent declaration, since that is the only one that is guaranteed to
  // have all the default template argument information.
  TemplateParameterList *Params =
      cast<TemplateDecl>(Template->getMostRecentDecl())
@@ -6208,8 +6208,7 @@ bool UnnamedLocalNoLinkageFinder::VisitNestedNameSpecifier(
///
/// This routine implements the semantics of C++ [temp.arg.type]. It
/// returns true if an error occurred, and false otherwise.
bool Sema::CheckTemplateArgument(TemplateTypeParmDecl *Param,
                                 TypeSourceInfo *ArgInfo) {
bool Sema::CheckTemplateArgument(TypeSourceInfo *ArgInfo) {
  assert(ArgInfo && "invalid TypeSourceInfo");
  QualType Arg = ArgInfo->getType();
  SourceRange SR = ArgInfo->getTypeLoc().getSourceRange();

@@ -1934,25 +1934,23 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
    return Req;

  Sema::SFINAETrap Trap(SemaRef);
  TemplateDeductionInfo Info(Req->getExpr()->getBeginLoc());

  llvm::PointerUnion<Expr *, concepts::Requirement::SubstitutionDiagnostic *>
      TransExpr;
  if (Req->isExprSubstitutionFailure())
    TransExpr = Req->getExprSubstitutionDiagnostic();
  else {
    Sema::InstantiatingTemplate ExprInst(SemaRef, Req->getExpr()->getBeginLoc(),
                                         Req, Info,
                                         Req->getExpr()->getSourceRange());
    Expr *E = Req->getExpr();
    TemplateDeductionInfo Info(E->getBeginLoc());
    Sema::InstantiatingTemplate ExprInst(SemaRef, E->getBeginLoc(), Req, Info,
                                         E->getSourceRange());
    if (ExprInst.isInvalid())
      return nullptr;
    ExprResult TransExprRes = TransformExpr(Req->getExpr());
    ExprResult TransExprRes = TransformExpr(E);
    if (TransExprRes.isInvalid() || Trap.hasErrorOccurred())
      TransExpr = createSubstDiag(SemaRef, Info,
                                  [&] (llvm::raw_ostream& OS) {
                                    Req->getExpr()->printPretty(OS, nullptr,
                                        SemaRef.getPrintingPolicy());
                                  });
      TransExpr = createSubstDiag(SemaRef, Info, [&](llvm::raw_ostream &OS) {
        E->printPretty(OS, nullptr, SemaRef.getPrintingPolicy());
      });
    else
      TransExpr = TransExprRes.get();
  }
@@ -1966,6 +1964,7 @@ TemplateInstantiator::TransformExprRequirement(concepts::ExprRequirement *Req) {
  else if (RetReq.isTypeConstraint()) {
    TemplateParameterList *OrigTPL =
        RetReq.getTypeConstraintTemplateParameterList();
    TemplateDeductionInfo Info(OrigTPL->getTemplateLoc());
    Sema::InstantiatingTemplate TPLInst(SemaRef, OrigTPL->getTemplateLoc(),
                                        Req, Info, OrigTPL->getSourceRange());
    if (TPLInst.isInvalid())

@@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
@@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */

@@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
        (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129

/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 6
#define INSTR_PROF_RAW_VERSION 7
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
/* Coverage mapping format version (start from 0). */

@@ -116,7 +116,7 @@ uint64_t __llvm_profile_get_size_for_buffer_internal(
      DataSize, CountersSize, NamesSize, &PaddingBytesBeforeCounters,
      &PaddingBytesAfterCounters, &PaddingBytesAfterNames);

  return sizeof(__llvm_profile_header) +
  return sizeof(__llvm_profile_header) + __llvm_write_binary_ids(NULL) +
         (DataSize * sizeof(__llvm_profile_data)) + PaddingBytesBeforeCounters +
         (CountersSize * sizeof(uint64_t)) + PaddingBytesAfterCounters +
         NamesSize + PaddingBytesAfterNames;

@@ -22,6 +22,7 @@ void (*VPMergeHook)(ValueProfData *, __llvm_profile_data *);
COMPILER_RT_VISIBILITY
uint64_t lprofGetLoadModuleSignature() {
  /* A very fast way to compute a module signature. */
  uint64_t Version = __llvm_profile_get_version();
  uint64_t CounterSize = (uint64_t)(__llvm_profile_end_counters() -
                                    __llvm_profile_begin_counters());
  uint64_t DataSize = __llvm_profile_get_data_size(__llvm_profile_begin_data(),
@@ -33,7 +34,7 @@ uint64_t lprofGetLoadModuleSignature() {
  const __llvm_profile_data *FirstD = __llvm_profile_begin_data();

  return (NamesSize << 40) + (CounterSize << 30) + (DataSize << 20) +
         (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0);
         (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version;
}

/* Returns 1 if profile is not structurally compatible. */
@@ -44,7 +45,8 @@ int __llvm_profile_check_compatibility(const char *ProfileData,
  __llvm_profile_header *Header = (__llvm_profile_header *)ProfileData;
  __llvm_profile_data *SrcDataStart, *SrcDataEnd, *SrcData, *DstData;
  SrcDataStart =
      (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
      (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) +
                              Header->BinaryIdsSize);
  SrcDataEnd = SrcDataStart + Header->DataSize;

  if (ProfileSize < sizeof(__llvm_profile_header))
@@ -63,7 +65,7 @@ int __llvm_profile_check_compatibility(const char *ProfileData,
      Header->ValueKindLast != IPVK_Last)
    return 1;

  if (ProfileSize < sizeof(__llvm_profile_header) +
  if (ProfileSize < sizeof(__llvm_profile_header) + Header->BinaryIdsSize +
                        Header->DataSize * sizeof(__llvm_profile_data) +
                        Header->NamesSize + Header->CountersSize)
    return 1;
@@ -91,7 +93,8 @@ int __llvm_profile_merge_from_buffer(const char *ProfileData,
  const char *SrcValueProfDataStart, *SrcValueProfData;

  SrcDataStart =
      (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header));
      (__llvm_profile_data *)(ProfileData + sizeof(__llvm_profile_header) +
                              Header->BinaryIdsSize);
  SrcDataEnd = SrcDataStart + Header->DataSize;
  SrcCountersStart = (uint64_t *)SrcDataEnd;
  SrcNameStart = (const char *)(SrcCountersStart + Header->CountersSize);

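For illustration (not part of the imported diff): raw profile format version 7 inserts the binary-ID blob between the header and the data records, so every consumer must now skip Header->BinaryIdsSize additional bytes, as the two SrcDataStart computations above do. A sketch of the offset arithmetic, with a hypothetical cut-down struct standing in for __llvm_profile_header:

    #include <cstdint>

    // Hypothetical, abbreviated stand-in for __llvm_profile_header; the real
    // struct is generated from InstrProfData.inc and has more fields, so the
    // real code uses sizeof(__llvm_profile_header) instead.
    struct RawHeader {
      uint64_t Magic;
      uint64_t Version;
      uint64_t BinaryIdsSize; // new in raw format version 7
      uint64_t DataSize;
      // ... remaining fields elided ...
    };

    // Data records start after the header plus the binary-ID blob.
    inline const char *profileDataBegin(const char *Buf) {
      const RawHeader *H = reinterpret_cast<const RawHeader *>(Buf);
      return Buf + sizeof(RawHeader) + H->BinaryIdsSize;
    }
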
@@ -17,6 +17,15 @@
#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"

#if defined(__FreeBSD__) && !defined(ElfW)
/*
 * FreeBSD's elf.h and link.h headers do not define the ElfW(type) macro yet.
 * If this is added to all supported FreeBSD versions in the future, this
 * compatibility macro can be removed.
 */
#define ElfW(type) __ElfN(type)
#endif

#define PROF_DATA_START INSTR_PROF_SECT_START(INSTR_PROF_DATA_COMMON)
#define PROF_DATA_STOP INSTR_PROF_SECT_STOP(INSTR_PROF_DATA_COMMON)
#define PROF_NAME_START INSTR_PROF_SECT_START(INSTR_PROF_NAME_COMMON)
@@ -76,6 +85,7 @@ COMPILER_RT_VISIBILITY ValueProfNode *__llvm_profile_end_vnodes(void) {
COMPILER_RT_VISIBILITY ValueProfNode *CurrentVNode = &PROF_VNODES_START;
COMPILER_RT_VISIBILITY ValueProfNode *EndVNode = &PROF_VNODES_STOP;

#ifdef NT_GNU_BUILD_ID
static size_t RoundUp(size_t size, size_t align) {
  return (size + align - 1) & ~(align - 1);
}
@@ -179,5 +189,14 @@ COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {

  return 0;
}
#else /* !NT_GNU_BUILD_ID */
/*
 * Fallback implementation for targets that don't support the GNU
 * extensions NT_GNU_BUILD_ID and __ehdr_start.
 */
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
  return 0;
}
#endif

#endif

@@ -354,6 +354,16 @@
#  define _LIBCPP_NO_CFI
#endif

// If the compiler supports using_if_exists, pretend we have those functions and they'll
// be picked up if the C library provides them.
//
// TODO: Once we drop support for Clang 12, we can assume the compiler supports using_if_exists
// for platforms that don't have a conforming C11 library, so we can drop this whole thing.
#if __has_attribute(using_if_exists)
#  define _LIBCPP_HAS_TIMESPEC_GET
#  define _LIBCPP_HAS_QUICK_EXIT
#  define _LIBCPP_HAS_ALIGNED_ALLOC
#else
#if (defined(__ISO_C_VISIBLE) && (__ISO_C_VISIBLE >= 2011)) || __cplusplus >= 201103L
#  if defined(__FreeBSD__)
#    define _LIBCPP_HAS_ALIGNED_ALLOC
@@ -408,6 +418,7 @@
#  endif
#  endif // __APPLE__
#endif
#endif // __has_attribute(using_if_exists)

#ifndef _LIBCPP_CXX03_LANG
#  define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp)

@@ -59,7 +59,7 @@ int timespec_get( struct timespec *ts, int base); // C++17
// we're detecting this here instead of in <__config> because we can't include
// system headers from <__config>, since it leads to circular module dependencies.
// This is also meant to be a very temporary workaround until the SDKs are fixed.
#if defined(__APPLE__)
#if defined(__APPLE__) && !__has_attribute(using_if_exists)
#  include <sys/cdefs.h>
#  if defined(_LIBCPP_HAS_TIMESPEC_GET) && (__DARWIN_C_LEVEL < __DARWIN_C_FULL)
#    define _LIBCPP_HAS_TIMESPEC_GET_NOT_ACTUALLY_PROVIDED

@@ -607,8 +607,15 @@ public:
    static_assert((is_same<_CharT, typename traits_type::char_type>::value),
                  "traits_type::char_type must be the same type as CharT");

#ifdef _LIBCPP_CXX03_LANG
    // Preserve the ability to compare with literal 0,
    // and implicitly convert to bool, but not implicitly convert to int.
    _LIBCPP_INLINE_VISIBILITY
    operator void*() const {return fail() ? nullptr : (void*)this;}
#else
    _LIBCPP_INLINE_VISIBILITY
    explicit operator bool() const {return !fail();}
#endif

    _LIBCPP_INLINE_VISIBILITY bool operator!() const {return fail();}
    _LIBCPP_INLINE_VISIBILITY iostate rdstate() const {return ios_base::rdstate();}

@@ -86,7 +86,8 @@ struct SymbolVersion {
struct VersionDefinition {
  llvm::StringRef name;
  uint16_t id;
  std::vector<SymbolVersion> patterns;
  std::vector<SymbolVersion> nonLocalPatterns;
  std::vector<SymbolVersion> localPatterns;
};

// This struct contains the global configuration for the linker.

@@ -1351,18 +1351,19 @@ static void readConfigs(opt::InputArgList &args) {
  }

  assert(config->versionDefinitions.empty());
  config->versionDefinitions.push_back({"local", (uint16_t)VER_NDX_LOCAL, {}});
  config->versionDefinitions.push_back(
      {"global", (uint16_t)VER_NDX_GLOBAL, {}});
      {"local", (uint16_t)VER_NDX_LOCAL, {}, {}});
  config->versionDefinitions.push_back(
      {"global", (uint16_t)VER_NDX_GLOBAL, {}, {}});

  // If --retain-symbol-file is used, we'll keep only the symbols listed in
  // the file and discard all others.
  if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) {
    config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(
    config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
        {"*", /*isExternCpp=*/false, /*hasWildcard=*/true});
    if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
      for (StringRef s : args::getLines(*buffer))
        config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(
        config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
            {s, /*isExternCpp=*/false, /*hasWildcard=*/false});
  }

@@ -2069,23 +2070,37 @@ static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
    if (suffix1[0] != '@' || suffix1[1] == '@')
      continue;

    // Check whether the default version foo@@v1 exists. If it exists, the
    // symbol can be found by the name "foo" in the symbol table.
    Symbol *maybeDefault = symtab->find(name);
    if (!maybeDefault)
    // Check the existing symbol foo. We have two special cases to handle:
    //
    // * There is a definition of foo@v1 and foo@@v1.
    // * There is a definition of foo@v1 and foo.
    Defined *sym2 = dyn_cast_or_null<Defined>(symtab->find(name));
    if (!sym2)
      continue;
    const char *suffix2 = maybeDefault->getVersionSuffix();
    if (suffix2[0] != '@' || suffix2[1] != '@' ||
        strcmp(suffix1 + 1, suffix2 + 2) != 0)
      continue;

    // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
    map.try_emplace(sym, maybeDefault);
    // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
    // definition error.
    maybeDefault->resolve(*sym);
    // Eliminate foo@v1 from the symbol table.
    sym->symbolKind = Symbol::PlaceholderKind;
    const char *suffix2 = sym2->getVersionSuffix();
    if (suffix2[0] == '@' && suffix2[1] == '@' &&
        strcmp(suffix1 + 1, suffix2 + 2) == 0) {
      // foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
      map.try_emplace(sym, sym2);
      // If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
      // definition error.
      sym2->resolve(*sym);
      // Eliminate foo@v1 from the symbol table.
      sym->symbolKind = Symbol::PlaceholderKind;
    } else if (auto *sym1 = dyn_cast<Defined>(sym)) {
      if (sym2->versionId > VER_NDX_GLOBAL
              ? config->versionDefinitions[sym2->versionId].name == suffix1 + 1
              : sym1->section == sym2->section && sym1->value == sym2->value) {
        // Due to an assembler design flaw, if foo is defined, .symver foo,
        // foo@v1 defines both foo and foo@v1. Unless foo is bound to a
        // different version, GNU ld makes foo@v1 canonical and eliminates foo.
        // Emulate its behavior, otherwise we would have foo or foo@@v1 beside
        // foo@v1. foo@v1 and foo combining does not apply if they are not
        // defined in the same place.
        map.try_emplace(sym2, sym);
        sym2->symbolKind = Symbol::PlaceholderKind;
      }
    }
  }

  if (map.empty())

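For illustration (not part of the imported diff): the assembler quirk described in the comment above can be reproduced with a GNU .symver directive. A minimal sketch, assuming GCC/Clang inline asm on an ELF target (binding the version in a DSO additionally needs a version script that defines v1):

    // Because foo is itself a defined global, ".symver foo, foo@v1" leaves
    // the object file with both "foo" and "foo@v1" defined at the same
    // place -- the second special case above. GNU ld, and now lld, make
    // foo@v1 canonical and eliminate the plain foo.
    extern "C" int foo(void) { return 1; }
    __asm__(".symver foo, foo@v1");
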
@@ -849,17 +849,8 @@ void LinkerScript::diagnoseOrphanHandling() const {
}

uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) {
  bool isTbss =
      (ctx->outSec->flags & SHF_TLS) && ctx->outSec->type == SHT_NOBITS;
  uint64_t start = isTbss ? dot + ctx->threadBssOffset : dot;
  start = alignTo(start, alignment);
  uint64_t end = start + size;

  if (isTbss)
    ctx->threadBssOffset = end - dot;
  else
    dot = end;
  return end;
  dot = alignTo(dot, alignment) + size;
  return dot;
}

void LinkerScript::output(InputSection *s) {
@@ -931,13 +922,24 @@ static OutputSection *findFirstSection(PhdrEntry *load) {
// This function assigns offsets to input sections and an output section
// for a single sections command (e.g. ".text { *(.text); }").
void LinkerScript::assignOffsets(OutputSection *sec) {
  const bool isTbss = (sec->flags & SHF_TLS) && sec->type == SHT_NOBITS;
  const bool sameMemRegion = ctx->memRegion == sec->memRegion;
  const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr;
  const uint64_t savedDot = dot;
  ctx->memRegion = sec->memRegion;
  ctx->lmaRegion = sec->lmaRegion;

  if (sec->flags & SHF_ALLOC) {
  if (!(sec->flags & SHF_ALLOC)) {
    // Non-SHF_ALLOC sections have zero addresses.
    dot = 0;
  } else if (isTbss) {
    // Allow consecutive SHF_TLS SHT_NOBITS output sections. The address range
    // starts from the end address of the previous tbss section.
    if (ctx->tbssAddr == 0)
      ctx->tbssAddr = dot;
    else
      dot = ctx->tbssAddr;
  } else {
    if (ctx->memRegion)
      dot = ctx->memRegion->curPos;
    if (sec->addrExpr)
@@ -950,9 +952,6 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
    if (ctx->memRegion && ctx->memRegion->curPos < dot)
      expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos,
                         ctx->memRegion->name, sec->name);
  } else {
    // Non-SHF_ALLOC sections have zero addresses.
    dot = 0;
  }

  switchTo(sec);
@@ -1008,8 +1007,13 @@ void LinkerScript::assignOffsets(OutputSection *sec) {

  // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections
  // as they are not part of the process image.
  if (!(sec->flags & SHF_ALLOC))
  if (!(sec->flags & SHF_ALLOC)) {
    dot = savedDot;
  } else if (isTbss) {
    // NOBITS TLS sections are similar. Additionally save the end address.
    ctx->tbssAddr = dot;
    dot = savedDot;
  }
}

static bool isDiscardable(OutputSection &sec) {

@@ -247,11 +247,11 @@ class LinkerScript final {
  // not be used outside of the scope of a call to the above functions.
  struct AddressState {
    AddressState();
    uint64_t threadBssOffset = 0;
    OutputSection *outSec = nullptr;
    MemoryRegion *memRegion = nullptr;
    MemoryRegion *lmaRegion = nullptr;
    uint64_t lmaOffset = 0;
    uint64_t tbssAddr = 0;
  };

  llvm::DenseMap<StringRef, OutputSection *> nameToOutputSection;

@@ -527,6 +527,13 @@ static SmallSet<SharedSymbol *, 4> getSymbolsAt(SharedSymbol &ss) {
      if (auto *alias = dyn_cast_or_null<SharedSymbol>(sym))
        ret.insert(alias);
  }

  // The loop does not check SHT_GNU_verneed, so ret does not contain
  // non-default version symbols. If ss has a non-default version, ret won't
  // contain ss. Just add ss unconditionally. If a non-default version alias is
  // separately copy relocated, it and ss will have different addresses.
  // Fortunately this case is impractical and fails with GNU ld as well.
  ret.insert(&ss);
  return ret;
}

@@ -1496,9 +1496,9 @@ void ScriptParser::readAnonymousDeclaration() {
  std::vector<SymbolVersion> globals;
  std::tie(locals, globals) = readSymbols();
  for (const SymbolVersion &pat : locals)
    config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat);
    config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat);
  for (const SymbolVersion &pat : globals)
    config->versionDefinitions[VER_NDX_GLOBAL].patterns.push_back(pat);
    config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(pat);

  expect(";");
}
@@ -1510,13 +1510,12 @@ void ScriptParser::readVersionDeclaration(StringRef verStr) {
  std::vector<SymbolVersion> locals;
  std::vector<SymbolVersion> globals;
  std::tie(locals, globals) = readSymbols();
  for (const SymbolVersion &pat : locals)
    config->versionDefinitions[VER_NDX_LOCAL].patterns.push_back(pat);

  // Create a new version definition and add that to the global symbols.
  VersionDefinition ver;
  ver.name = verStr;
  ver.patterns = globals;
  ver.nonLocalPatterns = std::move(globals);
  ver.localPatterns = std::move(locals);
  ver.id = config->versionDefinitions.size();
  config->versionDefinitions.push_back(ver);

@@ -134,9 +134,20 @@ static bool canBeVersioned(const Symbol &sym) {
StringMap<std::vector<Symbol *>> &SymbolTable::getDemangledSyms() {
  if (!demangledSyms) {
    demangledSyms.emplace();
    std::string demangled;
    for (Symbol *sym : symVector)
      if (canBeVersioned(*sym))
        (*demangledSyms)[demangleItanium(sym->getName())].push_back(sym);
      if (canBeVersioned(*sym)) {
        StringRef name = sym->getName();
        size_t pos = name.find('@');
        if (pos == std::string::npos)
          demangled = demangleItanium(name);
        else if (pos + 1 == name.size() || name[pos + 1] == '@')
          demangled = demangleItanium(name.substr(0, pos));
        else
          demangled =
              (demangleItanium(name.substr(0, pos)) + name.substr(pos)).str();
        (*demangledSyms)[demangled].push_back(sym);
      }
  }
  return *demangledSyms;
}
@@ -150,19 +161,29 @@ std::vector<Symbol *> SymbolTable::findByVersion(SymbolVersion ver) {
  return {};
}

std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver) {
std::vector<Symbol *> SymbolTable::findAllByVersion(SymbolVersion ver,
                                                    bool includeNonDefault) {
  std::vector<Symbol *> res;
  SingleStringMatcher m(ver.name);
  auto check = [&](StringRef name) {
    size_t pos = name.find('@');
    if (!includeNonDefault)
      return pos == StringRef::npos;
    return !(pos + 1 < name.size() && name[pos + 1] == '@');
  };

  if (ver.isExternCpp) {
    for (auto &p : getDemangledSyms())
      if (m.match(p.first()))
        res.insert(res.end(), p.second.begin(), p.second.end());
        for (Symbol *sym : p.second)
          if (check(sym->getName()))
            res.push_back(sym);
    return res;
  }

  for (Symbol *sym : symVector)
    if (canBeVersioned(*sym) && m.match(sym->getName()))
    if (canBeVersioned(*sym) && check(sym->getName()) &&
        m.match(sym->getName()))
      res.push_back(sym);
  return res;
}
@@ -172,7 +193,7 @@ void SymbolTable::handleDynamicList() {
  for (SymbolVersion &ver : config->dynamicList) {
    std::vector<Symbol *> syms;
    if (ver.hasWildcard)
      syms = findAllByVersion(ver);
      syms = findAllByVersion(ver, /*includeNonDefault=*/true);
    else
      syms = findByVersion(ver);

@@ -181,21 +202,13 @@ void SymbolTable::handleDynamicList() {
  }
}

// Set symbol versions to symbols. This function handles patterns
// containing no wildcard characters.
void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
                                     StringRef versionName) {
  if (ver.hasWildcard)
    return;

// Set symbol versions to symbols. This function handles patterns containing no
// wildcard characters. Return false if no symbol definition matches ver.
bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
                                     StringRef versionName,
                                     bool includeNonDefault) {
  // Get a list of symbols which we need to assign the version to.
  std::vector<Symbol *> syms = findByVersion(ver);
  if (syms.empty()) {
    if (!config->undefinedVersion)
      error("version script assignment of '" + versionName + "' to symbol '" +
            ver.name + "' failed: symbol not defined");
    return;
  }

  auto getName = [](uint16_t ver) -> std::string {
    if (ver == VER_NDX_LOCAL)
@@ -207,10 +220,11 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,

  // Assign the version.
  for (Symbol *sym : syms) {
    // Skip symbols containing version info because symbol versions
    // specified by symbol names take precedence over version scripts.
    // See parseSymbolVersion().
    if (sym->getName().contains('@'))
    // For a non-local versionId, skip symbols containing version info because
    // symbol versions specified by symbol names take precedence over version
    // scripts. See parseSymbolVersion().
    if (!includeNonDefault && versionId != VER_NDX_LOCAL &&
        sym->getName().contains('@'))
      continue;

    // If the version has not been assigned, verdefIndex is -1. Use an arbitrary
@@ -225,13 +239,15 @@ void SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId,
      warn("attempt to reassign symbol '" + ver.name + "' of " +
           getName(sym->versionId) + " to " + getName(versionId));
  }
  return !syms.empty();
}

void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
                                        bool includeNonDefault) {
  // Exact matching takes precedence over fuzzy matching,
  // so we set a version to a symbol only if no version has been assigned
  // to the symbol. This behavior is compatible with GNU.
  for (Symbol *sym : findAllByVersion(ver))
  for (Symbol *sym : findAllByVersion(ver, includeNonDefault))
    if (sym->verdefIndex == UINT32_C(-1)) {
      sym->verdefIndex = 0;
      sym->versionId = versionId;

@@ -244,26 +260,60 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId) {
// script file, the script does not actually define any symbol version,
// but just specifies symbols visibilities.
void SymbolTable::scanVersionScript() {
  SmallString<128> buf;
  // First, we assign versions to exact matching symbols,
  // i.e. version definitions not containing any glob meta-characters.
  for (VersionDefinition &v : config->versionDefinitions)
    for (SymbolVersion &pat : v.patterns)
      assignExactVersion(pat, v.id, v.name);
  std::vector<Symbol *> syms;
  for (VersionDefinition &v : config->versionDefinitions) {
    auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
      bool found =
          assignExactVersion(pat, id, ver, /*includeNonDefault=*/false);
      buf.clear();
      found |= assignExactVersion({(pat.name + "@" + v.name).toStringRef(buf),
                                   pat.isExternCpp, /*hasWildCard=*/false},
                                  id, ver, /*includeNonDefault=*/true);
      if (!found && !config->undefinedVersion)
        errorOrWarn("version script assignment of '" + ver + "' to symbol '" +
                    pat.name + "' failed: symbol not defined");
    };
    for (SymbolVersion &pat : v.nonLocalPatterns)
      if (!pat.hasWildcard)
        assignExact(pat, v.id, v.name);
    for (SymbolVersion pat : v.localPatterns)
      if (!pat.hasWildcard)
        assignExact(pat, VER_NDX_LOCAL, "local");
  }

  // Next, assign versions to wildcards that are not "*". Note that because the
  // last match takes precedence over previous matches, we iterate over the
  // definitions in the reverse order.
  for (VersionDefinition &v : llvm::reverse(config->versionDefinitions))
    for (SymbolVersion &pat : v.patterns)
  auto assignWildcard = [&](SymbolVersion pat, uint16_t id, StringRef ver) {
    assignWildcardVersion(pat, id, /*includeNonDefault=*/false);
    buf.clear();
    assignWildcardVersion({(pat.name + "@" + ver).toStringRef(buf),
                           pat.isExternCpp, /*hasWildCard=*/true},
                          id,
                          /*includeNonDefault=*/true);
  };
  for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) {
    for (SymbolVersion &pat : v.nonLocalPatterns)
      if (pat.hasWildcard && pat.name != "*")
        assignWildcardVersion(pat, v.id);
        assignWildcard(pat, v.id, v.name);
    for (SymbolVersion &pat : v.localPatterns)
      if (pat.hasWildcard && pat.name != "*")
        assignWildcard(pat, VER_NDX_LOCAL, v.name);
  }

  // Then, assign versions to "*". In GNU linkers they have lower priority than
  // other wildcards.
  for (VersionDefinition &v : config->versionDefinitions)
    for (SymbolVersion &pat : v.patterns)
  for (VersionDefinition &v : config->versionDefinitions) {
    for (SymbolVersion &pat : v.nonLocalPatterns)
      if (pat.hasWildcard && pat.name == "*")
        assignWildcardVersion(pat, v.id);
        assignWildcard(pat, v.id, v.name);
    for (SymbolVersion &pat : v.localPatterns)
      if (pat.hasWildcard && pat.name == "*")
        assignWildcard(pat, VER_NDX_LOCAL, v.name);
  }

  // Symbol themselves might know their versions because symbols
  // can contain versions in the form of <name>@<version>.

@@ -65,12 +65,14 @@ public:

private:
  std::vector<Symbol *> findByVersion(SymbolVersion ver);
  std::vector<Symbol *> findAllByVersion(SymbolVersion ver);
  std::vector<Symbol *> findAllByVersion(SymbolVersion ver,
                                         bool includeNonDefault);

  llvm::StringMap<std::vector<Symbol *>> &getDemangledSyms();
  void assignExactVersion(SymbolVersion ver, uint16_t versionId,
                          StringRef versionName);
  void assignWildcardVersion(SymbolVersion ver, uint16_t versionId);
  bool assignExactVersion(SymbolVersion ver, uint16_t versionId,
                          StringRef versionName, bool includeNonDefault);
  void assignWildcardVersion(SymbolVersion ver, uint16_t versionId,
                             bool includeNonDefault);

  // The order the global symbols are in is not defined. We can use an arbitrary
  // order, but it has to be reproducible. That is true even when cross linking.

@@ -208,6 +208,9 @@ OutputSection *Symbol::getOutputSection() const {
// If a symbol name contains '@', the characters after it are
// a symbol version name. This function parses that.
void Symbol::parseSymbolVersion() {
  // Return if localized by a local: pattern in a version script.
  if (versionId == VER_NDX_LOCAL)
    return;
  StringRef s = getName();
  size_t pos = s.find('@');
  if (pos == 0 || pos == StringRef::npos)

@ -24,6 +24,13 @@ Non-comprehensive list of changes in this release
ELF Improvements
----------------

* ``-z start-stop-gc`` is now supported and becomes the default.
  (`D96914 <https://reviews.llvm.org/D96914>`_)
  (`rG6d2d3bd0 <https://reviews.llvm.org/rG6d2d3bd0a61f5fc7fd9f61f48bc30e9ca77cc619>`_)
* ``--shuffle-sections=<seed>`` has been changed to ``--shuffle-sections=<section-glob>=<seed>``.
  If seed is -1, the matched input sections are reversed.
  (`D98445 <https://reviews.llvm.org/D98445>`_)
  (`D98679 <https://reviews.llvm.org/D98679>`_)
* ``-Bsymbolic -Bsymbolic-functions`` has been changed to behave the same as ``-Bsymbolic-functions``. This matches GNU ld.
  (`D102461 <https://reviews.llvm.org/D102461>`_)
* ``-Bno-symbolic`` has been added.
@ -32,6 +39,75 @@ ELF Improvements
  (`D103303 <https://reviews.llvm.org/D103303>`_)
* ``-Bsymbolic-non-weak-functions`` has been added as a ``STB_GLOBAL`` subset of ``-Bsymbolic-functions``.
  (`D102570 <https://reviews.llvm.org/D102570>`_)
* ``--no-allow-shlib-undefined`` has been improved to catch more cases.
  (`D101996 <https://reviews.llvm.org/D101996>`_)
* ``__rela_iplt_start`` is no longer defined for -pie/-shared.
  This makes GCC/Clang ``-static-pie`` built executables work.
  (`rGf8cb78e9 <https://reviews.llvm.org/rGf8cb78e99aae9aa3f89f7bfe667db2c5b767f21f>`_)
* IRELATIVE/TLSDESC relocations now support ``-z rel``.
  (`D100544 <https://reviews.llvm.org/D100544>`_)
* Section groups with a zero flag are now supported.
  This is used by ``comdat nodeduplicate`` in LLVM IR.
  (`D96636 <https://reviews.llvm.org/D96636>`_)
  (`D106228 <https://reviews.llvm.org/D106228>`_)
* Defined symbols are now resolved before undefined symbols to stabilize the behavior of archive member extraction.
  (`D95985 <https://reviews.llvm.org/D95985>`_)
* ``STB_WEAK`` symbols are now preferred over COMMON symbols as a fix to a ``--fortran-common`` regression.
  (`D105945 <https://reviews.llvm.org/D105945>`_)
* Absolute relocations referencing undef weak now produce dynamic relocations for -pie, matching GOT-generating relocations.
  (`D105164 <https://reviews.llvm.org/D105164>`_)
* Exported symbols are now communicated to the LTO library so as to make LTO
  based whole program devirtualization (``-flto=thin -fwhole-program-vtables``)
  work with shared objects.
  (`D91583 <https://reviews.llvm.org/D91583>`_)
* Whole program devirtualization now respects ``local:`` version nodes in a version script.
  (`D98220 <https://reviews.llvm.org/D98220>`_)
  (`D98686 <https://reviews.llvm.org/D98686>`_)
* ``local:`` version nodes in a version script now apply to non-default version symbols.
  (`D107234 <https://reviews.llvm.org/D107234>`_)
* If an object file defines both ``foo`` and ``foo@v1``, now only ``foo@v1`` will be in the output.
  (`D107235 <https://reviews.llvm.org/D107235>`_)
* Copy relocations on non-default version symbols are now supported.
  (`D107535 <https://reviews.llvm.org/D107535>`_)

Linker script changes:

* ``.``, ``$``, and double quotes can now be used in symbol names in expressions.
  (`D98306 <https://reviews.llvm.org/D98306>`_)
  (`rGe7a7ad13 <https://reviews.llvm.org/rGe7a7ad134fe182aad190cb3ebc441164470e92f5>`_)
* Fixed value of ``.`` in the output section description of ``.tbss``.
  (`D107288 <https://reviews.llvm.org/D107288>`_)
* ``NOLOAD`` sections can now be placed in a ``PT_LOAD`` program header.
  (`D103815 <https://reviews.llvm.org/D103815>`_)
* ``OUTPUT_FORMAT(default, big, little)`` now consults ``-EL`` and ``-EB``.
  (`D96214 <https://reviews.llvm.org/D96214>`_)
* The ``OVERWRITE_SECTIONS`` command has been added.
  (`D103303 <https://reviews.llvm.org/D103303>`_)
* The section order within an ``INSERT AFTER`` command is now preserved.
  (`D105158 <https://reviews.llvm.org/D105158>`_)

Architecture specific changes:

* aarch64_be is now supported.
  (`D96188 <https://reviews.llvm.org/D96188>`_)
* The AMDGPU port now supports ``--amdhsa-code-object-version=4`` object files.
  (`D95811 <https://reviews.llvm.org/D95811>`_)
* The ARM port now accounts for PC biases in range extension thunk creation.
  (`D97550 <https://reviews.llvm.org/D97550>`_)
* The AVR port now computes ``e_flags``.
  (`D99754 <https://reviews.llvm.org/D99754>`_)
* The Mips port now omits unneeded dynamic relocations for PIE non-preemptible TLS.
  (`D101382 <https://reviews.llvm.org/D101382>`_)
* The PowerPC port now supports ``--power10-stubs=no`` to omit Power10 instructions from call stubs.
  (`D94625 <https://reviews.llvm.org/D94625>`_)
* Fixed a thunk creation bug in the PowerPC port when TOC/NOTOC calls are mixed.
  (`D101837 <https://reviews.llvm.org/D101837>`_)
* The RISC-V port now resolves undefined weak relocations to the current location if not using PLT.
  (`D103001 <https://reviews.llvm.org/D103001>`_)
* ``R_386_GOTOFF`` relocations from .debug_info are now allowed, to be compatible with GCC.
  (`D95994 <https://reviews.llvm.org/D95994>`_)
* ``gotEntrySize`` has been added to improve support for the ILP32 ABI of x86-64.
  (`D102569 <https://reviews.llvm.org/D102569>`_)

Breaking changes
----------------
@ -42,17 +118,75 @@ Breaking changes
COFF Improvements
-----------------

* ...
* Avoid thread exhaustion when running on 32 bit Windows.
  (`D105506 <https://reviews.llvm.org/D105506>`_)

* Improve terminating the process on Windows while a thread pool might be
  running. (`D102944 <https://reviews.llvm.org/D102944>`_)

MinGW Improvements
------------------

* ...
* Support for linking directly against a DLL without using an import library
  has been added. (`D104530 <https://reviews.llvm.org/D104530>`_ and
  `D104531 <https://reviews.llvm.org/D104531>`_)

MachO Improvements
------------------
* Fix linking with ``--export-all-symbols`` in combination with
  ``-function-sections``. (`D101522 <https://reviews.llvm.org/D101522>`_ and
  `D101615 <https://reviews.llvm.org/D101615>`_)

* Item 1.
* Fix automatic export of symbols from LTO objects.
  (`D101569 <https://reviews.llvm.org/D101569>`_)

* Accept more spellings of some options.
  (`D107237 <https://reviews.llvm.org/D107237>`_ and
  `D107253 <https://reviews.llvm.org/D107253>`_)

Mach-O Improvements
-------------------

The Mach-O backend is now able to link several large, real-world programs,
though we are still working out the kinks.

* arm64 is now supported as a target. (`D88629 <https://reviews.llvm.org/D88629>`_)
* arm64_32 is now supported as a target. (`D99822 <https://reviews.llvm.org/D99822>`_)
* Branch-range-extension thunks are now supported. (`D100818 <https://reviews.llvm.org/D100818>`_)
* ``-dead_strip`` is now supported. (`D103324 <https://reviews.llvm.org/D103324>`_)
* Support for identical code folding (``--icf=all``) has been added.
  (`D103292 <https://reviews.llvm.org/D103292>`_)
* Support for special ``$start`` and ``$end`` symbols for segment & sections has been
  added. (`D106767 <https://reviews.llvm.org/D106767>`_, `D106629 <https://reviews.llvm.org/D106629>`_)
* ``$ld$previous`` symbols are now supported. (`D103505 <https://reviews.llvm.org/D103505>`_)
* ``$ld$install_name`` symbols are now supported. (`D103746 <https://reviews.llvm.org/D103746>`_)
* ``__mh_*_header`` symbols are now supported. (`D97007 <https://reviews.llvm.org/D97007>`_)
* LC_CODE_SIGNATURE is now supported. (`D96164 <https://reviews.llvm.org/D96164>`_)
* LC_FUNCTION_STARTS is now supported. (`D97260 <https://reviews.llvm.org/D97260>`_)
* LC_DATA_IN_CODE is now supported. (`D103006 <https://reviews.llvm.org/D103006>`_)
* Bind opcodes are more compactly encoded. (`D106128 <https://reviews.llvm.org/D106128>`_,
  `D105075 <https://reviews.llvm.org/D105075>`_)
* LTO cache support has been added. (`D105922 <https://reviews.llvm.org/D105922>`_)
* ``-application_extension`` is now supported. (`D105818 <https://reviews.llvm.org/D105818>`_)
* ``-export_dynamic`` is now partially supported. (`D105482 <https://reviews.llvm.org/D105482>`_)
* ``-arch_multiple`` is now supported. (`D105450 <https://reviews.llvm.org/D105450>`_)
* ``-final_output`` is now supported. (`D105449 <https://reviews.llvm.org/D105449>`_)
* ``-umbrella`` is now supported. (`D105448 <https://reviews.llvm.org/D105448>`_)
* ``--print-dylib-search`` is now supported. (`D103985 <https://reviews.llvm.org/D103985>`_)
* ``-force_load_swift_libs`` is now supported. (`D103709 <https://reviews.llvm.org/D103709>`_)
* ``-reexport_framework``, ``-reexport_library``, ``-reexport-l`` are now supported.
  (`D103497 <https://reviews.llvm.org/D103497>`_)
* ``.weak_def_can_be_hidden`` is now supported. (`D101080 <https://reviews.llvm.org/D101080>`_)
* ``-add_ast_path`` is now supported. (`D100076 <https://reviews.llvm.org/D100076>`_)
* ``-segprot`` is now supported. (`D99389 <https://reviews.llvm.org/D99389>`_)
* ``-dependency_info`` is now partially supported. (`D98559 <https://reviews.llvm.org/D98559>`_)
* ``--time-trace`` is now supported. (`D98419 <https://reviews.llvm.org/D98419>`_)
* ``-mark_dead_strippable_dylib`` is now supported. (`D98262 <https://reviews.llvm.org/D98262>`_)
* ``-[un]exported_symbol[s_list]`` is now supported. (`D98223 <https://reviews.llvm.org/D98223>`_)
* ``-flat_namespace`` is now supported. (`D97641 <https://reviews.llvm.org/D97641>`_)
* ``-rename_section`` and ``-rename_segment`` are now supported. (`D97600 <https://reviews.llvm.org/D97600>`_)
* ``-bundle_loader`` is now supported. (`D95913 <https://reviews.llvm.org/D95913>`_)
* ``-map`` is now partially supported. (`D98323 <https://reviews.llvm.org/D98323>`_)

There were numerous other bug-fixes as well.

WebAssembly Improvements
------------------------
@ -7,8 +7,11 @@
//===----------------------------------------------------------------------===//

#include "CommandObjectMemoryTag.h"
#include "lldb/Host/OptionParser.h"
#include "lldb/Interpreter/CommandReturnObject.h"
#include "lldb/Interpreter/OptionArgParser.h"
#include "lldb/Interpreter/OptionGroupFormat.h"
#include "lldb/Interpreter/OptionValueString.h"
#include "lldb/Target/Process.h"

using namespace lldb;
@ -21,7 +24,8 @@ class CommandObjectMemoryTagRead : public CommandObjectParsed {
public:
CommandObjectMemoryTagRead(CommandInterpreter &interpreter)
: CommandObjectParsed(interpreter, "tag",
"Read memory tags for the given range of memory.",
"Read memory tags for the given range of memory."
" Mismatched tags will be marked.",
nullptr,
eCommandRequiresTarget | eCommandRequiresProcess |
eCommandProcessMustBePaused) {
@ -97,16 +101,17 @@ protected:
return false;
}

result.AppendMessageWithFormatv("Logical tag: {0:x}",
tag_manager->GetLogicalTag(start_addr));
lldb::addr_t logical_tag = tag_manager->GetLogicalTag(start_addr);
result.AppendMessageWithFormatv("Logical tag: {0:x}", logical_tag);
result.AppendMessage("Allocation tags:");

addr_t addr = tagged_range->GetRangeBase();
for (auto tag : *tags) {
addr_t next_addr = addr + tag_manager->GetGranuleSize();
// Showing tagged addresses here until we have non address bit handling
result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}", addr, next_addr,
tag);
result.AppendMessageWithFormatv("[{0:x}, {1:x}): {2:x}{3}", addr,
next_addr, tag,
logical_tag == tag ? "" : " (mismatch)");
addr = next_addr;
}
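The read path above reduces to a per-granule walk. A minimal self-contained sketch of that loop (illustrative C++, not lldb code; the granule size and sample values are assumptions for the demo):

#include <cstdint>
#include <cstdio>
#include <vector>

// Walk one tag per granule, flagging entries that differ from the
// pointer's logical tag, mirroring the comparison added above.
void PrintTags(uint64_t addr, uint64_t logical_tag,
               const std::vector<uint64_t> &tags, uint64_t granule) {
  for (uint64_t tag : tags) {
    uint64_t next = addr + granule;
    std::printf("[%#llx, %#llx): %#llx%s\n",
                (unsigned long long)addr, (unsigned long long)next,
                (unsigned long long)tag,
                logical_tag == tag ? "" : " (mismatch)");
    addr = next;
  }
}

int main() {
  // MTE-style 16-byte granules; the tag values are made up.
  PrintTags(0x1000, 0x9, {0x9, 0x9, 0x3}, 16);
}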
@ -115,6 +120,168 @@ protected:
}
};

#define LLDB_OPTIONS_memory_tag_write
#include "CommandOptions.inc"

class CommandObjectMemoryTagWrite : public CommandObjectParsed {
public:
class OptionGroupTagWrite : public OptionGroup {
public:
OptionGroupTagWrite() : OptionGroup(), m_end_addr(LLDB_INVALID_ADDRESS) {}

~OptionGroupTagWrite() override = default;

llvm::ArrayRef<OptionDefinition> GetDefinitions() override {
return llvm::makeArrayRef(g_memory_tag_write_options);
}

Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_value,
ExecutionContext *execution_context) override {
Status status;
const int short_option =
g_memory_tag_write_options[option_idx].short_option;

switch (short_option) {
case 'e':
m_end_addr = OptionArgParser::ToAddress(execution_context, option_value,
LLDB_INVALID_ADDRESS, &status);
break;
default:
llvm_unreachable("Unimplemented option");
}

return status;
}

void OptionParsingStarting(ExecutionContext *execution_context) override {
m_end_addr = LLDB_INVALID_ADDRESS;
}

lldb::addr_t m_end_addr;
};

CommandObjectMemoryTagWrite(CommandInterpreter &interpreter)
: CommandObjectParsed(interpreter, "tag",
"Write memory tags starting from the granule that "
"contains the given address.",
nullptr,
eCommandRequiresTarget | eCommandRequiresProcess |
eCommandProcessMustBePaused),
m_option_group(), m_tag_write_options() {
// Address
m_arguments.push_back(
CommandArgumentEntry{CommandArgumentData(eArgTypeAddressOrExpression)});
// One or more tag values
m_arguments.push_back(CommandArgumentEntry{
CommandArgumentData(eArgTypeValue, eArgRepeatPlus)});

m_option_group.Append(&m_tag_write_options);
m_option_group.Finalize();
}

~CommandObjectMemoryTagWrite() override = default;

Options *GetOptions() override { return &m_option_group; }

protected:
bool DoExecute(Args &command, CommandReturnObject &result) override {
if (command.GetArgumentCount() < 2) {
result.AppendError("wrong number of arguments; expected "
"<address-expression> <tag> [<tag> [...]]");
return false;
}

Status error;
addr_t start_addr = OptionArgParser::ToAddress(
&m_exe_ctx, command[0].ref(), LLDB_INVALID_ADDRESS, &error);
if (start_addr == LLDB_INVALID_ADDRESS) {
result.AppendErrorWithFormatv("Invalid address expression, {0}",
error.AsCString());
return false;
}

command.Shift(); // shift off start address

std::vector<lldb::addr_t> tags;
for (auto &entry : command) {
lldb::addr_t tag_value;
// getAsInteger returns true on failure
if (entry.ref().getAsInteger(0, tag_value)) {
result.AppendErrorWithFormat(
"'%s' is not a valid unsigned decimal string value.\n",
entry.c_str());
return false;
}
tags.push_back(tag_value);
}

Process *process = m_exe_ctx.GetProcessPtr();
llvm::Expected<const MemoryTagManager *> tag_manager_or_err =
process->GetMemoryTagManager();

if (!tag_manager_or_err) {
result.SetError(Status(tag_manager_or_err.takeError()));
return false;
}

const MemoryTagManager *tag_manager = *tag_manager_or_err;

MemoryRegionInfos memory_regions;
// If this fails the list of regions is cleared, so we don't need to read
// the return status here.
process->GetMemoryRegions(memory_regions);

// We have to assume start_addr is not granule aligned.
// So if we simply made a range:
// (start_addr, start_addr + (N * granule_size))
// We would end up with a range that isn't N granules but N+1
// granules. To avoid this we'll align the start first using the method that
// doesn't check memory attributes. (if the final range is untagged we'll
// handle that error later)
lldb::addr_t aligned_start_addr =
tag_manager->ExpandToGranule(MemoryTagManager::TagRange(start_addr, 1))
.GetRangeBase();
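// Illustration, not part of the patch: with a 16-byte granule, a
// start_addr of 0x100f expands to an aligned base of 0x1000, so writing
// N tags covers [0x1000, 0x1000 + N*16) instead of spilling into an
// extra granule.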
lldb::addr_t end_addr = 0;
// When you have an end address you want to align the range like tag read
// does. Meaning, align the start down (which we've done) and align the end
// up.
if (m_tag_write_options.m_end_addr != LLDB_INVALID_ADDRESS)
end_addr = m_tag_write_options.m_end_addr;
else
// Without an end address assume number of tags matches number of granules
// to write to
end_addr =
aligned_start_addr + (tags.size() * tag_manager->GetGranuleSize());

// Now we've aligned the start address so if we ask for another range
// using the number of tags N, we'll get back a range that is also N
// granules in size.
llvm::Expected<MemoryTagManager::TagRange> tagged_range =
tag_manager->MakeTaggedRange(aligned_start_addr, end_addr,
memory_regions);

if (!tagged_range) {
result.SetError(Status(tagged_range.takeError()));
return false;
}

Status status = process->WriteMemoryTags(tagged_range->GetRangeBase(),
tagged_range->GetByteSize(), tags);

if (status.Fail()) {
result.SetError(status);
return false;
}

result.SetStatus(eReturnStatusSuccessFinishResult);
return true;
}

OptionGroupOptions m_option_group;
OptionGroupTagWrite m_tag_write_options;
};

CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter)
: CommandObjectMultiword(
interpreter, "tag", "Commands for manipulating memory tags",
@ -123,6 +290,11 @@ CommandObjectMemoryTag::CommandObjectMemoryTag(CommandInterpreter &interpreter)
new CommandObjectMemoryTagRead(interpreter));
read_command_object->SetCommandName("memory tag read");
LoadSubCommand("read", read_command_object);

CommandObjectSP write_command_object(
new CommandObjectMemoryTagWrite(interpreter));
write_command_object->SetCommandName("memory tag write");
LoadSubCommand("write", write_command_object);
}

CommandObjectMemoryTag::~CommandObjectMemoryTag() = default;

@ -504,6 +504,14 @@ let Command = "memory write" in {
Desc<"Start writing bytes from an offset within the input file.">;
}

let Command = "memory tag write" in {
def memory_write_end_addr : Option<"end-addr", "e">, Group<1>,
Arg<"AddressOrExpression">, Desc<
"Set tags for start address to end-addr, repeating tags as needed"
" to cover the range. (instead of calculating the range from the"
" number of tags given)">;
}
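// Hypothetical session using the option defined above (the address
// expressions are illustrative): "memory tag write mte_buf 9 -e mte_buf+32"
// repeats tag 9 across every granule from mte_buf up to mte_buf+32.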
let Command = "register read" in {
|
||||
def register_read_alternate : Option<"alternate", "A">,
|
||||
Desc<"Display register names using the alternate register name if there "
|
||||
|
@ -3474,15 +3474,31 @@ GDBRemoteCommunicationServerLLGS::Handle_qMemTags(
|
||||
if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':')
|
||||
return SendIllFormedResponse(packet, invalid_type_err);
|
||||
|
||||
int32_t type =
|
||||
packet.GetS32(std::numeric_limits<int32_t>::max(), /*base=*/16);
|
||||
if (type == std::numeric_limits<int32_t>::max() ||
|
||||
// Type is a signed integer but packed into the packet as its raw bytes.
|
||||
// However, our GetU64 uses strtoull which allows +/-. We do not want this.
|
||||
const char *first_type_char = packet.Peek();
|
||||
if (first_type_char && (*first_type_char == '+' || *first_type_char == '-'))
|
||||
return SendIllFormedResponse(packet, invalid_type_err);
|
||||
|
||||
// Extract type as unsigned then cast to signed.
|
||||
// Using a uint64_t here so that we have some value outside of the 32 bit
|
||||
// range to use as the invalid return value.
|
||||
uint64_t raw_type =
|
||||
packet.GetU64(std::numeric_limits<uint64_t>::max(), /*base=*/16);
|
||||
|
||||
if ( // Make sure the cast below would be valid
|
||||
raw_type > std::numeric_limits<uint32_t>::max() ||
|
||||
// To catch inputs like "123aardvark" that will parse but clearly aren't
|
||||
// valid in this case.
|
||||
packet.GetBytesLeft()) {
|
||||
return SendIllFormedResponse(packet, invalid_type_err);
|
||||
}
|
||||
|
||||
// First narrow to 32 bits otherwise the copy into type would take
|
||||
// the wrong 4 bytes on big endian.
|
||||
uint32_t raw_type_32 = raw_type;
|
||||
int32_t type = reinterpret_cast<int32_t &>(raw_type_32);
|
||||
|
||||
StreamGDBRemote response;
|
||||
std::vector<uint8_t> tags;
|
||||
Status error = m_current_process->ReadMemoryTags(type, addr, length, tags);
|
||||
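Both packet handlers now share this narrowing idiom, which is easy to get wrong. A standalone sketch of it (hypothetical helper, not from the source):

#include <cassert>
#include <cstdint>
#include <cstring>

// Narrow to 32 bits first, then bit-copy into a signed type. Copying
// four bytes straight out of the uint64_t would pick up the wrong half
// on a big-endian host, which is exactly what the change above avoids.
int32_t NarrowPacketType(uint64_t raw_type) {
  assert(raw_type <= UINT32_MAX && "caller validated the range");
  uint32_t raw32 = static_cast<uint32_t>(raw_type);
  int32_t type;
  std::memcpy(&type, &raw32, sizeof type); // well-defined bit copy
  return type;
}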
@ -3552,7 +3568,11 @@ GDBRemoteCommunicationServerLLGS::Handle_QMemTags(
packet.GetU64(std::numeric_limits<uint64_t>::max(), /*base=*/16);
if (raw_type > std::numeric_limits<uint32_t>::max())
return SendIllFormedResponse(packet, invalid_type_err);
int32_t type = static_cast<int32_t>(raw_type);

// First narrow to 32 bits. Otherwise the copy below would get the wrong
// 4 bytes on big endian.
uint32_t raw_type_32 = raw_type;
int32_t type = reinterpret_cast<int32_t &>(raw_type_32);

// Tag data
if (packet.GetBytesLeft() < 1 || packet.GetChar() != ':')

@ -223,62 +223,32 @@ void TypeSystemMap::ForEach(std::function<bool(TypeSystem *)> const &callback) {
llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage(
lldb::LanguageType language,
llvm::Optional<CreateCallback> create_callback) {
llvm::Error error = llvm::Error::success();
assert(!error); // Check the success value when assertions are enabled
std::lock_guard<std::mutex> guard(m_mutex);
if (m_clear_in_progress) {
error = llvm::make_error<llvm::StringError>(
if (m_clear_in_progress)
return llvm::make_error<llvm::StringError>(
"Unable to get TypeSystem because TypeSystemMap is being cleared",
llvm::inconvertibleErrorCode());
} else {
collection::iterator pos = m_map.find(language);
if (pos != m_map.end()) {
auto *type_system = pos->second.get();
if (type_system) {
llvm::consumeError(std::move(error));
return *type_system;
}
error = llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
return std::move(error);
}

for (const auto &pair : m_map) {
if (pair.second && pair.second->SupportsLanguage(language)) {
// Add a new mapping for "language" to point to an already existing
// TypeSystem that supports this language
m_map[language] = pair.second;
if (pair.second.get()) {
llvm::consumeError(std::move(error));
return *pair.second.get();
}
error = llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
return std::move(error);
}
}
collection::iterator pos = m_map.find(language);
if (pos != m_map.end()) {
auto *type_system = pos->second.get();
if (type_system)
return *type_system;
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
}

if (!create_callback) {
error = llvm::make_error<llvm::StringError>(
"Unable to find type system for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)),
llvm::inconvertibleErrorCode());
} else {
// Cache even if we get a shared pointer that contains a null type system
// back
TypeSystemSP type_system_sp = (*create_callback)();
m_map[language] = type_system_sp;
if (type_system_sp.get()) {
llvm::consumeError(std::move(error));
return *type_system_sp.get();
}
error = llvm::make_error<llvm::StringError>(
for (const auto &pair : m_map) {
if (pair.second && pair.second->SupportsLanguage(language)) {
// Add a new mapping for "language" to point to an already existing
// TypeSystem that supports this language
m_map[language] = pair.second;
if (pair.second.get())
return *pair.second.get();
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
@ -286,7 +256,23 @@ llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage(
}
}

return std::move(error);
if (!create_callback)
return llvm::make_error<llvm::StringError>(
"Unable to find type system for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)),
llvm::inconvertibleErrorCode());

// Cache even if we get a shared pointer that contains a null type system
// back
TypeSystemSP type_system_sp = (*create_callback)();
m_map[language] = type_system_sp;
if (type_system_sp.get())
return *type_system_sp.get();
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
}

llvm::Expected<TypeSystem &>
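The rewritten function trades one llvm::Error threaded through nested branches for a chain of early returns. A minimal sketch of that shape, with plain C++ stand-ins for llvm::Expected and the map (all names here are illustrative):

#include <map>
#include <string>

struct Expected {
  int *value = nullptr; // set on success
  std::string error;    // set on failure
};

// Each failure site builds and returns its own error immediately; no
// shared mutable error object, no consumeError bookkeeping.
Expected Lookup(std::map<int, int *> &m, int lang, bool clearing) {
  if (clearing)
    return {nullptr, "map is being cleared"};
  auto it = m.find(lang);
  if (it != m.end() && it->second)
    return {it->second, ""};
  return {nullptr, "no type system for language"};
}

int main() {
  std::map<int, int *> m;
  int x = 42;
  m[1] = &x;
  Expected r = Lookup(m, 1, /*clearing=*/false); // r.value == &x
  return r.value ? 0 : 1;
}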
@ -744,6 +744,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// minimum/maximum flavor.
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF);

/// Return the minimum or maximum constant value for the specified integer
/// min/max flavor and type.
APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth);

/// Check if the values in \p VL are select instructions that can be converted
/// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a
/// conversion is possible, together with a bool indicating whether all select

@ -324,6 +324,9 @@ public:
/// name is not found.
GlobalValue *getNamedValue(StringRef Name) const;

/// Return the number of global values in the module.
unsigned getNumNamedValues() const;

/// Return a unique non-zero ID for the specified metadata kind. This ID is
/// uniqued across modules in the current LLVMContext.
unsigned getMDKindID(StringRef Name) const;

@ -1104,6 +1104,7 @@ namespace RawInstrProf {
// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
// sensitive records.
// Version 6: Added binary id.
// Version 7: Reorder binary id and include version in signature.
const uint64_t Version = INSTR_PROF_RAW_VERSION;

template <class IntPtrT> inline uint64_t getMagic();

@ -129,6 +129,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
@ -137,7 +138,6 @@ INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
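Taken together, the two INSTR_PROF_RAW_HEADER hunks fix the new field order. A sketch of the version-7 raw header they imply (the struct is an illustration only; the real layout is generated from the macro list, and the field elided by the hunk is filled in from the reader code later in this diff):

#include <cstdint>

struct RawHeaderV7 {
  uint64_t Magic;
  uint64_t Version;
  uint64_t BinaryIdsSize; // moved up from the tail of the header
  uint64_t DataSize;
  uint64_t PaddingBytesBeforeCounters;
  uint64_t CountersSize;
  uint64_t PaddingBytesAfterCounters;
  uint64_t NamesSize;
  uint64_t CountersDelta;
  uint64_t NamesDelta;
  uint64_t ValueKindLast;
};

// All fields are uint64_t, so the struct is packed with no padding.
static_assert(sizeof(RawHeaderV7) == 11 * sizeof(uint64_t),
              "header is eleven 64-bit fields");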
@ -646,7 +646,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129

/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 6
#define INSTR_PROF_RAW_VERSION 7
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
/* Coverage mapping format version (start from 0). */

@ -1855,6 +1855,10 @@ public:
///
static void createShallowWrapper(Function &F);

/// Returns true if the function \p F can be internalized. i.e. it has a
/// compatible linkage.
static bool isInternalizable(Function &F);

/// Make another copy of the function \p F such that the copied version has
/// internal linkage afterwards and can be analysed. Then we replace all uses
/// of the original function to the copied one
@ -1870,6 +1874,22 @@ public:
/// null pointer.
static Function *internalizeFunction(Function &F, bool Force = false);

/// Make copies of each function in the set \p FnSet such that the copied
/// version has internal linkage afterwards and can be analysed. Then we
/// replace all uses of the original function to the copied one. The map
/// \p FnMap contains a mapping of functions to their internalized versions.
///
/// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
/// linkage can be internalized because these linkages guarantee that other
/// definitions with the same name have the same semantics as this one.
///
/// This version will internalize all the functions in the set \p FnSet at
/// once and then replace the uses. This prevents internalized functions being
/// called by external functions when there is an internalized version in the
/// module.
static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
DenseMap<Function *, Function *> &FnMap);

/// Return the data layout associated with the anchor scope.
const DataLayout &getDataLayout() const { return InfoCache.DL; }

@ -51,11 +51,13 @@
#define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"

namespace llvm {
@ -176,7 +178,7 @@ public:
class PredicateInfo {
public:
PredicateInfo(Function &, DominatorTree &, AssumptionCache &);
~PredicateInfo() = default;
~PredicateInfo();

void verifyPredicateInfo() const;

@ -203,6 +205,8 @@ private:
// the Predicate Info, they belong to the ValueInfo structs in the ValueInfos
// vector.
DenseMap<const Value *, const PredicateBase *> PredicateMap;
// The set of ssa_copy declarations we created with our custom mangling.
SmallSet<AssertingVH<Function>, 20> CreatedDeclarations;
};

// This pass does eager building and then printing of PredicateInfo. It is used

@ -83,6 +83,9 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
/// InsertedValues/InsertedPostIncValues.
SmallPtrSet<Value *, 16> ReusedValues;

// The induction variables generated.
SmallVector<WeakVH, 2> InsertedIVs;

/// A memoization of the "relevant" loop for a given SCEV.
DenseMap<const SCEV *, const Loop *> RelevantLoops;

@ -199,9 +202,11 @@ public:
InsertedPostIncValues.clear();
ReusedValues.clear();
ChainedPhis.clear();
InsertedIVs.clear();
}

ScalarEvolution *getSE() { return &SE; }
const SmallVectorImpl<WeakVH> &getInsertedIVs() const { return InsertedIVs; }

/// Return a vector containing all instructions inserted during expansion.
SmallVector<Instruction *, 32> getAllInsertedInstructions() const {

@ -4080,6 +4080,22 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
std::swap(TrueVal, FalseVal);
}

// Check for integer min/max with a limit constant:
// X > MIN_INT ? X : MIN_INT --> X
// X < MAX_INT ? X : MAX_INT --> X
if (TrueVal->getType()->isIntOrIntVectorTy()) {
Value *X, *Y;
SelectPatternFlavor SPF =
matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal,
X, Y).Flavor;
if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) {
APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF),
X->getType()->getScalarSizeInBits());
if (match(Y, m_SpecificInt(LimitC)))
return X;
}
}

if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) {
Value *X;
const APInt *Y;
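The new fold rests on a saturation identity rather than anything subtle. A self-contained check of it in plain C++ (illustrative only; the real transform operates on IR values):

#include <cassert>
#include <climits>
#include <initializer_list>

// max(x, INT_MIN) == x for every x, so the compare/select pair
// "X > MIN_INT ? X : MIN_INT" can be replaced by X outright; the
// "X < MAX_INT ? X : MAX_INT" case is symmetric.
int fold_smax_min_int(int x) { return x > INT_MIN ? x : INT_MIN; }

int main() {
  for (int x : {INT_MIN, -1, 0, 1, INT_MAX})
    assert(fold_smax_min_int(x) == x);
}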
@ -6253,6 +6253,16 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
return getMinMaxPred(getInverseMinMaxFlavor(SPF));
}

APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
switch (SPF) {
case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
case SPF_UMAX: return APInt::getMaxValue(BitWidth);
case SPF_UMIN: return APInt::getMinValue(BitWidth);
default: llvm_unreachable("Unexpected flavor");
}
}

std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
// Check if VL contains select instructions that can be folded into a min/max

@ -20560,8 +20560,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
ExtIdx * NVT.getScalarSizeInBits())
ExtIdx * NVT.getScalarSizeInBits()) {
if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
return SDValue();

return DAG.getBitcast(NVT, V.getOperand(1));
}
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),

@ -677,8 +677,9 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
}

if (Retain) {
if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))
if ((Ctx.getAsmInfo()->useIntegratedAssembler() ||
Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
!TM.getTargetTriple().isOSSolaris())
Flags |= ELF::SHF_GNU_RETAIN;
return NextUniqueID++;
}
@ -855,8 +856,10 @@ static MCSection *selectELFSectionForGlobal(
EmitUniqueSection = true;
Flags |= ELF::SHF_LINK_ORDER;
}
if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() ||
Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) {
if (Retain &&
(Ctx.getAsmInfo()->useIntegratedAssembler() ||
Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
!TM.getTargetTriple().isOSSolaris()) {
EmitUniqueSection = true;
Flags |= ELF::SHF_GNU_RETAIN;
}
@ -349,200 +349,6 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
}
}

/// Wrapper around getFoldedSizeOfImpl() that adds caching.
static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded,
DenseMap<Type *, Constant *> &Cache);

/// Return a ConstantExpr with type DestTy for sizeof on Ty, with any known
/// factors factored out. If Folded is false, return null if no factoring was
/// possible, to avoid endlessly bouncing an unfoldable expression back into the
/// top-level folder.
static Constant *getFoldedSizeOfImpl(Type *Ty, Type *DestTy, bool Folded,
DenseMap<Type *, Constant *> &Cache) {
// This is the actual implementation of getFoldedSizeOf(). To get the caching
// behavior, we need to call getFoldedSizeOf() when we recurse.

if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true, Cache);
return ConstantExpr::getNUWMul(E, N);
}

if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
unsigned NumElems = STy->getNumElements();
// An empty struct has size zero.
if (NumElems == 0)
return ConstantExpr::getNullValue(DestTy);
// Check for a struct with all members having the same size.
Constant *MemberSize =
getFoldedSizeOf(STy->getElementType(0), DestTy, true, Cache);
bool AllSame = true;
for (unsigned i = 1; i != NumElems; ++i)
if (MemberSize !=
getFoldedSizeOf(STy->getElementType(i), DestTy, true, Cache)) {
AllSame = false;
break;
}
if (AllSame) {
Constant *N = ConstantInt::get(DestTy, NumElems);
return ConstantExpr::getNUWMul(MemberSize, N);
}
}

// Pointer size doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (PointerType *PTy = dyn_cast<PointerType>(Ty))
if (!PTy->getElementType()->isIntegerTy(1))
return getFoldedSizeOf(
PointerType::get(IntegerType::get(PTy->getContext(), 1),
PTy->getAddressSpace()),
DestTy, true, Cache);

// If there's no interesting folding happening, bail so that we don't create
// a constant that looks like it needs folding but really doesn't.
if (!Folded)
return nullptr;

// Base case: Get a regular sizeof expression.
Constant *C = ConstantExpr::getSizeOf(Ty);
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
DestTy, false),
C, DestTy);
return C;
}

static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded,
DenseMap<Type *, Constant *> &Cache) {
// Check for previously generated folded size constant.
auto It = Cache.find(Ty);
if (It != Cache.end())
return It->second;
return Cache[Ty] = getFoldedSizeOfImpl(Ty, DestTy, Folded, Cache);
}

static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) {
DenseMap<Type *, Constant *> Cache;
return getFoldedSizeOf(Ty, DestTy, Folded, Cache);
}

/// Return a ConstantExpr with type DestTy for alignof on Ty, with any known
/// factors factored out. If Folded is false, return null if no factoring was
/// possible, to avoid endlessly bouncing an unfoldable expression back into the
/// top-level folder.
static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) {
// The alignment of an array is equal to the alignment of the
// array element. Note that this is not always true for vectors.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
DestTy,
false),
C, DestTy);
return C;
}

if (StructType *STy = dyn_cast<StructType>(Ty)) {
// Packed structs always have an alignment of 1.
if (STy->isPacked())
return ConstantInt::get(DestTy, 1);

// Otherwise, struct alignment is the maximum alignment of any member.
// Without target data, we can't compare much, but we can check to see
// if all the members have the same alignment.
unsigned NumElems = STy->getNumElements();
// An empty struct has minimal alignment.
if (NumElems == 0)
return ConstantInt::get(DestTy, 1);
// Check for a struct with all members having the same alignment.
Constant *MemberAlign =
getFoldedAlignOf(STy->getElementType(0), DestTy, true);
bool AllSame = true;
for (unsigned i = 1; i != NumElems; ++i)
if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
AllSame = false;
break;
}
if (AllSame)
return MemberAlign;
}

// Pointer alignment doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (PointerType *PTy = dyn_cast<PointerType>(Ty))
if (!PTy->getElementType()->isIntegerTy(1))
return
getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
1),
PTy->getAddressSpace()),
DestTy, true);

// If there's no interesting folding happening, bail so that we don't create
// a constant that looks like it needs folding but really doesn't.
if (!Folded)
return nullptr;

// Base case: Get a regular alignof expression.
Constant *C = ConstantExpr::getAlignOf(Ty);
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
DestTy, false),
C, DestTy);
return C;
}

/// Return a ConstantExpr with type DestTy for offsetof on Ty and FieldNo, with
/// any known factors factored out. If Folded is false, return null if no
/// factoring was possible, to avoid endlessly bouncing an unfoldable expression
/// back into the top-level folder.
static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy,
bool Folded) {
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
DestTy, false),
FieldNo, DestTy);
Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
return ConstantExpr::getNUWMul(E, N);
}

if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
unsigned NumElems = STy->getNumElements();
// An empty struct has no members.
if (NumElems == 0)
return nullptr;
// Check for a struct with all members having the same size.
Constant *MemberSize =
getFoldedSizeOf(STy->getElementType(0), DestTy, true);
bool AllSame = true;
for (unsigned i = 1; i != NumElems; ++i)
if (MemberSize !=
getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
AllSame = false;
break;
}
if (AllSame) {
Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
false,
DestTy,
false),
FieldNo, DestTy);
return ConstantExpr::getNUWMul(MemberSize, N);
}
}

// If there's no interesting folding happening, bail so that we don't create
// a constant that looks like it needs folding but really doesn't.
if (!Folded)
return nullptr;

// Base case: Get a regular offsetof expression.
Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
DestTy, false),
C, DestTy);
return C;
}

Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Type *DestTy) {
if (isa<PoisonValue>(V))
@ -666,53 +472,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// Is it a null pointer value?
if (V->isNullValue())
return ConstantInt::get(DestTy, 0);
// If this is a sizeof-like expression, pull out multiplications by
// known factors to expose them to subsequent folding. If it's an
// alignof-like expression, factor out known factors.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
if (CE->getOpcode() == Instruction::GetElementPtr &&
CE->getOperand(0)->isNullValue()) {
// FIXME: Looks like getFoldedSizeOf(), getFoldedOffsetOf() and
// getFoldedAlignOf() don't handle the case when DestTy is a vector of
// pointers yet. We end up in asserts in CastInst::getCastOpcode (see
// test/Analysis/ConstantFolding/cast-vector.ll). I've only seen this
// happen in one "real" C-code test case, so it does not seem to be an
// important optimization to handle vectors here. For now, simply bail
// out.
if (DestTy->isVectorTy())
return nullptr;
GEPOperator *GEPO = cast<GEPOperator>(CE);
Type *Ty = GEPO->getSourceElementType();
if (CE->getNumOperands() == 2) {
// Handle a sizeof-like expression.
Constant *Idx = CE->getOperand(1);
bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
DestTy, false),
Idx, DestTy);
return ConstantExpr::getMul(C, Idx);
}
} else if (CE->getNumOperands() == 3 &&
CE->getOperand(1)->isNullValue()) {
// Handle an alignof-like expression.
if (StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
if (CI->isOne() &&
STy->getNumElements() == 2 &&
STy->getElementType(0)->isIntegerTy(1)) {
return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
}
}
// Handle an offsetof-like expression.
if (Ty->isStructTy() || Ty->isArrayTy()) {
if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
DestTy, false))
return C;
}
}
}
// Other pointer types cannot be casted
return nullptr;
case Instruction::UIToFP:
@ -114,6 +114,10 @@ GlobalValue *Module::getNamedValue(StringRef Name) const {
return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
}

unsigned Module::getNumNamedValues() const {
return getValueSymbolTable().size();
}

/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
/// This ID is uniqued across modules in the current LLVMContext.
unsigned Module::getMDKindID(StringRef Name) const {

@ -366,6 +366,7 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
if (GET_VERSION(Version) != RawInstrProf::Version)
return error(instrprof_error::unsupported_version);

BinaryIdsSize = swap(Header.BinaryIdsSize);
CountersDelta = swap(Header.CountersDelta);
NamesDelta = swap(Header.NamesDelta);
auto DataSize = swap(Header.DataSize);
@ -374,7 +375,6 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
NamesSize = swap(Header.NamesSize);
ValueKindLast = swap(Header.ValueKindLast);
BinaryIdsSize = swap(Header.BinaryIdsSize);

auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
auto PaddingSize = getNumPaddingBytes(NamesSize);
@ -4353,8 +4353,13 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
} else {
MemVT = getContainerForFixedLengthVector(DAG, MemVT);
IndexVT = MemVT.changeTypeToInteger();
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
Mask = DAG.getNode(
ISD::ZERO_EXTEND, DL,
@ -4453,8 +4458,13 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
} else {
MemVT = getContainerForFixedLengthVector(DAG, MemVT);
IndexVT = MemVT.changeTypeToInteger();
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());

StoreVal =
@ -1120,6 +1120,16 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
if (!MI.getOperand(1).isReg())
return false;

auto NormalizeCmpValue = [](int64_t Value) -> int {
// Comparison immediates may be 64-bit, but CmpValue is only an int.
// Normalize to 0/1/2 return value, where 2 indicates any value apart from
// 0 or 1.
// TODO: Switch CmpValue to int64_t in the API to avoid this.
if (Value == 0 || Value == 1)
return Value;
return 2;
};
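// For example (editorial illustration, not in the source):
// NormalizeCmpValue(0) == 0, NormalizeCmpValue(1) == 1, and both
// NormalizeCmpValue(-7) and NormalizeCmpValue(1LL << 40) collapse to 2.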
switch (MI.getOpcode()) {
default:
break;
@ -1155,8 +1165,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
// FIXME: In order to convert CmpValue to 0 or 1
CmpValue = MI.getOperand(2).getImm() != 0;
CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
@ -1165,14 +1174,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
// FIXME:The return val type of decodeLogicalImmediate is uint64_t,
// while the type of CmpValue is int. When converting uint64_t to int,
// the high 32 bits of uint64_t will be lost.
// In fact it causes a bug in spec2006-483.xalancbmk
// CmpValue is only used to compare with zero in OptimizeCompareInstr
CmpValue = AArch64_AM::decodeLogicalImmediate(
CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(),
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
return true;
}

@ -1462,10 +1466,9 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);

// Continue only if we have a "ri" where immediate is zero.
// FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
// function.
assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
// Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
"CmpValue must be 0, 1, or 2!");
if (SrcReg2 != 0)
return false;

@ -1473,9 +1476,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;

if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true;
return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
return (CmpValue == 0 || CmpValue == 1) &&
removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}

/// Get opcode of S version of Instr.
@ -1647,7 +1647,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
"CMP_SWAP not expected to be custom expanded for Thumb1");
assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
"ARMv8-M.baseline does not have t2UXTB/t2UXTH");
assert(ARM::tGPRRegClass.contains(DesiredReg) &&
assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) &&
"DesiredReg used for UXT op must be tGPR");
}

@ -54,6 +54,24 @@ public:
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
}

InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput)
return SCEVCheapExpansionBudget.getValue() + 1;

return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
Opd2Info, Opd1PropInfo,
Opd2PropInfo);
}
};

} // end namespace llvm

@ -485,6 +485,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
return true;
// There is no corresponding FMA instruction for PPC double double.
// Thus, we need to disable CTR loop generation for this type.
case Intrinsic::fmuladd:
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
isPPC_FP128Ty())
File diff suppressed because it is too large
@ -231,6 +231,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZfh;

@ -219,6 +219,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZfh;

@ -230,3 +230,4 @@ def : ReadAdvance<ReadFSqrt16, 0>;

// Include the scheduler resources for other instruction extensions.
include "RISCVScheduleB.td"
include "RISCVScheduleV.td"
820
llvm/lib/Target/RISCV/RISCVScheduleV.td
Normal file
@ -0,0 +1,820 @@
|
||||
//===-- RISCVScheduleV.td - RISCV Scheduling Definitions V -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
/// Define scheduler resources associated with def operands.

// 7. Vector Loads and Stores
// 7.4. Vector Unit-Stride Instructions
def WriteVLDE8 : SchedWrite;
def WriteVLDE16 : SchedWrite;
def WriteVLDE32 : SchedWrite;
def WriteVLDE64 : SchedWrite;
def WriteVSTE8 : SchedWrite;
def WriteVSTE16 : SchedWrite;
def WriteVSTE32 : SchedWrite;
def WriteVSTE64 : SchedWrite;
// 7.4.1. Vector Unit-Strided Mask
def WriteVLDM : SchedWrite;
def WriteVSTM : SchedWrite;
// 7.5. Vector Strided Instructions
def WriteVLDS8 : SchedWrite;
def WriteVLDS16 : SchedWrite;
def WriteVLDS32 : SchedWrite;
def WriteVLDS64 : SchedWrite;
def WriteVSTS8 : SchedWrite;
def WriteVSTS16 : SchedWrite;
def WriteVSTS32 : SchedWrite;
def WriteVSTS64 : SchedWrite;
// 7.6. Vector Indexed Instructions
def WriteVLDUX8 : SchedWrite;
def WriteVLDUX16 : SchedWrite;
def WriteVLDUX32 : SchedWrite;
def WriteVLDUX64 : SchedWrite;
def WriteVLDOX8 : SchedWrite;
def WriteVLDOX16 : SchedWrite;
def WriteVLDOX32 : SchedWrite;
def WriteVLDOX64 : SchedWrite;
def WriteVSTUX8 : SchedWrite;
def WriteVSTUX16 : SchedWrite;
def WriteVSTUX32 : SchedWrite;
def WriteVSTUX64 : SchedWrite;
def WriteVSTOX8 : SchedWrite;
def WriteVSTOX16 : SchedWrite;
def WriteVSTOX32 : SchedWrite;
def WriteVSTOX64 : SchedWrite;
// 7.7. Vector Unit-stride Fault-Only-First Loads
def WriteVLDFF8 : SchedWrite;
def WriteVLDFF16 : SchedWrite;
def WriteVLDFF32 : SchedWrite;
def WriteVLDFF64 : SchedWrite;
// 7.9. Vector Whole Register Instructions
def WriteVLD1R8 : SchedWrite;
def WriteVLD1R16 : SchedWrite;
def WriteVLD1R32 : SchedWrite;
def WriteVLD1R64 : SchedWrite;
def WriteVLD2R8 : SchedWrite;
def WriteVLD2R16 : SchedWrite;
def WriteVLD2R32 : SchedWrite;
def WriteVLD2R64 : SchedWrite;
def WriteVLD4R8 : SchedWrite;
def WriteVLD4R16 : SchedWrite;
def WriteVLD4R32 : SchedWrite;
def WriteVLD4R64 : SchedWrite;
def WriteVLD8R8 : SchedWrite;
def WriteVLD8R16 : SchedWrite;
def WriteVLD8R32 : SchedWrite;
def WriteVLD8R64 : SchedWrite;
def WriteVST1R : SchedWrite;
def WriteVST2R : SchedWrite;
def WriteVST4R : SchedWrite;
def WriteVST8R : SchedWrite;

// 11. Vector Integer Arithmetic Instructions
// 11.1. Vector Single-Width Integer Add and Subtract
// 11.5. Vector Bitwise Logical Instructions
def WriteVIALUV : SchedWrite;
def WriteVIALUX : SchedWrite;
def WriteVIALUI : SchedWrite;
// 11.2. Vector Widening Integer Add/Subtract
def WriteVIWALUV : SchedWrite;
def WriteVIWALUX : SchedWrite;
def WriteVIWALUI : SchedWrite;
// 11.3. Vector Integer Extension
def WriteVExtV : SchedWrite;
// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions
def WriteVICALUV : SchedWrite;
def WriteVICALUX : SchedWrite;
def WriteVICALUI : SchedWrite;
// 11.6. Vector Single-Width Bit Shift Instructions
def WriteVShiftV : SchedWrite;
def WriteVShiftX : SchedWrite;
def WriteVShiftI : SchedWrite;
// 11.7. Vector Narrowing Integer Right Shift Instructions
def WriteVNShiftV : SchedWrite;
def WriteVNShiftX : SchedWrite;
def WriteVNShiftI : SchedWrite;
// 11.8. Vector Integer Comparison Instructions
// 11.9. Vector Integer Min/Max Instructions
def WriteVICmpV : SchedWrite;
def WriteVICmpX : SchedWrite;
def WriteVICmpI : SchedWrite;
// 11.10. Vector Single-Width Integer Multiply Instructions
def WriteVIMulV : SchedWrite;
def WriteVIMulX : SchedWrite;
// 11.11. Vector Integer Divide Instructions
def WriteVIDivV : SchedWrite;
def WriteVIDivX : SchedWrite;
// 11.12. Vector Widening Integer Multiply Instructions
def WriteVIWMulV : SchedWrite;
def WriteVIWMulX : SchedWrite;
// 11.13. Vector Single-Width Integer Multiply-Add Instructions
def WriteVIMulAddV : SchedWrite;
def WriteVIMulAddX : SchedWrite;
// 11.14. Vector Widening Integer Multiply-Add Instructions
def WriteVIWMulAddV : SchedWrite;
def WriteVIWMulAddX : SchedWrite;
// 11.15. Vector Integer Merge Instructions
def WriteVIMergeV : SchedWrite;
def WriteVIMergeX : SchedWrite;
def WriteVIMergeI : SchedWrite;
// 11.16. Vector Integer Move Instructions
def WriteVIMovV : SchedWrite;
def WriteVIMovX : SchedWrite;
def WriteVIMovI : SchedWrite;

// 12. Vector Fixed-Point Arithmetic Instructions
// 12.1. Vector Single-Width Saturating Add and Subtract
def WriteVSALUV : SchedWrite;
def WriteVSALUX : SchedWrite;
def WriteVSALUI : SchedWrite;
// 12.2. Vector Single-Width Averaging Add and Subtract
def WriteVAALUV : SchedWrite;
def WriteVAALUX : SchedWrite;
// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
def WriteVSMulV : SchedWrite;
def WriteVSMulX : SchedWrite;
// 12.4. Vector Single-Width Scaling Shift Instructions
def WriteVSShiftV : SchedWrite;
def WriteVSShiftX : SchedWrite;
def WriteVSShiftI : SchedWrite;
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
def WriteVNClipV : SchedWrite;
def WriteVNClipX : SchedWrite;
def WriteVNClipI : SchedWrite;

// 13. Vector Floating-Point Instructions
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
def WriteVFALUV : SchedWrite;
def WriteVFALUF : SchedWrite;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
def WriteVFWALUV : SchedWrite;
def WriteVFWALUF : SchedWrite;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
def WriteVFMulV : SchedWrite;
def WriteVFMulF : SchedWrite;
def WriteVFDivV : SchedWrite;
def WriteVFDivF : SchedWrite;
// 13.5. Vector Widening Floating-Point Multiply
def WriteVFWMulV : SchedWrite;
def WriteVFWMulF : SchedWrite;
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
def WriteVFMulAddV : SchedWrite;
def WriteVFMulAddF : SchedWrite;
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
def WriteVFWMulAddV : SchedWrite;
def WriteVFWMulAddF : SchedWrite;
// 13.8. Vector Floating-Point Square-Root Instruction
def WriteVFSqrtV : SchedWrite;
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
def WriteVFRecpV : SchedWrite;
// 13.11. Vector Floating-Point MIN/MAX Instructions
// 13.13. Vector Floating-Point Compare Instructions
def WriteVFCmpV : SchedWrite;
def WriteVFCmpF : SchedWrite;
// 13.12. Vector Floating-Point Sign-Injection Instructions
def WriteVFSgnjV : SchedWrite;
def WriteVFSgnjF : SchedWrite;
// 13.14. Vector Floating-Point Classify Instruction
def WriteVFClassV : SchedWrite;
// 13.15. Vector Floating-Point Merge Instruction
def WriteVFMergeV : SchedWrite;
// 13.16. Vector Floating-Point Move Instruction
def WriteVFMovV : SchedWrite;
// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
def WriteVFCvtIToFV : SchedWrite;
def WriteVFCvtFToIV : SchedWrite;
def WriteVFCvtFToFV : SchedWrite;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
def WriteVFWCvtIToFV : SchedWrite;
def WriteVFWCvtFToIV : SchedWrite;
def WriteVFWCvtFToFV : SchedWrite;
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
def WriteVFNCvtIToFV : SchedWrite;
def WriteVFNCvtFToIV : SchedWrite;
def WriteVFNCvtFToFV : SchedWrite;

// 14. Vector Reduction Operations
// 14.1. Vector Single-Width Integer Reduction Instructions
def WriteVIRedV : SchedWrite;
// 14.2. Vector Widening Integer Reduction Instructions
def WriteVIWRedV : SchedWrite;
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
def WriteVFRedV : SchedWrite;
def WriteVFRedOV : SchedWrite;
// 14.4. Vector Widening Floating-Point Reduction Instructions
def WriteVFWRedV : SchedWrite;
def WriteVFWRedOV : SchedWrite;

// 15. Vector Mask Instructions
// 15.1. Vector Mask-Register Logical Instructions
def WriteVMALUV : SchedWrite;
// 15.2. Vector Mask Population Count
def WriteVMPopV : SchedWrite;
// 15.3. Vector Find-First-Set Mask Bit
def WriteVMFFSV : SchedWrite;
// 15.4. Vector Set-Before-First Mask Bit
// 15.5. Vector Set-Including-First Mask Bit
// 15.6. Vector Set-only-First Mask Bit
def WriteVMSFSV : SchedWrite;
// 15.8. Vector Iota Instruction
def WriteVMIotV : SchedWrite;
// 15.9. Vector Element Index Instruction
def WriteVMIdxV : SchedWrite;

// 16. Vector Permutation Instructions
// 16.1. Integer Scalar Move Instructions
def WriteVIMovVX : SchedWrite;
def WriteVIMovXV : SchedWrite;
// 16.2. Floating-Point Scalar Move Instructions
def WriteVFMovVF : SchedWrite;
def WriteVFMovFV : SchedWrite;
// 16.3. Vector Slide Instructions
def WriteVISlideX : SchedWrite;
def WriteVISlideI : SchedWrite;
def WriteVISlide1X : SchedWrite;
def WriteVFSlide1F : SchedWrite;
// 16.4. Vector Register Gather Instructions
def WriteVGatherV : SchedWrite;
def WriteVGatherX : SchedWrite;
def WriteVGatherI : SchedWrite;
// 16.5. Vector Compress Instruction
def WriteVCompressV : SchedWrite;
// 16.6. Whole Vector Register Move
def WriteVMov1V : SchedWrite;
def WriteVMov2V : SchedWrite;
def WriteVMov4V : SchedWrite;
def WriteVMov8V : SchedWrite;

//===----------------------------------------------------------------------===//
/// Define scheduler resources associated with use operands.

// 7. Vector Loads and Stores
def ReadVLDX : SchedRead;
def ReadVSTX : SchedRead;
// 7.4. Vector Unit-Stride Instructions
def ReadVSTE8V : SchedRead;
def ReadVSTE16V : SchedRead;
def ReadVSTE32V : SchedRead;
def ReadVSTE64V : SchedRead;
// 7.4.1. Vector Unit-Strided Mask
def ReadVSTM : SchedRead;
// 7.5. Vector Strided Instructions
def ReadVLDSX : SchedRead;
def ReadVSTSX : SchedRead;
def ReadVSTS8V : SchedRead;
def ReadVSTS16V : SchedRead;
def ReadVSTS32V : SchedRead;
def ReadVSTS64V : SchedRead;
// 7.6. Vector Indexed Instructions
def ReadVLDUXV : SchedRead;
def ReadVLDOXV : SchedRead;
def ReadVSTUX8 : SchedRead;
def ReadVSTUX16 : SchedRead;
def ReadVSTUX32 : SchedRead;
def ReadVSTUX64 : SchedRead;
def ReadVSTUXV : SchedRead;
def ReadVSTUX8V : SchedRead;
def ReadVSTUX16V : SchedRead;
def ReadVSTUX32V : SchedRead;
def ReadVSTUX64V : SchedRead;
def ReadVSTOX8 : SchedRead;
def ReadVSTOX16 : SchedRead;
def ReadVSTOX32 : SchedRead;
def ReadVSTOX64 : SchedRead;
def ReadVSTOXV : SchedRead;
def ReadVSTOX8V : SchedRead;
def ReadVSTOX16V : SchedRead;
def ReadVSTOX32V : SchedRead;
def ReadVSTOX64V : SchedRead;
// 7.9. Vector Whole Register Instructions
def ReadVST1R : SchedRead;
def ReadVST2R : SchedRead;
def ReadVST4R : SchedRead;
def ReadVST8R : SchedRead;

// 11. Vector Integer Arithmetic Instructions
// 11.1. Vector Single-Width Integer Add and Subtract
// 11.5. Vector Bitwise Logical Instructions
def ReadVIALUV : SchedRead;
def ReadVIALUX : SchedRead;
// 11.2. Vector Widening Integer Add/Subtract
def ReadVIWALUV : SchedRead;
def ReadVIWALUX : SchedRead;
// 11.3. Vector Integer Extension
def ReadVExtV : SchedRead;
// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions
def ReadVIALUCV : SchedRead;
def ReadVIALUCX : SchedRead;
// 11.6. Vector Single-Width Bit Shift Instructions
def ReadVShiftV : SchedRead;
def ReadVShiftX : SchedRead;
// 11.7. Vector Narrowing Integer Right Shift Instructions
def ReadVNShiftV : SchedRead;
def ReadVNShiftX : SchedRead;
// 11.8. Vector Integer Comparison Instructions
// 11.9. Vector Integer Min/Max Instructions
def ReadVICmpV : SchedRead;
def ReadVICmpX : SchedRead;
// 11.10. Vector Single-Width Integer Multiply Instructions
def ReadVIMulV : SchedRead;
def ReadVIMulX : SchedRead;
// 11.11. Vector Integer Divide Instructions
def ReadVIDivV : SchedRead;
def ReadVIDivX : SchedRead;
// 11.12. Vector Widening Integer Multiply Instructions
def ReadVIWMulV : SchedRead;
def ReadVIWMulX : SchedRead;
// 11.13. Vector Single-Width Integer Multiply-Add Instructions
def ReadVIMulAddV : SchedRead;
def ReadVIMulAddX : SchedRead;
// 11.14. Vector Widening Integer Multiply-Add Instructions
def ReadVIWMulAddV : SchedRead;
def ReadVIWMulAddX : SchedRead;
// 11.15. Vector Integer Merge Instructions
def ReadVIMergeV : SchedRead;
def ReadVIMergeX : SchedRead;
// 11.16. Vector Integer Move Instructions
def ReadVIMovV : SchedRead;
def ReadVIMovX : SchedRead;

// 12. Vector Fixed-Point Arithmetic Instructions
// 12.1. Vector Single-Width Saturating Add and Subtract
def ReadVSALUV : SchedRead;
def ReadVSALUX : SchedRead;
// 12.2. Vector Single-Width Averaging Add and Subtract
def ReadVAALUV : SchedRead;
def ReadVAALUX : SchedRead;
// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
def ReadVSMulV : SchedRead;
def ReadVSMulX : SchedRead;
// 12.4. Vector Single-Width Scaling Shift Instructions
def ReadVSShiftV : SchedRead;
def ReadVSShiftX : SchedRead;
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
def ReadVNClipV : SchedRead;
def ReadVNClipX : SchedRead;

// 13. Vector Floating-Point Instructions
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
def ReadVFALUV : SchedRead;
def ReadVFALUF : SchedRead;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
def ReadVFWALUV : SchedRead;
def ReadVFWALUF : SchedRead;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
def ReadVFMulV : SchedRead;
def ReadVFMulF : SchedRead;
def ReadVFDivV : SchedRead;
def ReadVFDivF : SchedRead;
// 13.5. Vector Widening Floating-Point Multiply
def ReadVFWMulV : SchedRead;
def ReadVFWMulF : SchedRead;
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
def ReadVFMulAddV : SchedRead;
def ReadVFMulAddF : SchedRead;
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
def ReadVFWMulAddV : SchedRead;
def ReadVFWMulAddF : SchedRead;
// 13.8. Vector Floating-Point Square-Root Instruction
def ReadVFSqrtV : SchedRead;
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
def ReadVFRecpV : SchedRead;
// 13.11. Vector Floating-Point MIN/MAX Instructions
// 13.13. Vector Floating-Point Compare Instructions
def ReadVFCmpV : SchedRead;
def ReadVFCmpF : SchedRead;
// 13.12. Vector Floating-Point Sign-Injection Instructions
def ReadVFSgnjV : SchedRead;
def ReadVFSgnjF : SchedRead;
// 13.14. Vector Floating-Point Classify Instruction
def ReadVFClassV : SchedRead;
// 13.15. Vector Floating-Point Merge Instruction
def ReadVFMergeV : SchedRead;
def ReadVFMergeF : SchedRead;
// 13.16. Vector Floating-Point Move Instruction
def ReadVFMovF : SchedRead;
// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
def ReadVFCvtIToFV : SchedRead;
def ReadVFCvtFToIV : SchedRead;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
def ReadVFWCvtIToFV : SchedRead;
def ReadVFWCvtFToIV : SchedRead;
def ReadVFWCvtFToFV : SchedRead;
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
def ReadVFNCvtIToFV : SchedRead;
def ReadVFNCvtFToIV : SchedRead;
def ReadVFNCvtFToFV : SchedRead;

// 14. Vector Reduction Operations
// 14.1. Vector Single-Width Integer Reduction Instructions
def ReadVIRedV : SchedRead;
def ReadVIRedV0 : SchedRead;
// 14.2. Vector Widening Integer Reduction Instructions
def ReadVIWRedV : SchedRead;
def ReadVIWRedV0 : SchedRead;
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
def ReadVFRedV : SchedRead;
def ReadVFRedV0 : SchedRead;
def ReadVFRedOV : SchedRead;
def ReadVFRedOV0 : SchedRead;
// 14.4. Vector Widening Floating-Point Reduction Instructions
def ReadVFWRedV : SchedRead;
def ReadVFWRedV0 : SchedRead;
def ReadVFWRedOV : SchedRead;
def ReadVFWRedOV0 : SchedRead;

// 15. Vector Mask Instructions
// 15.1. Vector Mask-Register Logical Instructions
def ReadVMALUV : SchedRead;
// 15.2. Vector Mask Population Count
def ReadVMPopV : SchedRead;
// 15.3. Vector Find-First-Set Mask Bit
def ReadVMFFSV : SchedRead;
// 15.4. Vector Set-Before-First Mask Bit
// 15.5. Vector Set-Including-First Mask Bit
// 15.6. Vector Set-only-First Mask Bit
def ReadVMSFSV : SchedRead;
// 15.8. Vector Iota Instruction
def ReadVMIotV : SchedRead;

// 16. Vector Permutation Instructions
// 16.1. Integer Scalar Move Instructions
def ReadVIMovVX : SchedRead;
def ReadVIMovXV : SchedRead;
def ReadVIMovXX : SchedRead;
// 16.2. Floating-Point Scalar Move Instructions
def ReadVFMovVF : SchedRead;
def ReadVFMovFV : SchedRead;
def ReadVFMovFX : SchedRead;
// 16.3. Vector Slide Instructions
def ReadVISlideV : SchedRead;
def ReadVISlideX : SchedRead;
def ReadVFSlideV : SchedRead;
def ReadVFSlideF : SchedRead;
// 16.4. Vector Register Gather Instructions
def ReadVGatherV : SchedRead;
def ReadVGatherX : SchedRead;
// 16.5. Vector Compress Instruction
def ReadVCompressV : SchedRead;
// 16.6. Whole Vector Register Move
def ReadVMov1V : SchedRead;
def ReadVMov2V : SchedRead;
def ReadVMov4V : SchedRead;
def ReadVMov8V : SchedRead;

// Others
def ReadVMask : SchedRead;

//===----------------------------------------------------------------------===//
/// Define default scheduler resources for V.

multiclass UnsupportedSchedV {
let Unsupported = true in {

// 7. Vector Loads and Stores
def : WriteRes<WriteVLDE8, []>;
def : WriteRes<WriteVLDE16, []>;
def : WriteRes<WriteVLDE32, []>;
def : WriteRes<WriteVLDE64, []>;
def : WriteRes<WriteVSTE8, []>;
def : WriteRes<WriteVSTE16, []>;
def : WriteRes<WriteVSTE32, []>;
def : WriteRes<WriteVSTE64, []>;
def : WriteRes<WriteVLDM, []>;
def : WriteRes<WriteVSTM, []>;
def : WriteRes<WriteVLDS8, []>;
def : WriteRes<WriteVLDS16, []>;
def : WriteRes<WriteVLDS32, []>;
def : WriteRes<WriteVLDS64, []>;
def : WriteRes<WriteVSTS8, []>;
def : WriteRes<WriteVSTS16, []>;
def : WriteRes<WriteVSTS32, []>;
def : WriteRes<WriteVSTS64, []>;
def : WriteRes<WriteVLDUX8, []>;
def : WriteRes<WriteVLDUX16, []>;
def : WriteRes<WriteVLDUX32, []>;
def : WriteRes<WriteVLDUX64, []>;
def : WriteRes<WriteVLDOX8, []>;
def : WriteRes<WriteVLDOX16, []>;
def : WriteRes<WriteVLDOX32, []>;
def : WriteRes<WriteVLDOX64, []>;
def : WriteRes<WriteVSTUX8, []>;
def : WriteRes<WriteVSTUX16, []>;
def : WriteRes<WriteVSTUX32, []>;
def : WriteRes<WriteVSTUX64, []>;
def : WriteRes<WriteVSTOX8, []>;
def : WriteRes<WriteVSTOX16, []>;
def : WriteRes<WriteVSTOX32, []>;
def : WriteRes<WriteVSTOX64, []>;
def : WriteRes<WriteVLDFF8, []>;
def : WriteRes<WriteVLDFF16, []>;
def : WriteRes<WriteVLDFF32, []>;
def : WriteRes<WriteVLDFF64, []>;
def : WriteRes<WriteVLD1R8, []>;
def : WriteRes<WriteVLD1R16, []>;
def : WriteRes<WriteVLD1R32, []>;
def : WriteRes<WriteVLD1R64, []>;
def : WriteRes<WriteVLD2R8, []>;
def : WriteRes<WriteVLD2R16, []>;
def : WriteRes<WriteVLD2R32, []>;
def : WriteRes<WriteVLD2R64, []>;
def : WriteRes<WriteVLD4R8, []>;
def : WriteRes<WriteVLD4R16, []>;
def : WriteRes<WriteVLD4R32, []>;
def : WriteRes<WriteVLD4R64, []>;
def : WriteRes<WriteVLD8R8, []>;
def : WriteRes<WriteVLD8R16, []>;
def : WriteRes<WriteVLD8R32, []>;
def : WriteRes<WriteVLD8R64, []>;
def : WriteRes<WriteVST1R, []>;
def : WriteRes<WriteVST2R, []>;
def : WriteRes<WriteVST4R, []>;
def : WriteRes<WriteVST8R, []>;

// 12. Vector Integer Arithmetic Instructions
def : WriteRes<WriteVIALUV, []>;
def : WriteRes<WriteVIALUX, []>;
def : WriteRes<WriteVIALUI, []>;
def : WriteRes<WriteVIWALUV, []>;
def : WriteRes<WriteVIWALUX, []>;
def : WriteRes<WriteVIWALUI, []>;
def : WriteRes<WriteVExtV, []>;
def : WriteRes<WriteVICALUV, []>;
def : WriteRes<WriteVICALUX, []>;
def : WriteRes<WriteVICALUI, []>;
def : WriteRes<WriteVShiftV, []>;
def : WriteRes<WriteVShiftX, []>;
def : WriteRes<WriteVShiftI, []>;
def : WriteRes<WriteVNShiftV, []>;
def : WriteRes<WriteVNShiftX, []>;
def : WriteRes<WriteVNShiftI, []>;
def : WriteRes<WriteVICmpV, []>;
def : WriteRes<WriteVICmpX, []>;
def : WriteRes<WriteVICmpI, []>;
def : WriteRes<WriteVIMulV, []>;
def : WriteRes<WriteVIMulX, []>;
def : WriteRes<WriteVIDivV, []>;
def : WriteRes<WriteVIDivX, []>;
def : WriteRes<WriteVIWMulV, []>;
def : WriteRes<WriteVIWMulX, []>;
def : WriteRes<WriteVIMulAddV, []>;
def : WriteRes<WriteVIMulAddX, []>;
def : WriteRes<WriteVIWMulAddV, []>;
def : WriteRes<WriteVIWMulAddX, []>;
def : WriteRes<WriteVIMergeV, []>;
def : WriteRes<WriteVIMergeX, []>;
def : WriteRes<WriteVIMergeI, []>;
def : WriteRes<WriteVIMovV, []>;
def : WriteRes<WriteVIMovX, []>;
def : WriteRes<WriteVIMovI, []>;

// 13. Vector Fixed-Point Arithmetic Instructions
def : WriteRes<WriteVSALUV, []>;
def : WriteRes<WriteVSALUX, []>;
def : WriteRes<WriteVSALUI, []>;
def : WriteRes<WriteVAALUV, []>;
def : WriteRes<WriteVAALUX, []>;
def : WriteRes<WriteVSMulV, []>;
def : WriteRes<WriteVSMulX, []>;
def : WriteRes<WriteVSShiftV, []>;
def : WriteRes<WriteVSShiftX, []>;
def : WriteRes<WriteVSShiftI, []>;
def : WriteRes<WriteVNClipV, []>;
def : WriteRes<WriteVNClipX, []>;
def : WriteRes<WriteVNClipI, []>;

// 14. Vector Floating-Point Instructions
def : WriteRes<WriteVFALUV, []>;
def : WriteRes<WriteVFALUF, []>;
def : WriteRes<WriteVFWALUV, []>;
def : WriteRes<WriteVFWALUF, []>;
def : WriteRes<WriteVFMulV, []>;
def : WriteRes<WriteVFMulF, []>;
def : WriteRes<WriteVFDivV, []>;
def : WriteRes<WriteVFDivF, []>;
def : WriteRes<WriteVFWMulV, []>;
def : WriteRes<WriteVFWMulF, []>;
def : WriteRes<WriteVFMulAddV, []>;
def : WriteRes<WriteVFMulAddF, []>;
def : WriteRes<WriteVFWMulAddV, []>;
def : WriteRes<WriteVFWMulAddF, []>;
def : WriteRes<WriteVFSqrtV, []>;
def : WriteRes<WriteVFRecpV, []>;
def : WriteRes<WriteVFCmpV, []>;
def : WriteRes<WriteVFCmpF, []>;
def : WriteRes<WriteVFSgnjV, []>;
def : WriteRes<WriteVFSgnjF, []>;
def : WriteRes<WriteVFClassV, []>;
def : WriteRes<WriteVFMergeV, []>;
def : WriteRes<WriteVFMovV, []>;
def : WriteRes<WriteVFCvtIToFV, []>;
def : WriteRes<WriteVFCvtFToIV, []>;
def : WriteRes<WriteVFCvtFToFV, []>;
def : WriteRes<WriteVFWCvtIToFV, []>;
def : WriteRes<WriteVFWCvtFToIV, []>;
def : WriteRes<WriteVFWCvtFToFV, []>;
def : WriteRes<WriteVFNCvtIToFV, []>;
def : WriteRes<WriteVFNCvtFToIV, []>;
def : WriteRes<WriteVFNCvtFToFV, []>;

// 15. Vector Reduction Operations
def : WriteRes<WriteVIRedV, []>;
def : WriteRes<WriteVIWRedV, []>;
def : WriteRes<WriteVFRedV, []>;
def : WriteRes<WriteVFRedOV, []>;
def : WriteRes<WriteVFWRedV, []>;
def : WriteRes<WriteVFWRedOV, []>;

// 16. Vector Mask Instructions
def : WriteRes<WriteVMALUV, []>;
def : WriteRes<WriteVMPopV, []>;
def : WriteRes<WriteVMFFSV, []>;
def : WriteRes<WriteVMSFSV, []>;
def : WriteRes<WriteVMIotV, []>;
def : WriteRes<WriteVMIdxV, []>;

// 17. Vector Permutation Instructions
def : WriteRes<WriteVIMovVX, []>;
def : WriteRes<WriteVIMovXV, []>;
def : WriteRes<WriteVFMovVF, []>;
def : WriteRes<WriteVFMovFV, []>;
def : WriteRes<WriteVISlideX, []>;
def : WriteRes<WriteVISlideI, []>;
def : WriteRes<WriteVISlide1X, []>;
def : WriteRes<WriteVFSlide1F, []>;
def : WriteRes<WriteVGatherV, []>;
def : WriteRes<WriteVGatherX, []>;
def : WriteRes<WriteVGatherI, []>;
def : WriteRes<WriteVCompressV, []>;
def : WriteRes<WriteVMov1V, []>;
def : WriteRes<WriteVMov2V, []>;
def : WriteRes<WriteVMov4V, []>;
def : WriteRes<WriteVMov8V, []>;

// 7. Vector Loads and Stores
def : ReadAdvance<ReadVLDX, 0>;
def : ReadAdvance<ReadVSTX, 0>;
def : ReadAdvance<ReadVSTE8V, 0>;
def : ReadAdvance<ReadVSTE16V, 0>;
def : ReadAdvance<ReadVSTE32V, 0>;
def : ReadAdvance<ReadVSTE64V, 0>;
def : ReadAdvance<ReadVSTM, 0>;
def : ReadAdvance<ReadVLDSX, 0>;
def : ReadAdvance<ReadVSTSX, 0>;
def : ReadAdvance<ReadVSTS8V, 0>;
def : ReadAdvance<ReadVSTS16V, 0>;
def : ReadAdvance<ReadVSTS32V, 0>;
def : ReadAdvance<ReadVSTS64V, 0>;
def : ReadAdvance<ReadVLDUXV, 0>;
def : ReadAdvance<ReadVLDOXV, 0>;
def : ReadAdvance<ReadVSTUXV, 0>;
def : ReadAdvance<ReadVSTUX8, 0>;
def : ReadAdvance<ReadVSTUX16, 0>;
def : ReadAdvance<ReadVSTUX32, 0>;
def : ReadAdvance<ReadVSTUX64, 0>;
def : ReadAdvance<ReadVSTUX8V, 0>;
def : ReadAdvance<ReadVSTUX16V, 0>;
def : ReadAdvance<ReadVSTUX32V, 0>;
def : ReadAdvance<ReadVSTUX64V, 0>;
def : ReadAdvance<ReadVSTOX8, 0>;
def : ReadAdvance<ReadVSTOX16, 0>;
def : ReadAdvance<ReadVSTOX32, 0>;
def : ReadAdvance<ReadVSTOX64, 0>;
def : ReadAdvance<ReadVSTOXV, 0>;
def : ReadAdvance<ReadVSTOX8V, 0>;
def : ReadAdvance<ReadVSTOX16V, 0>;
def : ReadAdvance<ReadVSTOX32V, 0>;
def : ReadAdvance<ReadVSTOX64V, 0>;
def : ReadAdvance<ReadVST1R, 0>;
def : ReadAdvance<ReadVST2R, 0>;
def : ReadAdvance<ReadVST4R, 0>;
def : ReadAdvance<ReadVST8R, 0>;

// 12. Vector Integer Arithmetic Instructions
def : ReadAdvance<ReadVIALUV, 0>;
def : ReadAdvance<ReadVIALUX, 0>;
def : ReadAdvance<ReadVIWALUV, 0>;
def : ReadAdvance<ReadVIWALUX, 0>;
def : ReadAdvance<ReadVExtV, 0>;
def : ReadAdvance<ReadVIALUCV, 0>;
def : ReadAdvance<ReadVIALUCX, 0>;
def : ReadAdvance<ReadVShiftV, 0>;
def : ReadAdvance<ReadVShiftX, 0>;
def : ReadAdvance<ReadVNShiftV, 0>;
def : ReadAdvance<ReadVNShiftX, 0>;
def : ReadAdvance<ReadVICmpV, 0>;
def : ReadAdvance<ReadVICmpX, 0>;
def : ReadAdvance<ReadVIMulV, 0>;
def : ReadAdvance<ReadVIMulX, 0>;
def : ReadAdvance<ReadVIDivV, 0>;
def : ReadAdvance<ReadVIDivX, 0>;
def : ReadAdvance<ReadVIWMulV, 0>;
def : ReadAdvance<ReadVIWMulX, 0>;
def : ReadAdvance<ReadVIMulAddV, 0>;
def : ReadAdvance<ReadVIMulAddX, 0>;
def : ReadAdvance<ReadVIWMulAddV, 0>;
def : ReadAdvance<ReadVIWMulAddX, 0>;
def : ReadAdvance<ReadVIMergeV, 0>;
def : ReadAdvance<ReadVIMergeX, 0>;
def : ReadAdvance<ReadVIMovV, 0>;
def : ReadAdvance<ReadVIMovX, 0>;

// 13. Vector Fixed-Point Arithmetic Instructions
def : ReadAdvance<ReadVSALUV, 0>;
def : ReadAdvance<ReadVSALUX, 0>;
def : ReadAdvance<ReadVAALUV, 0>;
def : ReadAdvance<ReadVAALUX, 0>;
def : ReadAdvance<ReadVSMulV, 0>;
def : ReadAdvance<ReadVSMulX, 0>;
def : ReadAdvance<ReadVSShiftV, 0>;
def : ReadAdvance<ReadVSShiftX, 0>;
def : ReadAdvance<ReadVNClipV, 0>;
def : ReadAdvance<ReadVNClipX, 0>;

// 14. Vector Floating-Point Instructions
def : ReadAdvance<ReadVFALUV, 0>;
def : ReadAdvance<ReadVFALUF, 0>;
def : ReadAdvance<ReadVFWALUV, 0>;
def : ReadAdvance<ReadVFWALUF, 0>;
def : ReadAdvance<ReadVFMulV, 0>;
def : ReadAdvance<ReadVFMulF, 0>;
def : ReadAdvance<ReadVFDivV, 0>;
def : ReadAdvance<ReadVFDivF, 0>;
def : ReadAdvance<ReadVFWMulV, 0>;
def : ReadAdvance<ReadVFWMulF, 0>;
def : ReadAdvance<ReadVFMulAddV, 0>;
def : ReadAdvance<ReadVFMulAddF, 0>;
def : ReadAdvance<ReadVFWMulAddV, 0>;
def : ReadAdvance<ReadVFWMulAddF, 0>;
def : ReadAdvance<ReadVFSqrtV, 0>;
def : ReadAdvance<ReadVFRecpV, 0>;
def : ReadAdvance<ReadVFCmpV, 0>;
def : ReadAdvance<ReadVFCmpF, 0>;
def : ReadAdvance<ReadVFSgnjV, 0>;
def : ReadAdvance<ReadVFSgnjF, 0>;
def : ReadAdvance<ReadVFClassV, 0>;
def : ReadAdvance<ReadVFMergeV, 0>;
def : ReadAdvance<ReadVFMergeF, 0>;
def : ReadAdvance<ReadVFMovF, 0>;
def : ReadAdvance<ReadVFCvtIToFV, 0>;
def : ReadAdvance<ReadVFCvtFToIV, 0>;
def : ReadAdvance<ReadVFWCvtIToFV, 0>;
def : ReadAdvance<ReadVFWCvtFToIV, 0>;
def : ReadAdvance<ReadVFWCvtFToFV, 0>;
def : ReadAdvance<ReadVFNCvtIToFV, 0>;
def : ReadAdvance<ReadVFNCvtFToIV, 0>;
def : ReadAdvance<ReadVFNCvtFToFV, 0>;

// 15. Vector Reduction Operations
def : ReadAdvance<ReadVIRedV, 0>;
def : ReadAdvance<ReadVIRedV0, 0>;
def : ReadAdvance<ReadVIWRedV, 0>;
def : ReadAdvance<ReadVIWRedV0, 0>;
def : ReadAdvance<ReadVFRedV, 0>;
def : ReadAdvance<ReadVFRedV0, 0>;
def : ReadAdvance<ReadVFRedOV, 0>;
def : ReadAdvance<ReadVFRedOV0, 0>;
def : ReadAdvance<ReadVFWRedV, 0>;
def : ReadAdvance<ReadVFWRedV0, 0>;
def : ReadAdvance<ReadVFWRedOV, 0>;
def : ReadAdvance<ReadVFWRedOV0, 0>;

// 16. Vector Mask Instructions
def : ReadAdvance<ReadVMALUV, 0>;
def : ReadAdvance<ReadVMPopV, 0>;
def : ReadAdvance<ReadVMFFSV, 0>;
def : ReadAdvance<ReadVMSFSV, 0>;
def : ReadAdvance<ReadVMIotV, 0>;

// 17. Vector Permutation Instructions
def : ReadAdvance<ReadVIMovVX, 0>;
def : ReadAdvance<ReadVIMovXV, 0>;
def : ReadAdvance<ReadVIMovXX, 0>;
def : ReadAdvance<ReadVFMovVF, 0>;
def : ReadAdvance<ReadVFMovFV, 0>;
def : ReadAdvance<ReadVFMovFX, 0>;
def : ReadAdvance<ReadVISlideV, 0>;
def : ReadAdvance<ReadVISlideX, 0>;
def : ReadAdvance<ReadVFSlideV, 0>;
def : ReadAdvance<ReadVFSlideF, 0>;
def : ReadAdvance<ReadVGatherV, 0>;
def : ReadAdvance<ReadVGatherX, 0>;
def : ReadAdvance<ReadVCompressV, 0>;
def : ReadAdvance<ReadVMov1V, 0>;
def : ReadAdvance<ReadVMov2V, 0>;
def : ReadAdvance<ReadVMov4V, 0>;
def : ReadAdvance<ReadVMov8V, 0>;

// Others
def : ReadAdvance<ReadVMask, 0>;

} // Unsupported
} // UnsupportedSchedV
@ -6704,17 +6704,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
  if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
    auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
    SDValue Ptr = MemIntr->getBasePtr();
    // The source constant may be larger than the subvector broadcast,
    // ensure we extract the correct subvector constants.
    if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) {
      Type *CstTy = Cst->getType();
      unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
      if (!CstTy->isVectorTy() || (SizeInBits % CstSizeInBits) != 0)
      unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
      if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
          (SizeInBits % SubVecSizeInBits) != 0)
        return false;
      unsigned SubEltSizeInBits = CstTy->getScalarSizeInBits();
      unsigned NumSubElts = CstSizeInBits / SubEltSizeInBits;
      unsigned NumSubVecs = SizeInBits / CstSizeInBits;
      unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
      unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
      unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
      APInt UndefSubElts(NumSubElts, 0);
      SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs,
                                        APInt(SubEltSizeInBits, 0));
                                        APInt(CstEltSizeInBits, 0));
      for (unsigned i = 0; i != NumSubElts; ++i) {
        if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],
                                 UndefSubElts, i))
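The hunk above changes the unit the extraction works in: element and copy counts are now derived from the broadcast's memory type (MemIntr->getMemoryVT()) rather than from the whole constant-pool entry, so a constant that is wider than the broadcast subvector no longer skews the split. A standalone sketch of the arithmetic (plain C++ with invented sizes; the LLVM types and CollectConstantBits are not modeled):

#include <cassert>

int main() {
  // Hypothetical sizes: a 512-bit value built by broadcasting a 128-bit
  // subvector loaded from a 256-bit v8i32 constant-pool entry.
  unsigned SizeInBits = 512;        // size of the value being decoded
  unsigned CstSizeInBits = 256;     // size of the whole constant-pool entry
  unsigned SubVecSizeInBits = 128;  // size actually loaded and broadcast
  unsigned CstEltSizeInBits = 32;   // i32 elements

  // Old derivation: counts based on the whole constant.
  unsigned OldNumSubElts = CstSizeInBits / CstEltSizeInBits; // 8 elements
  unsigned OldNumSubVecs = SizeInBits / CstSizeInBits;       // 2 copies

  // New derivation: counts based on the broadcast subvector.
  unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits; // 4 elements
  unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;       // 4 copies

  // Both splits cover SizeInBits worth of bits, but only the new one matches
  // what SUBV_BROADCAST_LOAD replicates (the first 4 elements, 4 times).
  assert(OldNumSubElts * OldNumSubVecs * CstEltSizeInBits == SizeInBits);
  assert(NumSubElts * NumSubVecs * CstEltSizeInBits == SizeInBits);
  return 0;
}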
@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
        mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
    Sched<[sched.Folded, sched.ReadAfterFold]>;

// BinOpRM - Instructions like "adc reg, reg, [mem]".
// There is an implicit register read at the end of the operand sequence.
class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                          dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
  : ITy<opcode, MRMSrcMem, typeinfo, outlist,
        (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
        mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
    Sched<[sched.Folded, sched.ReadAfterFold,
           // base, scale, index, offset, segment.
           ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
           // implicit register read.
           sched.ReadAfterFold]>;

// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                SDNode opnode>
@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                  SDNode opnode>
  : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
  : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
            [(set typeinfo.RegClass:$dst, EFLAGS,
            (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
                    EFLAGS))]>;
@ -805,7 +818,11 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                  SDNode opnode>
  : BinOpMR<opcode, mnemonic, typeinfo,
            [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
             (implicit EFLAGS)]>, Sched<[WriteALURMW]>;
             (implicit EFLAGS)]>, Sched<[WriteALURMW,
                                         // base, scale, index, offset, segment
                                         ReadDefault, ReadDefault, ReadDefault,
                                         ReadDefault, ReadDefault,
                                         WriteALU.ReadAfterFold]>; // reg

// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@ -813,7 +830,12 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
  : BinOpMR<opcode, mnemonic, typeinfo,
            [(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
                    addr:$dst),
             (implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
             (implicit EFLAGS)]>, Sched<[WriteADCRMW,
                                         // base, scale, index, offset, segment
                                         ReadDefault, ReadDefault, ReadDefault,
                                         ReadDefault, ReadDefault,
                                         WriteALU.ReadAfterFold, // reg
                                         WriteALU.ReadAfterFold]>; // EFLAGS

// BinOpMR_F - Instructions like "cmp [mem], reg".
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@ -32,6 +32,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueHandle.h"
@ -250,10 +251,12 @@ Value *AA::getWithType(Value &V, Type &Ty) {
      return Constant::getNullValue(&Ty);
    if (C->getType()->isPointerTy() && Ty.isPointerTy())
      return ConstantExpr::getPointerCast(C, &Ty);
    if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
      return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
    if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
      return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
    if (C->getType()->getPrimitiveSizeInBits() >= Ty.getPrimitiveSizeInBits()) {
      if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
        return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
      if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
        return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
    }
  }
  return nullptr;
}
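The added guard makes the narrowing direction explicit: trunc/fptrunc folds are only attempted when the source type is at least as wide as the destination. A minimal standalone model of that rule (plain C++ with stand-in bit widths; the LLVM constant machinery is not modeled):

#include <cassert>
#include <cstdint>
#include <optional>

// Only "truncate" when the source width is >= the destination width;
// otherwise bail out, mirroring getWithType's `return nullptr` path.
std::optional<uint64_t> getWithWidth(uint64_t V, unsigned SrcBits,
                                     unsigned DstBits) {
  if (SrcBits == DstBits)
    return V;
  if (SrcBits >= DstBits) // the new guard: never widen via a trunc
    return V & ((DstBits == 64) ? ~0ull : ((1ull << DstBits) - 1));
  return std::nullopt;
}

int main() {
  assert(getWithWidth(0x1ff, 16, 8) == 0xff); // narrowing still folds
  assert(!getWithWidth(0xff, 8, 16));         // widening is rejected now
  return 0;
}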
@ -1023,7 +1026,7 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,

  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    if (!Visited.insert(U).second)
    if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
      continue;
    LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
                      << *U->getUser() << "\n");
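After this change the visited set is consulted only for uses whose user is a PHI; those are the uses that can close a cycle in the use graph, so deduplicating them is enough for termination while other uses are always reprocessed. A toy worklist walk illustrating that idea (self-contained C++; the three-node graph is invented):

#include <cassert>
#include <set>
#include <vector>

struct Node {
  bool IsPHI = false;
  std::vector<int> Succs; // "uses to follow"
};

int main() {
  // 0 -> 1(phi) -> 2 -> 1(phi): a cycle that goes through the PHI node.
  std::vector<Node> G(3);
  G[0] = Node{false, {1}};
  G[1] = Node{true, {2}};
  G[2] = Node{false, {1}};

  std::set<int> Visited;
  std::vector<int> Worklist{0};
  int Steps = 0;
  while (!Worklist.empty()) {
    int N = Worklist.back();
    Worklist.pop_back();
    // Deduplicate only at PHIs; that is enough for termination here because
    // every cycle in this toy graph passes through a PHI.
    if (G[N].IsPHI && !Visited.insert(N).second)
      continue;
    ++Steps;
    for (int S : G[N].Succs)
      Worklist.push_back(S);
  }
  assert(Steps == 3); // 0, 1, 2; the second visit to node 1 is skipped
  return 0;
}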
@ -1925,49 +1928,85 @@ void Attributor::createShallowWrapper(Function &F) {
  NumFnShallowWrappersCreated++;
}

bool Attributor::isInternalizable(Function &F) {
  if (F.isDeclaration() || F.hasLocalLinkage() ||
      GlobalValue::isInterposableLinkage(F.getLinkage()))
    return false;
  return true;
}

Function *Attributor::internalizeFunction(Function &F, bool Force) {
  if (!AllowDeepWrapper && !Force)
    return nullptr;
  if (F.isDeclaration() || F.hasLocalLinkage() ||
      GlobalValue::isInterposableLinkage(F.getLinkage()))
  if (!isInternalizable(F))
    return nullptr;

  Module &M = *F.getParent();
  FunctionType *FnTy = F.getFunctionType();
  SmallPtrSet<Function *, 2> FnSet = {&F};
  DenseMap<Function *, Function *> InternalizedFns;
  internalizeFunctions(FnSet, InternalizedFns);

  // create a copy of the current function
  Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
                                      F.getName() + ".internalized");
  ValueToValueMapTy VMap;
  auto *NewFArgIt = Copied->arg_begin();
  for (auto &Arg : F.args()) {
    auto ArgName = Arg.getName();
    NewFArgIt->setName(ArgName);
    VMap[&Arg] = &(*NewFArgIt++);
  return InternalizedFns[&F];
}

bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
                                      DenseMap<Function *, Function *> &FnMap) {
  for (Function *F : FnSet)
    if (!Attributor::isInternalizable(*F))
      return false;

  FnMap.clear();
  // Generate the internalized version of each function.
  for (Function *F : FnSet) {
    Module &M = *F->getParent();
    FunctionType *FnTy = F->getFunctionType();

    // Create a copy of the current function
    Function *Copied =
        Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(),
                         F->getName() + ".internalized");
    ValueToValueMapTy VMap;
    auto *NewFArgIt = Copied->arg_begin();
    for (auto &Arg : F->args()) {
      auto ArgName = Arg.getName();
      NewFArgIt->setName(ArgName);
      VMap[&Arg] = &(*NewFArgIt++);
    }
    SmallVector<ReturnInst *, 8> Returns;

    // Copy the body of the original function to the new one
    CloneFunctionInto(Copied, F, VMap,
                      CloneFunctionChangeType::LocalChangesOnly, Returns);

    // Set the linkage and visibility late as CloneFunctionInto has some
    // implicit requirements.
    Copied->setVisibility(GlobalValue::DefaultVisibility);
    Copied->setLinkage(GlobalValue::PrivateLinkage);

    // Copy metadata
    SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
    F->getAllMetadata(MDs);
    for (auto MDIt : MDs)
      if (!Copied->hasMetadata())
        Copied->addMetadata(MDIt.first, *MDIt.second);

    M.getFunctionList().insert(F->getIterator(), Copied);
    Copied->setDSOLocal(true);
    FnMap[F] = Copied;
  }
  SmallVector<ReturnInst *, 8> Returns;

  // Copy the body of the original function to the new one
  CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly,
                    Returns);
  // Replace all uses of the old function with the new internalized function
  // unless the caller is a function that was just internalized.
  for (Function *F : FnSet) {
    auto &InternalizedFn = FnMap[F];
    auto IsNotInternalized = [&](Use &U) -> bool {
      if (auto *CB = dyn_cast<CallBase>(U.getUser()))
        return !FnMap.lookup(CB->getCaller());
      return false;
    };
    F->replaceUsesWithIf(InternalizedFn, IsNotInternalized);
  }

  // Set the linkage and visibility late as CloneFunctionInto has some implicit
  // requirements.
  Copied->setVisibility(GlobalValue::DefaultVisibility);
  Copied->setLinkage(GlobalValue::PrivateLinkage);

  // Copy metadata
  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
  F.getAllMetadata(MDs);
  for (auto MDIt : MDs)
    if (!Copied->hasMetadata())
      Copied->addMetadata(MDIt.first, *MDIt.second);

  M.getFunctionList().insert(F.getIterator(), Copied);
  F.replaceAllUsesWith(Copied);
  Copied->setDSOLocal(true);

  return Copied;
  return true;
}

bool Attributor::isValidFunctionSignatureRewrite(
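The use-rewriting step at the end of internalizeFunctions is the subtle part: a call is redirected to the ".internalized" copy only when the use does not sit inside one of the original functions that was itself just internalized (the IsNotInternalized predicate). A standalone toy model of that rule (plain C++; all function names and call edges are invented):

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Original -> internalized-copy map, as built by the loop above.
  std::map<std::string, std::string> FnMap = {
      {"foo", "foo.internalized"}, {"bar", "bar.internalized"}};
  // (caller, callee) call edges in the original module.
  std::vector<std::pair<std::string, std::string>> Calls = {
      {"main", "foo"}, {"foo", "bar"}, {"baz", "bar"}};
  for (auto &C : Calls) {
    bool CallerInternalized = FnMap.count(C.first) != 0;
    if (!CallerInternalized && FnMap.count(C.second))
      C.second = FnMap[C.second]; // redirect this use to the copy
  }
  assert(Calls[0].second == "foo.internalized"); // main -> copy
  assert(Calls[1].second == "bar");              // inside internalized foo: kept
  assert(Calls[2].second == "bar.internalized"); // baz -> copy
  return 0;
}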
@ -1149,19 +1149,23 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
    return true;
  };

  /// Helper struct, will support ranges eventually.
  struct OffsetInfo {
    int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown;

    bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
  };

  /// See AbstractAttribute::updateImpl(...).
  ChangeStatus updateImpl(Attributor &A) override {
    using namespace AA::PointerInfo;
    State S = getState();
    ChangeStatus Changed = ChangeStatus::UNCHANGED;
    Value &AssociatedValue = getAssociatedValue();
    struct OffsetInfo {
      int64_t Offset = 0;
    };

    const DataLayout &DL = A.getDataLayout();
    DenseMap<Value *, OffsetInfo> OffsetInfoMap;
    OffsetInfoMap[&AssociatedValue] = {};
    OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};

    auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
                                     bool &Follow) {
@ -1219,8 +1223,48 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
        Follow = true;
        return true;
      }
      if (isa<CastInst>(Usr) || isa<PHINode>(Usr) || isa<SelectInst>(Usr))
      if (isa<CastInst>(Usr) || isa<SelectInst>(Usr))
        return HandlePassthroughUser(Usr, PtrOI, Follow);

      // For PHIs we need to take care of the recurrence explicitly as the value
      // might change while we iterate through a loop. For now, we give up if
      // the PHI is not invariant.
      if (isa<PHINode>(Usr)) {
        // Check if the PHI is invariant (so far).
        OffsetInfo &UsrOI = OffsetInfoMap[Usr];
        if (UsrOI == PtrOI)
          return true;

        // Check if the PHI operand has already an unknown offset as we can't
        // improve on that anymore.
        if (PtrOI.Offset == OffsetAndSize::Unknown) {
          UsrOI = PtrOI;
          Follow = true;
          return true;
        }

        // Check if the PHI operand is not dependent on the PHI itself.
        APInt Offset(DL.getIndexTypeSizeInBits(AssociatedValue.getType()), 0);
        if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets(
                                    DL, Offset, /* AllowNonInbounds */ true)) {
          if (Offset != PtrOI.Offset) {
            LLVM_DEBUG(dbgs()
                       << "[AAPointerInfo] PHI operand pointer offset mismatch "
                       << *CurPtr << " in " << *Usr << "\n");
            return false;
          }
          return HandlePassthroughUser(Usr, PtrOI, Follow);
        }

        // TODO: Approximate in case we know the direction of the recurrence.
        LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
                          << *CurPtr << " in " << *Usr << "\n");
        UsrOI = PtrOI;
        UsrOI.Offset = OffsetAndSize::Unknown;
        Follow = true;
        return true;
      }

      if (auto *LoadI = dyn_cast<LoadInst>(Usr))
        return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr,
                            AccessKind::AK_READ, PtrOI.Offset, Changed,
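The PHI handling above is a one-step lattice merge: a PHI keeps a concrete offset only while every incoming pointer agrees, and on any disagreement it degrades to Unknown instead of iterating further. A standalone sketch of that merge (plain C++; -1 plays the role of OffsetAndSize::Unknown here):

#include <cassert>
#include <cstdint>

// Merge one incoming offset into a PHI's running offset, in the spirit of
// the new AAPointerInfo PHI handling.
int64_t mergePHIOffset(int64_t PhiOff, int64_t IncomingOff) {
  const int64_t Unknown = -1;
  if (PhiOff == IncomingOff)
    return PhiOff;   // invariant so far: nothing to do
  if (IncomingOff == Unknown)
    return Unknown;  // can't improve on Unknown anymore
  return Unknown;    // mismatch (a recurrence): give up, don't iterate
}

int main() {
  assert(mergePHIOffset(8, 8) == 8);   // loop-invariant offset survives
  assert(mergePHIOffset(8, 16) == -1); // changing offset falls to Unknown
  return 0;
}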
@ -4176,28 +4176,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
    ORE.emit([&]() {
      OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
      return ORA << "Could not internalize function. "
                 << "Some optimizations may not be possible.";
                 << "Some optimizations may not be possible. [OMP140]";
    });
  };

  // Create internal copies of each function if this is a kernel Module. This
  // allows interprocedural passes to see every call edge.
  DenseSet<const Function *> InternalizedFuncs;
  if (isOpenMPDevice(M))
  DenseMap<Function *, Function *> InternalizedMap;
  if (isOpenMPDevice(M)) {
    SmallPtrSet<Function *, 16> InternalizeFns;
    for (Function &F : M)
      if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
          !DisableInternalization) {
        if (Attributor::internalizeFunction(F, /* Force */ true)) {
          InternalizedFuncs.insert(&F);
        if (Attributor::isInternalizable(F)) {
          InternalizeFns.insert(&F);
        } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
          EmitRemark(F);
        }
      }

    Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
  }

  // Look at every function in the Module unless it was internalized.
  SmallVector<Function *, 16> SCC;
  for (Function &F : M)
    if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
    if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
      SCC.push_back(&F);

  if (SCC.empty())
@ -5158,6 +5158,83 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
  if (!isa<Constant>(Op1) && Op1Min == Op1Max)
    return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));

  // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a
  // min/max canonical compare with some other compare. That could lead to
  // conflict with select canonicalization and infinite looping.
  // FIXME: This constraint may go away if min/max intrinsics are canonical.
  auto isMinMaxCmp = [&](Instruction &Cmp) {
    if (!Cmp.hasOneUse())
      return false;
    Value *A, *B;
    SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor;
    if (!SelectPatternResult::isMinOrMax(SPF))
      return false;
    return match(Op0, m_MaxOrMin(m_Value(), m_Value())) ||
           match(Op1, m_MaxOrMin(m_Value(), m_Value()));
  };
  if (!isMinMaxCmp(I)) {
    switch (Pred) {
    default:
      break;
    case ICmpInst::ICMP_ULT: {
      if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
      const APInt *CmpC;
      if (match(Op1, m_APInt(CmpC))) {
        // A <u C -> A == C-1 if min(A)+1 == C
        if (*CmpC == Op0Min + 1)
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                              ConstantInt::get(Op1->getType(), *CmpC - 1));
        // X <u C --> X == 0, if the number of zero bits in the bottom of X
        // exceeds the log2 of C.
        if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                              Constant::getNullValue(Op1->getType()));
      }
      break;
    }
    case ICmpInst::ICMP_UGT: {
      if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
      const APInt *CmpC;
      if (match(Op1, m_APInt(CmpC))) {
        // A >u C -> A == C+1 if max(a)-1 == C
        if (*CmpC == Op0Max - 1)
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                              ConstantInt::get(Op1->getType(), *CmpC + 1));
        // X >u C --> X != 0, if the number of zero bits in the bottom of X
        // exceeds the log2 of C.
        if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
          return new ICmpInst(ICmpInst::ICMP_NE, Op0,
                              Constant::getNullValue(Op1->getType()));
      }
      break;
    }
    case ICmpInst::ICMP_SLT: {
      if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
      const APInt *CmpC;
      if (match(Op1, m_APInt(CmpC))) {
        if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                              ConstantInt::get(Op1->getType(), *CmpC - 1));
      }
      break;
    }
    case ICmpInst::ICMP_SGT: {
      if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
        return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
      const APInt *CmpC;
      if (match(Op1, m_APInt(CmpC))) {
        if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
          return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                              ConstantInt::get(Op1->getType(), *CmpC + 1));
      }
      break;
    }
    }
  }

  // Based on the range information we know about the LHS, see if we can
  // simplify this comparison. For example, (x&4) < 8 is always true.
  switch (Pred) {
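The new `X <u C --> X == 0` fold rests on a counting argument: if the low ceil(log2(C)) bits of X are known zero, then a nonzero X is a multiple of 2^ceil(log2(C)), which is at least C. A brute-force confirmation of that equivalence over all 8-bit values (standalone C++, not LLVM code):

#include <cassert>

// For X with at least TZ known trailing zero bits, (X <u C) == (X == 0)
// whenever TZ >= ceil(log2(C)). Mirrors the countMinTrailingZeros() fold.
static unsigned ceilLog2(unsigned C) {
  unsigned L = 0;
  while ((1u << L) < C)
    ++L;
  return L;
}

int main() {
  for (unsigned C = 1; C < 256; ++C) {
    unsigned TZ = ceilLog2(C);
    if (TZ >= 8)
      continue; // an 8-bit X with 8 known-zero low bits is 0 anyway
    for (unsigned X = 0; X < 256; ++X) {
      if (X & ((1u << TZ) - 1))
        continue; // X does not have TZ trailing zero bits
      assert((X < C) == (X == 0));
    }
  }
  return 0;
}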
@ -5219,21 +5296,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);

    const APInt *CmpC;
    if (match(Op1, m_APInt(CmpC))) {
      // A <u C -> A == C-1 if min(A)+1 == C
      if (*CmpC == Op0Min + 1)
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            ConstantInt::get(Op1->getType(), *CmpC - 1));
      // X <u C --> X == 0, if the number of zero bits in the bottom of X
      // exceeds the log2 of C.
      if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            Constant::getNullValue(Op1->getType()));
    }
    break;
  }
  case ICmpInst::ICMP_UGT: {
@ -5241,21 +5303,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);

    const APInt *CmpC;
    if (match(Op1, m_APInt(CmpC))) {
      // A >u C -> A == C+1 if max(a)-1 == C
      if (*CmpC == Op0Max - 1)
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            ConstantInt::get(Op1->getType(), *CmpC + 1));
      // X >u C --> X != 0, if the number of zero bits in the bottom of X
      // exceeds the log2 of C.
      if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
        return new ICmpInst(ICmpInst::ICMP_NE, Op0,
                            Constant::getNullValue(Op1->getType()));
    }
    break;
  }
  case ICmpInst::ICMP_SLT: {
@ -5263,14 +5310,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
    const APInt *CmpC;
    if (match(Op1, m_APInt(CmpC))) {
      if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            ConstantInt::get(Op1->getType(), *CmpC - 1));
    }
    break;
  }
  case ICmpInst::ICMP_SGT: {
@ -5278,14 +5317,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
      return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
    if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
      return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
    if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
      return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
    const APInt *CmpC;
    if (match(Op1, m_APInt(CmpC))) {
      if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
        return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
                            ConstantInt::get(Op1->getType(), *CmpC + 1));
    }
    break;
  }
  case ICmpInst::ICMP_SGE:
@ -261,8 +261,8 @@ private:

bool PointerReplacer::collectUsers(Instruction &I) {
  for (auto U : I.users()) {
    Instruction *Inst = cast<Instruction>(&*U);
    if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    auto *Inst = cast<Instruction>(&*U);
    if (auto *Load = dyn_cast<LoadInst>(Inst)) {
      if (Load->isVolatile())
        return false;
      Worklist.insert(Load);
@ -270,7 +270,9 @@ bool PointerReplacer::collectUsers(Instruction &I) {
      Worklist.insert(Inst);
      if (!collectUsers(*Inst))
        return false;
    } else if (isa<MemTransferInst>(Inst)) {
    } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
      if (MI->isVolatile())
        return false;
      Worklist.insert(Inst);
    } else if (Inst->isLifetimeStartOrEnd()) {
      continue;
@ -3230,7 +3230,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
    Value *Mask;
    if (match(TrueVal, m_Zero()) &&
        match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),
                                     m_CombineOr(m_Undef(), m_Zero())))) {
                                     m_CombineOr(m_Undef(), m_Zero()))) &&
        (CondVal->getType() == Mask->getType())) {
      // We can remove the select by ensuring the load zeros all lanes the
      // select would have. We determine this by proving there is no overlap
      // between the load and select masks.
@ -1981,6 +1981,9 @@ class LSRInstance {
  /// IV users that belong to profitable IVChains.
  SmallPtrSet<Use*, MaxChains> IVIncSet;

  /// Induction variables that were generated and inserted by the SCEV Expander.
  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@ -2085,6 +2088,9 @@ public:
              TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);

  bool getChanged() const { return Changed; }
  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
    return ScalarEvolutionIVs;
  }

  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
@ -5589,6 +5595,11 @@ void LSRInstance::ImplementSolution(
    GenerateIVChain(Chain, Rewriter, DeadInsts);
    Changed = true;
  }

  for (const WeakVH &IV : Rewriter.getInsertedIVs())
    if (IV && dyn_cast<Instruction>(&*IV)->getParent())
      ScalarEvolutionIVs.push_back(IV);

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();
|
||||
AU.addPreserved<MemorySSAWrapperPass>();
|
||||
}
|
||||
|
||||
using EqualValues = SmallVector<std::tuple<WeakVH, int64_t>, 4>;
|
||||
using EqualValuesMap =
|
||||
DenseMap<DbgValueInst *, SmallVector<std::pair<unsigned, EqualValues>>>;
|
||||
using LocationMap =
|
||||
DenseMap<DbgValueInst *, std::pair<DIExpression *, Metadata *>>;
|
||||
struct SCEVDbgValueBuilder {
|
||||
SCEVDbgValueBuilder() = default;
|
||||
SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
|
||||
Values = Base.Values;
|
||||
Expr = Base.Expr;
|
||||
}
|
||||
|
||||
static void DbgGatherEqualValues(Loop *L, ScalarEvolution &SE,
|
||||
EqualValuesMap &DbgValueToEqualSet,
|
||||
LocationMap &DbgValueToLocation) {
|
||||
/// The DIExpression as we translate the SCEV.
|
||||
SmallVector<uint64_t, 6> Expr;
|
||||
/// The location ops of the DIExpression.
|
||||
SmallVector<llvm::ValueAsMetadata *, 2> Values;
|
||||
|
||||
void pushOperator(uint64_t Op) { Expr.push_back(Op); }
|
||||
void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
|
||||
|
||||
/// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
|
||||
/// in the set of values referenced by the expression.
|
||||
void pushValue(llvm::Value *V) {
|
||||
Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
|
||||
auto *It =
|
||||
std::find(Values.begin(), Values.end(), llvm::ValueAsMetadata::get(V));
|
||||
unsigned ArgIndex = 0;
|
||||
if (It != Values.end()) {
|
||||
ArgIndex = std::distance(Values.begin(), It);
|
||||
} else {
|
||||
ArgIndex = Values.size();
|
||||
Values.push_back(llvm::ValueAsMetadata::get(V));
|
||||
}
|
||||
Expr.push_back(ArgIndex);
|
||||
}
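
Editor's note: the find-or-append indexing above is the whole trick behind DW_OP_LLVM_arg operands — each distinct value gets one slot and repeated pushes reuse its index. A standalone restatement in plain C++ (illustrative types, not the LLVM code):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Return the slot index of V in Values, appending it if absent.
    unsigned getOrAddIndex(std::vector<int> &Values, int V) {
      auto It = std::find(Values.begin(), Values.end(), V);
      if (It != Values.end())
        return std::distance(Values.begin(), It);
      Values.push_back(V);
      return Values.size() - 1;
    }

    int main() {
      std::vector<int> Values;
      assert(getOrAddIndex(Values, 7) == 0);
      assert(getOrAddIndex(Values, 9) == 1);
      assert(getOrAddIndex(Values, 7) == 0); // deduplicated
    }
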

void pushValue(const SCEVUnknown *U) {
llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
pushValue(V);
}

bool pushConst(const SCEVConstant *C) {
if (C->getAPInt().getMinSignedBits() > 64)
return false;
Expr.push_back(llvm::dwarf::DW_OP_consts);
Expr.push_back(C->getAPInt().getSExtValue());
return true;
}

/// Several SCEV types are sequences of the same arithmetic operator applied
/// to constants and values that may be extended or truncated.
bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
uint64_t DwarfOp) {
assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
"Expected arithmetic SCEV type");
bool Success = true;
unsigned EmitOperator = 0;
for (auto &Op : CommExpr->operands()) {
Success &= pushSCEV(Op);

if (EmitOperator >= 1)
pushOperator(DwarfOp);
++EmitOperator;
}
return Success;
}
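
Editor's note: the operator is deliberately emitted only from the second operand onwards because DWARF expressions are postfix — N operands need N-1 operators. A tiny stack-machine sketch (plain C++ with made-up tokens, not DWARF opcodes):

    #include <cassert>
    #include <vector>

    enum Op { Plus = -1 }; // sentinel; operands in the test are nonnegative

    int evalPostfix(const std::vector<int> &Expr) {
      std::vector<int> Stack;
      for (int Tok : Expr) {
        if (Tok == Plus) {
          int R = Stack.back(); Stack.pop_back();
          Stack.back() += R; // pop two, push sum
        } else {
          Stack.push_back(Tok); // operand
        }
      }
      return Stack.back();
    }

    int main() {
      // Emission order for operands {2, 3, 4}: 2, 3, +, 4, +
      assert(evalPostfix({2, 3, Plus, 4, Plus}) == 9);
    }
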

// TODO: Identify and omit noop casts.
bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
const llvm::SCEV *Inner = C->getOperand(0);
const llvm::Type *Type = C->getType();
uint64_t ToWidth = Type->getIntegerBitWidth();
bool Success = pushSCEV(Inner);
uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
IsSigned ? llvm::dwarf::DW_ATE_signed
: llvm::dwarf::DW_ATE_unsigned};
for (const auto &Op : CastOps)
pushOperator(Op);
return Success;
}

// TODO: MinMax - although these haven't been encountered in the test suite.
bool pushSCEV(const llvm::SCEV *S) {
bool Success = true;
if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
Success &= pushConst(StartInt);

} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!U->getValue())
return false;
pushValue(U->getValue());

} else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);

} else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
Success &= pushSCEV(UDiv->getLHS());
Success &= pushSCEV(UDiv->getRHS());
pushOperator(llvm::dwarf::DW_OP_div);

} else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
// Assert if a new and unknown SCEVCastExpr type is encountered.
assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
"Unexpected cast type in SCEV.");
Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));

} else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);

} else if (isa<SCEVAddRecExpr>(S)) {
// Nested SCEVAddRecExpr are generated by nested loops and are currently
// unsupported.
return false;

} else {
return false;
}
return Success;
}

void setFinalExpression(llvm::DbgValueInst &DI, const DIExpression *OldExpr) {
// Re-state assumption that this dbg.value is not variadic. Any remaining
// opcodes in its expression operate on a single value already on the
// expression stack. Prepend our operations, which will re-compute and
// place that value on the expression stack.
assert(!DI.hasArgList());
auto *NewExpr =
DIExpression::prependOpcodes(OldExpr, Expr, /*StackValue*/ true);
DI.setExpression(NewExpr);

auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(Values);
DI.setRawLocation(llvm::DIArgList::get(DI.getContext(), ValArrayRef));
}

/// If a DVI can be emitted without a DIArgList, omit DW_OP_llvm_arg and the
/// location op index 0.
void setShortFinalExpression(llvm::DbgValueInst &DI,
const DIExpression *OldExpr) {
assert((Expr[0] == llvm::dwarf::DW_OP_LLVM_arg && Expr[1] == 0) &&
"Expected DW_OP_llvm_arg and 0.");
DI.replaceVariableLocationOp(
0u, llvm::MetadataAsValue::get(DI.getContext(), Values[0]));

// See setFinalExpression: prepend our opcodes on the start of any old
// expression opcodes.
assert(!DI.hasArgList());
llvm::SmallVector<uint64_t, 6> FinalExpr(Expr.begin() + 2, Expr.end());
auto *NewExpr =
DIExpression::prependOpcodes(OldExpr, FinalExpr, /*StackValue*/ true);
DI.setExpression(NewExpr);
}

/// Once the IV and variable SCEV translation is complete, write it to the
/// source DVI.
void applyExprToDbgValue(llvm::DbgValueInst &DI,
const DIExpression *OldExpr) {
assert(!Expr.empty() && "Unexpected empty expression.");
// Emit a simpler form if only a single location is referenced.
if (Values.size() == 1 && Expr[0] == llvm::dwarf::DW_OP_LLVM_arg &&
Expr[1] == 0) {
setShortFinalExpression(DI, OldExpr);
} else {
setFinalExpression(DI, OldExpr);
}
}

/// Return true if the combination of arithmetic operator and underlying
/// SCEV constant value is an identity function.
bool isIdentityFunction(uint64_t Op, const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
if (C->getAPInt().getMinSignedBits() > 64)
return false;
int64_t I = C->getAPInt().getSExtValue();
switch (Op) {
case llvm::dwarf::DW_OP_plus:
case llvm::dwarf::DW_OP_minus:
return I == 0;
case llvm::dwarf::DW_OP_mul:
case llvm::dwarf::DW_OP_div:
return I == 1;
}
}
return false;
}
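
Editor's note: a standalone restatement of the identity check, using ordinary operator characters instead of DWARF opcodes — x+0, x-0, x*1, and x/1 all leave x unchanged, so those operator/constant pairs can be dropped:

    #include <cassert>
    #include <cstdint>

    bool isIdentity(char Op, int64_t C) {
      switch (Op) {
      case '+': case '-': return C == 0; // adding/subtracting zero
      case '*': case '/': return C == 1; // scaling by one
      default: return false;
      }
    }

    int main() {
      assert(isIdentity('+', 0) && isIdentity('*', 1));
      assert(!isIdentity('+', 1) && !isIdentity('/', 2));
    }
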

/// Convert a SCEV of a value to a DIExpression that is pushed onto the
/// builder's expression stack. The stack should already contain an
/// expression for the iteration count, so that it can be multiplied by
/// the stride and added to the start.
/// Components of the expression are omitted if they are an identity function.
/// Chain (non-affine) SCEVs are not supported.
bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
assert(SAR.isAffine() && "Expected affine SCEV");
// TODO: Is this check needed?
if (isa<SCEVAddRecExpr>(SAR.getStart()))
return false;

const SCEV *Start = SAR.getStart();
const SCEV *Stride = SAR.getStepRecurrence(SE);

// Skip pushing arithmetic noops.
if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
if (!pushSCEV(Stride))
return false;
pushOperator(llvm::dwarf::DW_OP_mul);
}
if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
if (!pushSCEV(Start))
return false;
pushOperator(llvm::dwarf::DW_OP_plus);
}
return true;
}

/// Convert the SCEV of the optimised induction variable to a DIExpression
/// that recovers the iteration count: the IV should already be on the
/// expression stack, then its start is subtracted and the result divided
/// by the stride.
/// Components of the expression are omitted if they are an identity function.
bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
ScalarEvolution &SE) {
assert(SAR.isAffine() && "Expected affine SCEV");
if (isa<SCEVAddRecExpr>(SAR.getStart())) {
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
<< SAR << '\n');
return false;
}
const SCEV *Start = SAR.getStart();
const SCEV *Stride = SAR.getStepRecurrence(SE);

// Skip pushing arithmetic noops.
if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
if (!pushSCEV(Start))
return false;
pushOperator(llvm::dwarf::DW_OP_minus);
}
if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
if (!pushSCEV(Stride))
return false;
pushOperator(llvm::dwarf::DW_OP_div);
}
return true;
}
};
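
Editor's note: taken together, the two conversions encode inverse affine maps — SCEVToIterCountExpr turns the optimised IV back into an iteration number, and SCEVToValueExpr replays the dead variable's recurrence from it. A worked numeric sketch (plain C++, made-up recurrences):

    #include <cassert>
    #include <cstdint>

    // For an affine recurrence {start,+,stride}:
    int64_t iterCount(int64_t IV, int64_t IVStart, int64_t IVStride) {
      return (IV - IVStart) / IVStride;      // what SCEVToIterCountExpr emits
    }
    int64_t recoverValue(int64_t Count, int64_t Start, int64_t Stride) {
      return Count * Stride + Start;         // what SCEVToValueExpr emits
    }

    int main() {
      // IV = {8,+,4}; dead variable = {3,+,2}. On the iteration where the
      // IV reads 20, the count is 3 and the variable's value is 3*2 + 3 = 9.
      int64_t Count = iterCount(/*IV=*/20, /*start=*/8, /*stride=*/4);
      assert(Count == 3);
      assert(recoverValue(Count, /*start=*/3, /*stride=*/2) == 9);
    }
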

struct DVIRecoveryRec {
DbgValueInst *DVI;
DIExpression *Expr;
Metadata *LocationOp;
const llvm::SCEV *SCEV;
};

static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
const SCEVDbgValueBuilder &IterationCount,
ScalarEvolution &SE) {
// LSR may add locations to previously single location-op DVIs which
// are currently not supported.
if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
return false;

// SCEVs for SSA values are most frequently of the form
// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
// This is because %a is a PHI node that is not the IV. However, these
// SCEVs have not been observed to result in debuginfo-lossy optimisations,
// so it's not expected this point will be reached.
if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
return false;

LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
<< *CachedDVI.SCEV << '\n');

const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
if (!Rec->isAffine())
return false;

// Initialise a new builder with the iteration count expression. In
// combination with the value's SCEV this enables recovery.
SCEVDbgValueBuilder RecoverValue(IterationCount);
if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
return false;

LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
return true;
}

static bool
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
llvm::PHINode *LSRInductionVar,
SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) {
if (DVIToUpdate.empty())
return false;

const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
assert(SCEVInductionVar &&
"Anticipated a SCEV for the post-LSR induction variable");

bool Changed = false;
if (const SCEVAddRecExpr *IVAddRec =
dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
if (!IVAddRec->isAffine())
return false;

SCEVDbgValueBuilder IterCountExpr;
IterCountExpr.pushValue(LSRInductionVar);
if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
return false;

LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
<< '\n');

// Needn't salvage if the location op hasn't been undef'd by LSR.
for (auto &DVIRec : DVIToUpdate) {
if (!DVIRec.DVI->isUndef())
continue;

// Some DVIs that were single location-op when cached are now multi-op,
// due to LSR optimisations. However, multi-op salvaging is not yet
// supported by SCEV salvaging. But, we can attempt a salvage by restoring
// the pre-LSR single-op expression.
if (DVIRec.DVI->hasArgList()) {
if (!DVIRec.DVI->getVariableLocationOp(0))
continue;
llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType();
DVIRec.DVI->setRawLocation(
llvm::ValueAsMetadata::get(UndefValue::get(Ty)));
DVIRec.DVI->setExpression(DVIRec.Expr);
}

Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
}
}
return Changed;
}

/// Identify and cache salvageable DVI locations and expressions along with the
/// corresponding SCEV(s). Also ensure that the DVI is not deleted before the
/// salvage is attempted, by keeping a handle to it.
static void
DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs,
SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
for (auto &B : L->getBlocks()) {
for (auto &I : *B) {
auto DVI = dyn_cast<DbgValueInst>(&I);
if (!DVI)
continue;
for (unsigned Idx = 0; Idx < DVI->getNumVariableLocationOps(); ++Idx) {
// TODO: We can duplicate results if the same arg appears more than
// once.
Value *V = DVI->getVariableLocationOp(Idx);
if (!V || !SE.isSCEVable(V->getType()))
continue;
auto DbgValueSCEV = SE.getSCEV(V);
EqualValues EqSet;
for (PHINode &Phi : L->getHeader()->phis()) {
if (V->getType() != Phi.getType())
continue;
if (!SE.isSCEVable(Phi.getType()))
continue;
auto PhiSCEV = SE.getSCEV(&Phi);
Optional<APInt> Offset =
SE.computeConstantDifference(DbgValueSCEV, PhiSCEV);
if (Offset && Offset->getMinSignedBits() <= 64)
EqSet.emplace_back(
std::make_tuple(&Phi, Offset.getValue().getSExtValue()));
}
DbgValueToEqualSet[DVI].push_back({Idx, std::move(EqSet)});
// If we fall back to using this raw location, at least one location op
// must be dead. A DIArgList will automatically undef arguments when
// they become unavailable, but a ValueAsMetadata will not; since we
// know the value should be undef, we use the undef value directly here.
Metadata *RawLocation =
DVI->hasArgList() ? DVI->getRawLocation()
: ValueAsMetadata::get(UndefValue::get(
DVI->getVariableLocationOp(0)->getType()));
DbgValueToLocation[DVI] = {DVI->getExpression(), RawLocation};
}

if (DVI->hasArgList())
continue;

if (!DVI->getVariableLocationOp(0) ||
!SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
continue;

SalvageableDVISCEVs.push_back(
{DVI, DVI->getExpression(), DVI->getRawLocation(),
SE.getSCEV(DVI->getVariableLocationOp(0))});
DVIHandles.insert(DVI);
}
}
}
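
Editor's note: the computeConstantDifference probe above rests on a simple observation — two affine recurrences with the same stride differ by a constant everywhere, so one can stand in for the other plus an offset appended to the DIExpression. A hedged sketch with illustrative start/stride pairs:

    #include <cassert>
    #include <cstdint>
    #include <optional>

    // Difference of {AStart,+,AStride} and {BStart,+,BStride}, when constant.
    std::optional<int64_t> constantDifference(int64_t AStart, int64_t AStride,
                                              int64_t BStart, int64_t BStride) {
      if (AStride != BStride) // difference varies per iteration
        return std::nullopt;
      return AStart - BStart;
    }

    int main() {
      // v = {7,+,4} and phi = {3,+,4} differ by 4 on every iteration, so a
      // dead location for v can be rewritten as "phi" plus an offset of 4.
      assert(constantDifference(7, 4, 3, 4) == 4);
      assert(!constantDifference(7, 4, 3, 2));
    }
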

static void DbgApplyEqualValues(EqualValuesMap &DbgValueToEqualSet,
LocationMap &DbgValueToLocation) {
for (auto A : DbgValueToEqualSet) {
auto *DVI = A.first;
// Only update those that are now undef.
if (!DVI->isUndef())
/// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
/// any PHI from the loop header is usable, but may have less chance of
/// surviving subsequent transforms.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
const LSRInstance &LSR) {
// For now, just pick the first IV generated and inserted. Ideally pick an IV
// that is unlikely to be optimised away by subsequent transforms.
for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
if (!IV)
continue;
// The dbg.value may have had its value or expression changed during LSR by
// a failed salvage attempt; refresh them from the map.
auto *DbgDIExpr = DbgValueToLocation[DVI].first;
DVI->setRawLocation(DbgValueToLocation[DVI].second);
DVI->setExpression(DbgDIExpr);
assert(DVI->isUndef() && "dbg.value with non-undef location should not "
"have been modified by LSR.");
for (auto IdxEV : A.second) {
unsigned Idx = IdxEV.first;
for (auto EV : IdxEV.second) {
auto EVHandle = std::get<WeakVH>(EV);
if (!EVHandle)
continue;
int64_t Offset = std::get<int64_t>(EV);
DVI->replaceVariableLocationOp(Idx, EVHandle);
if (Offset) {
SmallVector<uint64_t, 8> Ops;
DIExpression::appendOffset(Ops, Offset);
DbgDIExpr = DIExpression::appendOpsToArg(DbgDIExpr, Ops, Idx, true);
}
DVI->setExpression(DbgDIExpr);
break;
}

assert(isa<PHINode>(&*IV) && "Expected PHI node.");
if (SE.isSCEVable((*IV).getType())) {
PHINode *Phi = dyn_cast<PHINode>(&*IV);
LLVM_DEBUG(dbgs() << "scev-salvage: IV : " << *IV
<< " with SCEV: " << *SE.getSCEV(Phi) << "\n");
return Phi;
}
}

for (PHINode &Phi : L.getHeader()->phis()) {
if (!SE.isSCEVable(Phi.getType()))
continue;

const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi);
if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV))
if (!Rec->isAffine())
continue;

LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi
<< " with SCEV: " << *PhiSCEV << "\n");
return Phi;
}
return nullptr;
}

static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
@@ -5948,20 +6271,21 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
AssumptionCache &AC, TargetLibraryInfo &TLI,
MemorySSA *MSSA) {

// Debug preservation - before we start removing anything identify which DVIs
// meet the salvageable criteria and store their DIExpression and SCEVs.
SmallVector<DVIRecoveryRec, 2> SalvageableDVI;
SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
DbgGatherSalvagableDVI(L, SE, SalvageableDVI, DVIHandles);

bool Changed = false;
std::unique_ptr<MemorySSAUpdater> MSSAU;
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

// Run the main LSR transformation.
Changed |=
LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()).getChanged();

// Debug preservation - before we start removing anything create equivalence
// sets for the llvm.dbg.value intrinsics.
EqualValuesMap DbgValueToEqualSet;
LocationMap DbgValueToLocation;
DbgGatherEqualValues(L, SE, DbgValueToEqualSet, DbgValueToLocation);
const LSRInstance &Reducer =
LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
Changed |= Reducer.getChanged();

// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
@@ -5981,8 +6305,22 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}
}

DbgApplyEqualValues(DbgValueToEqualSet, DbgValueToLocation);
if (SalvageableDVI.empty())
return Changed;

// Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
// expressions composed using the derived iteration count.
// TODO: Allow for multiple IV references for nested AddRecSCEVs
for (auto &L : LI) {
if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVI);
else {
LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
"could not be identified.\n");
}
}

DVIHandles.clear();
return Changed;
}

@@ -2811,10 +2811,11 @@ private:
if (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)
return false;
// Length must be in range for FixedVectorType.
auto *C = cast<ConstantInt>(II.getLength());
if (C->getBitWidth() > 64)
const uint64_t Len = C->getLimitedValue();
if (Len > std::numeric_limits<unsigned>::max())
return false;
const auto Len = C->getZExtValue();
auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
return canConvertValue(DL, SrcTy, AllocaTy) &&
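
Editor's note: the guard introduced in this hunk exists because a fixed vector's element count is an unsigned, while a memset length is 64-bit; the range check must happen before the value is narrowed rather than letting it truncate silently. A standalone sketch of the check:

    #include <cassert>
    #include <cstdint>
    #include <limits>

    // True if a 64-bit length can safely become a vector element count.
    bool lengthFitsVector(uint64_t Len) {
      return Len <= std::numeric_limits<unsigned>::max();
    }

    int main() {
      assert(lengthFitsVector(16));
      assert(!lengthFitsVector(uint64_t(1) << 33)); // would wrap as 'unsigned'
    }
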

@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -23,6 +24,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -566,10 +568,18 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// to ensure we dominate all of our uses. Always insert right before the
// relevant instruction (terminator, assume), so that we insert in proper
// order in the case of multiple predicateinfo in the same block.
// The number of named values is used to detect if a new declaration was
// added. If so, that declaration is tracked so that it can be removed when
// the analysis is done. The corner case where a new declaration results in
// a name clash and the old name being renamed is not considered as that
// represents an invalid module.
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
auto NumDecls = F.getParent()->getNumNamedValues();
Function *IF = Intrinsic::getDeclaration(
F.getParent(), Intrinsic::ssa_copy, Op->getType());
if (NumDecls != F.getParent()->getNumNamedValues())
PI.CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
PI.PredicateMap.insert({PIC, ValInfo});
@@ -581,8 +591,11 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// Insert the predicate directly after the assume. While it also holds
// directly before it, assume(i1 true) is not a useful fact.
IRBuilder<> B(PAssume->AssumeInst->getNextNode());
auto NumDecls = F.getParent()->getNumNamedValues();
Function *IF = Intrinsic::getDeclaration(
F.getParent(), Intrinsic::ssa_copy, Op->getType());
if (NumDecls != F.getParent()->getNumNamedValues())
PI.CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
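
Editor's note: both call sites use the same detection trick — snapshot the module's named-value count, ask for the declaration, and treat growth as proof a fresh declaration was created. A simplified model (plain C++, with a std::set standing in for the module symbol table, not the LLVM API):

    #include <cassert>
    #include <set>
    #include <string>

    std::set<std::string> Module; // stand-in for the module's named values

    // Get-or-create; returns true only when a new declaration was created.
    bool getOrCreateDeclaration(const std::string &Name) {
      size_t NumDecls = Module.size(); // snapshot before
      Module.insert(Name);             // get-or-create
      return Module.size() != NumDecls; // grew => newly created
    }

    int main() {
      std::set<std::string> Created;
      if (getOrCreateDeclaration("llvm.ssa.copy.i32"))
        Created.insert("llvm.ssa.copy.i32"); // track for later cleanup
      assert(Created.count("llvm.ssa.copy.i32") == 1);
      assert(!getOrCreateDeclaration("llvm.ssa.copy.i32")); // reused
    }
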

@@ -761,6 +774,23 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
Builder.buildPredicateInfo();
}

// Remove all declarations we created. The PredicateInfo consumers are
// responsible for removing the ssa_copy calls created.
PredicateInfo::~PredicateInfo() {
// Collect function pointers in set first, as SmallSet uses a SmallVector
// internally and we have to remove the asserting value handles first.
SmallPtrSet<Function *, 20> FunctionPtrs;
for (auto &F : CreatedDeclarations)
FunctionPtrs.insert(&*F);
CreatedDeclarations.clear();

for (Function *F : FunctionPtrs) {
assert(F->user_begin() == F->user_end() &&
"PredicateInfo consumer did not remove all SSA copies.");
F->eraseFromParent();
}
}
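
Editor's note: the ordering in this destructor matters — asserting handles must all be dropped before any underlying object is erased, so plain pointers are copied out first. A minimal sketch of the copy-out-then-clear pattern, with an illustrative Handle type standing in for AssertingVH:

    #include <cassert>
    #include <vector>

    struct Handle { int *Ptr; }; // stand-in for an asserting value handle

    int main() {
      int A = 1, B = 2;
      std::vector<Handle> CreatedDeclarations = {{&A}, {&B}};

      std::vector<int *> FunctionPtrs; // plain pointers, safe to hold
      for (auto &H : CreatedDeclarations)
        FunctionPtrs.push_back(H.Ptr);
      CreatedDeclarations.clear();     // drop every handle first...

      assert(FunctionPtrs.size() == 2); // ...then erase via FunctionPtrs
    }
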

Optional<PredicateConstraint> PredicateBase::getConstraint() const {
switch (Type) {
case PT_Assume:
@@ -827,6 +857,19 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
}

// Replace ssa_copy calls created by PredicateInfo with their operand.
static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
const auto *PI = PredInfo.getPredicateInfoFor(&Inst);
auto *II = dyn_cast<IntrinsicInst>(&Inst);
if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
continue;

Inst.replaceAllUsesWith(II->getOperand(0));
Inst.eraseFromParent();
}
}
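
Editor's note: make_early_inc_range is what makes erasing inside this loop safe — the iterator is advanced before the current element can be invalidated. The same pattern written out by hand in standalone C++ over a std::list:

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> Insts = {1, 2, 3, 4};
      for (auto It = Insts.begin(); It != Insts.end();) {
        auto Cur = It++;      // early increment: advance before any erase
        if (*Cur % 2 == 0)
          Insts.erase(Cur);   // safe: 'It' already points past 'Cur'
      }
      assert(Insts == std::list<int>({1, 3}));
    }
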

bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -834,6 +877,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
PredInfo->print(dbgs());
if (VerifyPredicateInfo)
PredInfo->verifyPredicateInfo();

replaceCreatedSSACopys(*PredInfo, F);
return false;
}

@@ -845,6 +890,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(OS);

replaceCreatedSSACopys(*PredInfo, F);
return PreservedAnalyses::all();
}

@@ -1393,9 +1393,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// can ensure that IVIncrement dominates the current uses.
PostIncLoops = SavedPostIncLoops;

// Remember this PHI, even in post-inc mode.
// Remember this PHI, even in post-inc mode. LSR SCEV-based salvaging is most
// effective when we are able to use an IV inserted here, so record it.
InsertedValues.insert(PN);

InsertedIVs.push_back(PN);
return PN;
}

@@ -5433,6 +5433,21 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// lane 0 demanded or b) are uses which demand only lane 0 of their operand.
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::sideeffect:
case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::assume:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
if (TheLoop->hasLoopInvariantOperands(&I))
addToWorklistIfAllowed(&I);
break;
default:
break;
}
}

// If there's no pointer operand, there's nothing to do.
auto *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
@@ -8916,6 +8931,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);

// Even if the instruction is not marked as uniform, there are certain
// intrinsic calls that can be effectively treated as such, so we check for
// them here. Conservatively, we only do this for scalable vectors, since
// for fixed-width VFs we can always fall back on full scalarization.
if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) {
switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
case Intrinsic::assume:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
// For scalable vectors if one of the operands is variant then we still
// want to mark as uniform, which will generate one instruction for just
// the first lane of the vector. We can't scalarize the call in the same
// way as for fixed-width vectors because we don't know how many lanes
// there are.
//
// The reasons for doing it this way for scalable vectors are:
// 1. For the assume intrinsic generating the instruction for the first
// lane is still better than not generating any at all. For
// example, the input may be a splat across all lanes.
// 2. For the lifetime start/end intrinsics the pointer operand only
// does anything useful when the input comes from a stack object,
// which suggests it should always be uniform. For non-stack objects
// the effect is to poison the object, which still allows us to
// remove the call.
IsUniform = true;
break;
default:
break;
}
}

auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
setRecipe(I, Recipe);

@@ -145,10 +145,11 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,

double AverageTime1, AverageTime2, AverageTime3;
AverageTime1 =
(double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions;
AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions;
AverageTime3 =
(double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions;
(double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;
AverageTime2 =
(double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;
AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /
CumulativeExecutions;

OS << Executions;
OS.PadToColumn(13);
@@ -157,18 +158,18 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
BufferSize);
OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);
OS.PadToColumn(20);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
BufferSize);
OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);
OS.PadToColumn(27);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
CumulativeExecutions,
getSubTargetInfo().getSchedModel().MicroOpBufferSize);
OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);
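
Editor's note: the two formattings are numerically equivalent — multiplying the cycle count by 10 up front lets the print site round with floor(x + 0.5) / 10 rather than floor(x * 10 + 0.5) / 10, still yielding one decimal place. A quick standalone check:

    #include <cassert>
    #include <cmath>

    double roundOld(double Cycles, double Execs) {
      double Avg = Cycles / Execs;               // average first
      return std::floor(Avg * 10 + 0.5) / 10;    // then scale and round
    }
    double roundNew(double Cycles, double Execs) {
      double Avg = (Cycles * 10) / Execs;        // scale first
      return std::floor(Avg + 0.5) / 10;         // then round
    }

    int main() {
      assert(roundOld(7, 3) == roundNew(7, 3)); // both print 2.3
    }
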

if (OS.has_colors())
OS.resetColor();

@@ -344,6 +344,13 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
// link node as successor of all nodes in the prev_set if any
npredecessors +=
__kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
if (dep_barrier) {
// clean last_out and prev_set if any; don't touch last_set
__kmp_node_deref(thread, last_out);
info->last_out = NULL;
__kmp_depnode_list_free(thread, prev_set);
info->prev_set = NULL;
}
} else { // last_set is of different dep kind, make it prev_set
// link node as successor of all nodes in the last_set
npredecessors +=
@@ -353,13 +360,21 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
info->last_out = NULL;
// clean prev_set if any
__kmp_depnode_list_free(thread, prev_set);
// move last_set to prev_set, new last_set will be allocated
info->prev_set = last_set;
if (!dep_barrier) {
// move last_set to prev_set, new last_set will be allocated
info->prev_set = last_set;
} else {
info->prev_set = NULL;
info->last_flag = 0;
}
info->last_set = NULL;
}
info->last_flag = dep->flag; // store dep kind of the last_set
info->last_set = __kmp_add_node(thread, info->last_set, node);

// for dep_barrier last_flag value should remain:
// 0 if last_set is empty, unchanged otherwise
if (!dep_barrier) {
info->last_flag = dep->flag; // store dep kind of the last_set
info->last_set = __kmp_add_node(thread, info->last_set, node);
}
// check if we are processing MTX dependency
if (dep->flag == KMP_DEP_MTX) {
if (info->mtx_lock == NULL) {
@@ -756,8 +771,6 @@ void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,

kmp_depnode_t node = {0};
__kmp_init_node(&node);
// the stack owns the node
__kmp_node_ref(&node);

if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
DEP_BARRIER, ndeps, dep_list, ndeps_noalias,

@@ -23,8 +23,7 @@ static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
return;

kmp_int32 n = KMP_ATOMIC_DEC(&node->dn.nrefs) - 1;
// TODO: temporarily disable assertion until the bug with dependences is fixed
// KMP_DEBUG_ASSERT(n >= 0);
KMP_DEBUG_ASSERT(n >= 0);
if (n == 0) {
KMP_ASSERT(node->dn.nrefs == 0);
#if USE_FAST_MEMORY

@@ -1441,6 +1441,7 @@ kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
if (__kmp_enable_hidden_helper) {
auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
input_flags.hidden_helper = TRUE;
input_flags.tiedness = TASK_UNTIED;
}

return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,