Vendor import of llvm-project branch release/12.x llvmorg-12.0.1-rc2-0-ge7dac564cd0e, a.k.a. 12.0.1 rc2.

Dimitry Andric 2021-06-18 21:08:25 +02:00
parent b4125f7d51
commit e4bbddaec8
59 changed files with 914 additions and 505 deletions

View File

@ -57,6 +57,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
} else if (Feature == "+pcrelative-memops") {
HasPCRelativeMemops = true;
} else if (Feature == "+spe" || Feature == "+efpu2") {
HasStrictFP = false;
HasSPE = true;
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble();

View File

@ -409,6 +409,7 @@ class InlinedOpenMPRegionRAII {
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField = nullptr;
const CodeGen::CGBlockInfo *BlockInfo = nullptr;
bool NoInheritance = false;
public:
/// Constructs region for combined constructs.
@ -416,16 +417,19 @@ class InlinedOpenMPRegionRAII {
/// a list of functions used for code generation of implicitly inlined
/// regions.
InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
OpenMPDirectiveKind Kind, bool HasCancel)
: CGF(CGF) {
OpenMPDirectiveKind Kind, bool HasCancel,
bool NoInheritance = true)
: CGF(CGF), NoInheritance(NoInheritance) {
// Start emission for the construct.
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
LambdaThisCaptureField = CGF.LambdaThisCaptureField;
CGF.LambdaThisCaptureField = nullptr;
BlockInfo = CGF.BlockInfo;
CGF.BlockInfo = nullptr;
if (NoInheritance) {
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
LambdaThisCaptureField = CGF.LambdaThisCaptureField;
CGF.LambdaThisCaptureField = nullptr;
BlockInfo = CGF.BlockInfo;
CGF.BlockInfo = nullptr;
}
}
~InlinedOpenMPRegionRAII() {
@ -434,9 +438,11 @@ class InlinedOpenMPRegionRAII {
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
delete CGF.CapturedStmtInfo;
CGF.CapturedStmtInfo = OldCSI;
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
CGF.LambdaThisCaptureField = LambdaThisCaptureField;
CGF.BlockInfo = BlockInfo;
if (NoInheritance) {
std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
CGF.LambdaThisCaptureField = LambdaThisCaptureField;
CGF.BlockInfo = BlockInfo;
}
}
};
@ -3853,7 +3859,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
// Processing for implicitly captured variables.
InlinedOpenMPRegionRAII Region(
CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
/*HasCancel=*/false);
/*HasCancel=*/false, /*NoInheritance=*/true);
SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
}
if (Type->isArrayType()) {
@ -6214,7 +6220,9 @@ void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
bool HasCancel) {
if (!CGF.HaveInsertPoint())
return;
InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
InnerKind != OMPD_critical &&
InnerKind != OMPD_master);
CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
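
For context, a minimal sketch (illustrative, not part of this import) of the kind of source the NoInheritance change targets: an inlined 'master' or 'critical' region nested inside a lambda, where references to the enclosing lambda's captures must stay resolvable rather than being cleared by the RAII. Assumes compilation with -fopenmp.

// Illustrative example only; not from the diff.
#include <cstdio>

int main() {
  int x = 0;
  auto body = [&x]() {
#pragma omp parallel
#pragma omp master
    x += 1; // the capture of 'x' must remain visible inside the master region
  };
  body();
  std::printf("x = %d\n", x);
  return 0;
}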

View File

@ -6215,15 +6215,17 @@ llvm::SanitizerStatReport &CodeGenModule::getSanStats() {
return *SanStats;
}
llvm::Value *
CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E,
CodeGenFunction &CGF) {
llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType());
auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr());
auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false);
return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy,
"__translate_sampler_initializer"),
{C});
auto *SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr());
auto *FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false);
auto *Call = CGF.Builder.CreateCall(
CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C});
Call->setCallingConv(Call->getCalledFunction()->getCallingConv());
return Call;
}
CharUnits CodeGenModule::getNaturalPointeeTypeAlignment(

View File

@ -1917,12 +1917,12 @@ class AnnotatingParser {
if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
return true;
if (Tok.Next->is(tok::l_paren) &&
!(Tok.Previous && Tok.Previous->is(tok::identifier) &&
Tok.Previous->Previous &&
Tok.Previous->Previous->isOneOf(tok::arrowstar, tok::arrow,
tok::star)))
return true;
// Look for a cast `( x ) (`.
if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
if (Tok.Previous->is(tok::identifier) &&
Tok.Previous->Previous->is(tok::l_paren))
return true;
}
if (!Tok.Next->Next)
return false;

View File

@ -1281,13 +1281,6 @@ void UnwrappedLineFormatter::formatFirstToken(
if (Newlines)
Indent = NewlineIndent;
// If in Whitemsmiths mode, indent start and end of blocks
if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
if (RootToken.isOneOf(tok::l_brace, tok::r_brace, tok::kw_case,
tok::kw_default))
Indent += Style.IndentWidth;
}
// Preprocessor directives get indented before the hash only if specified
if (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
(Line.Type == LT_PreprocessorDirective ||

View File

@ -579,17 +579,23 @@ size_t UnwrappedLineParser::computePPHash() const {
return h;
}
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
bool MunchSemi) {
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
bool MunchSemi,
bool UnindentWhitesmithsBraces) {
assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
"'{' or macro block token expected");
const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
FormatTok->setBlockKind(BK_Block);
// For Whitesmiths mode, jump to the next level prior to skipping over the
// braces.
if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
++Line->Level;
size_t PPStartHash = computePPHash();
unsigned InitialLevel = Line->Level;
nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
nextToken(/*LevelDifference=*/AddLevels);
if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
@ -602,10 +608,16 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
? (UnwrappedLine::kInvalidIndex)
: (CurrentLines->size() - 1 - NbPreprocessorDirectives);
// Whitesmiths is weird here. The brace needs to be indented for the namespace
// block, but the block itself may not be indented depending on the style
// settings. This allows the format to back up one level in those cases.
if (UnindentWhitesmithsBraces)
--Line->Level;
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
if (AddLevel)
++Line->Level;
if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
Line->Level += AddLevels;
parseLevel(/*HasOpeningBrace=*/true);
if (eof())
@ -621,7 +633,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
size_t PPEndHash = computePPHash();
// Munch the closing brace.
nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
nextToken(/*LevelDifference=*/-AddLevels);
if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
@ -637,6 +649,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
nextToken();
Line->Level = InitialLevel;
FormatTok->setBlockKind(BK_Block);
if (PPStartHash == PPEndHash) {
Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
@ -2128,15 +2141,34 @@ void UnwrappedLineParser::parseNamespace() {
if (ShouldBreakBeforeBrace(Style, InitialToken))
addUnwrappedLine();
bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
(Style.NamespaceIndentation == FormatStyle::NI_Inner &&
DeclarationScopeStack.size() > 1);
parseBlock(/*MustBeDeclaration=*/true, AddLevel);
unsigned AddLevels =
Style.NamespaceIndentation == FormatStyle::NI_All ||
(Style.NamespaceIndentation == FormatStyle::NI_Inner &&
DeclarationScopeStack.size() > 1)
? 1u
: 0u;
bool ManageWhitesmithsBraces =
AddLevels == 0u &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
// If we're in Whitesmiths mode, indent the brace if we're not indenting
// the whole block.
if (ManageWhitesmithsBraces)
++Line->Level;
parseBlock(/*MustBeDeclaration=*/true, AddLevels,
/*MunchSemi=*/true,
/*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
// Munch the semicolon after a namespace. This is more common than one would
// think. Putting the semicolon into its own line is very ugly.
if (FormatTok->Tok.is(tok::semi))
nextToken();
addUnwrappedLine();
addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
if (ManageWhitesmithsBraces)
--Line->Level;
}
// FIXME: Add error handling.
}
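
To make the intent of the Whitesmiths handling above concrete, a small formatting sketch (not from this diff) of the output parseBlock/parseNamespace now aim for with BreakBeforeBraces: Whitesmiths, NamespaceIndentation: None, and an assumed IndentWidth of 2: the namespace braces are indented one level while the enclosed declarations are not.

// Illustrative clang-format output sketch only; style option names as above.
namespace example
  {
int value = 0;

void touch()
  {
  if (value > 0)
    {
    ++value;
    }
  }
  } // namespace example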
@ -2222,6 +2254,11 @@ void UnwrappedLineParser::parseDoWhile() {
return;
}
// If in Whitesmiths mode, the line with the while() needs to be indented
// to the same level as the block.
if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
++Line->Level;
nextToken();
parseStructuralElement();
}
@ -2234,25 +2271,19 @@ void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
if (LeftAlignLabel)
Line->Level = 0;
bool RemoveWhitesmithsCaseIndent =
(!Style.IndentCaseBlocks &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths);
if (RemoveWhitesmithsCaseIndent)
--Line->Level;
if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
FormatTok->Tok.is(tok::l_brace)) {
CompoundStatementIndenter Indenter(
this, Line->Level, Style.BraceWrapping.AfterCaseLabel,
Style.BraceWrapping.IndentBraces || RemoveWhitesmithsCaseIndent);
CompoundStatementIndenter Indenter(this, Line->Level,
Style.BraceWrapping.AfterCaseLabel,
Style.BraceWrapping.IndentBraces);
parseBlock(/*MustBeDeclaration=*/false);
if (FormatTok->Tok.is(tok::kw_break)) {
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always) {
addUnwrappedLine();
if (RemoveWhitesmithsCaseIndent) {
if (!Style.IndentCaseBlocks &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
Line->Level++;
}
}
@ -2920,17 +2951,29 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
llvm::dbgs() << "\n";
}
void UnwrappedLineParser::addUnwrappedLine() {
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
if (Line->Tokens.empty())
return;
LLVM_DEBUG({
if (CurrentLines == &Lines)
printDebugInfo(*Line);
});
// If this line closes a block when in Whitesmiths mode, remember that
// information so that the level can be decreased after the line is added.
// This has to happen after the addition of the line since the line itself
// needs to be indented.
bool ClosesWhitesmithsBlock =
Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
CurrentLines->push_back(std::move(*Line));
Line->Tokens.clear();
Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
Line->FirstStartColumn = 0;
if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
--Line->Level;
if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
CurrentLines->append(
std::make_move_iterator(PreprocessorDirectives.begin()),

View File

@ -85,8 +85,9 @@ class UnwrappedLineParser {
void reset();
void parseFile();
void parseLevel(bool HasOpeningBrace);
void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
bool MunchSemi = true);
void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1u,
bool MunchSemi = true,
bool UnindentWhitesmithsBraces = false);
void parseChildBlock();
void parsePPDirective();
void parsePPDefine();
@ -140,7 +141,12 @@ class UnwrappedLineParser {
bool tryToParsePropertyAccessor();
void tryToParseJSFunction();
bool tryToParseSimpleAttribute();
void addUnwrappedLine();
// Used by addUnwrappedLine to denote whether to keep or remove a level
// when resetting the line state.
enum class LineLevel { Remove, Keep };
void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove);
bool eof() const;
// LevelDifference is the difference of levels after and before the current
// token. For example:

View File

@ -28,7 +28,7 @@
Most SSE scalar float intrinsic operations can be performed more
efficiently as C language float scalar operations or optimized to
use vector SIMD operations. We recommend this for new applications. */
#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif
#ifndef _XMMINTRIN_H_INCLUDED
@ -62,14 +62,13 @@
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
typedef vector float __m128 __attribute__((__may_alias__));
/* Unaligned version of the same type. */
typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
__aligned__ (1)));
typedef vector float __m128_u __attribute__((__may_alias__, __aligned__(1)));
/* Internal data types for implementing the intrinsics. */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
typedef vector float __v4sf;
/* Create an undefined vector. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))

View File

@ -2623,7 +2623,10 @@ static bool isValidBPFPreserveEnumValueArg(Expr *Arg) {
return false;
const auto *CE = dyn_cast<CStyleCastExpr>(UO->getSubExpr());
if (!CE || CE->getCastKind() != CK_IntegralToPointer)
if (!CE)
return false;
if (CE->getCastKind() != CK_IntegralToPointer &&
CE->getCastKind() != CK_NullToPointer)
return false;
// The integer must be from an EnumConstantDecl.

View File

@ -99,14 +99,14 @@ struct allocator_traits
};
template <>
class allocator<void> // deprecated in C++17, removed in C++20
class allocator<void> // removed in C++20
{
public:
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
typedef void* pointer; // deprecated in C++17
typedef const void* const_pointer; // deprecated in C++17
typedef void value_type; // deprecated in C++17
template <class _Up> struct rebind {typedef allocator<_Up> other;};
template <class _Up> struct rebind {typedef allocator<_Up> other;}; // deprecated in C++17
};
template <class T>
@ -786,27 +786,27 @@ to_address(const _Pointer& __p) _NOEXCEPT
template <class _Tp> class allocator;
#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS)
#if _LIBCPP_STD_VER <= 17
template <>
class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator<void>
class _LIBCPP_TEMPLATE_VIS allocator<void>
{
public:
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
_LIBCPP_DEPRECATED_IN_CXX17 typedef void* pointer;
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer;
_LIBCPP_DEPRECATED_IN_CXX17 typedef void value_type;
template <class _Up> struct rebind {typedef allocator<_Up> other;};
template <class _Up> struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;};
};
template <>
class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator<const void>
class _LIBCPP_TEMPLATE_VIS allocator<const void>
{
public:
typedef const void* pointer;
typedef const void* const_pointer;
typedef const void value_type;
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer;
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer;
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type;
template <class _Up> struct rebind {typedef allocator<_Up> other;};
template <class _Up> struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;};
};
#endif
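
A small usage sketch (illustrative, not part of the import) of what relocating the deprecation attribute means in practice: merely naming std::allocator<void> no longer warns in C++17 mode, while its legacy member typedefs still do.

// Illustrative only; build as C++17 against the patched libc++ headers.
#include <memory>

int main() {
  std::allocator<void> a; // OK: the class itself is no longer deprecated before C++20
  // using P = std::allocator<void>::pointer; // would still emit a C++17 deprecation warning
  (void)a;
  return 0;
}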

View File

@ -3110,7 +3110,9 @@ size_t VersionTableSection::getSize() const {
void VersionTableSection::writeTo(uint8_t *buf) {
buf += 2;
for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) {
write16(buf, s.sym->versionId);
// Use the original versionId for an unfetched lazy symbol (undefined weak),
// which must be VER_NDX_GLOBAL (an undefined versioned symbol is an error).
write16(buf, s.sym->isLazy() ? VER_NDX_GLOBAL : s.sym->versionId);
buf += 2;
}
}

View File

@ -256,11 +256,11 @@ executable. To disambiguate between arguments passed to lldb and arguments
passed to the debugged executable, arguments starting with a - must be passed
after --.
lldb --arch x86_64 /path/to/program program argument -- --arch arvm7
lldb --arch x86_64 /path/to/program program argument -- --arch armv7
For convenience, passing the executable after -- is also supported.
lldb --arch x86_64 -- /path/to/program program argument --arch arvm7
lldb --arch x86_64 -- /path/to/program program argument --arch armv7
Passing one of the attach options causes :program:`lldb` to immediately attach
to the given process.

View File

@ -751,11 +751,11 @@ static void printHelp(LLDBOptTable &table, llvm::StringRef tool_name) {
arguments passed to the debugged executable, arguments starting with a - must
be passed after --.
lldb --arch x86_64 /path/to/program program argument -- --arch arvm7
lldb --arch x86_64 /path/to/program program argument -- --arch armv7
For convenience, passing the executable after -- is also supported.
lldb --arch x86_64 -- /path/to/program program argument --arch arvm7
lldb --arch x86_64 -- /path/to/program program argument --arch armv7
Passing one of the attach options causes lldb to immediately attach to the
given process.

View File

@ -23,7 +23,12 @@
namespace llvm {
class Any {
class LLVM_EXTERNAL_VISIBILITY Any {
// The `Typeid<T>::Id` static data member below is a globally unique
// identifier for the type `T`. It is explicitly marked with default
// visibility so that when `-fvisibility=hidden` is used, the loader still
// merges duplicate definitions across DSO boundaries.
template <typename T> struct TypeId { static const char Id; };
struct StorageBase {

View File

@ -316,6 +316,7 @@ class LegalizerHelper {
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);

View File

@ -214,6 +214,10 @@ class Constant : public User {
/// both must either be scalars or vectors with the same element count. If no
/// changes are made, the constant C is returned.
static Constant *mergeUndefsWith(Constant *C, Constant *Other);
/// Return true if a constant is ConstantData or a ConstantAggregate or
/// ConstantExpr that contain only ConstantData.
bool isManifestConstant() const;
};
} // end namespace llvm

View File

@ -1808,19 +1808,6 @@ double getValueAsDouble(ConstantFP *Op) {
return APF.convertToDouble();
}
static bool isManifestConstant(const Constant *c) {
if (isa<ConstantData>(c)) {
return true;
} else if (isa<ConstantAggregate>(c) || isa<ConstantExpr>(c)) {
for (const Value *subc : c->operand_values()) {
if (!isManifestConstant(cast<Constant>(subc)))
return false;
}
return true;
}
return false;
}
static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
if (auto *CI = dyn_cast<ConstantInt>(Op)) {
C = &CI->getValue();
@ -1845,7 +1832,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
// We know we have a "Constant" argument. But we want to only
// return true for manifest constants, not those that depend on
// constants with unknowable values, e.g. GlobalValue or BlockAddress.
if (isManifestConstant(Operands[0]))
if (Operands[0]->isManifestConstant())
return ConstantInt::getTrue(Ty->getContext());
return nullptr;
}

View File

@ -4127,10 +4127,12 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
TrueVal, FalseVal))
return V;
// If we have an equality comparison, then we know the value in one of the
// arms of the select. See if substituting this value into the arm and
// If we have a scalar equality comparison, then we know the value in one of
// the arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
// Note that the equivalence/replacement opportunity does not hold for vectors
// because each element of a vector select is chosen independently.
if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) {
if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ false, MaxRecurse) ==
TrueVal ||

View File

@ -344,7 +344,9 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
if ((isa<LoadInst>(U) ||
(isa<StoreInst>(U) &&
cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
U->hasMetadata(LLVMContext::MD_invariant_group))
ClosestDependency = GetClosestDependency(ClosestDependency, U);
}

View File

@ -10622,6 +10622,10 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
if (!dominates(RHS, IncBB))
return false;
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
// Make sure L does not refer to a value from a potentially previous
// iteration of a loop.
if (!properlyDominates(L, IncBB))
return false;
if (!ProvedEasily(L, RHS))
return false;
}

View File

@ -5150,6 +5150,9 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
return false;
}
// Limit number of instructions we look at, to avoid scanning through large
// blocks. The current limit is chosen arbitrarily.
unsigned ScanLimit = 32;
BasicBlock::const_iterator End = BB->end();
if (!PoisonOnly) {
@ -5160,6 +5163,11 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
// For example, 'udiv x, (undef | 1)' isn't UB.
for (auto &I : make_range(Begin, End)) {
if (isa<DbgInfoIntrinsic>(I))
continue;
if (--ScanLimit == 0)
break;
if (const auto *CB = dyn_cast<CallBase>(&I)) {
for (unsigned i = 0; i < CB->arg_size(); ++i) {
if (CB->paramHasAttr(i, Attribute::NoUndef) &&
@ -5186,9 +5194,12 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
for_each(V->users(), Propagate);
Visited.insert(BB);
unsigned Iter = 0;
while (Iter++ < MaxAnalysisRecursionDepth) {
while (true) {
for (auto &I : make_range(Begin, End)) {
if (isa<DbgInfoIntrinsic>(I))
continue;
if (--ScanLimit == 0)
return false;
if (mustTriggerUB(&I, YieldsPoison))
return true;
if (!isGuaranteedToTransferExecutionToSuccessor(&I))

View File

@ -840,9 +840,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
// For conditional branch lowering, we might try to do something silly like
// emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
// just re-use the existing condition vreg.
if (CI && CI->getZExtValue() == 1 &&
MRI->getType(CondLHS).getSizeInBits() == 1 &&
CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
Cond = CondLHS;
} else {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);

View File

@ -1257,22 +1257,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FPTOUI: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FPTOSI: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
}
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FPTOSI:
return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_FPEXT:
if (TypeIdx != 0)
return UnableToLegalize;
@ -4496,6 +4483,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 0)
return UnableToLegalize;
bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
Register Src = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(Src);
// If all finite floats fit into the narrowed integer type, we can just swap
// out the result type. This is practically only useful for conversions from
// half to at least 16-bits, so just handle the one case.
if (SrcTy.getScalarType() != LLT::scalar(16) ||
NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16))
return UnableToLegalize;
Observer.changingInstr(MI);
narrowScalarDst(MI, NarrowTy, 0,
IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
Observer.changedInstr(MI);
return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {

View File

@ -7105,14 +7105,22 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
if (LegalOperations)
return SDValue();
// Collect all the stores in the chain.
SDValue Chain;
SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
// TODO: Allow unordered atomics when wider type is legal (see D66309)
EVT MemVT = Store->getMemoryVT();
if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
!Store->isSimple() || Store->isIndexed())
// We only handle merging simple stores of 1-4 bytes.
// TODO: Allow unordered atomics when wider type is legal (see D66309)
EVT MemVT = N->getMemoryVT();
if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
!N->isSimple() || N->isIndexed())
return SDValue();
// Collect all of the stores in the chain.
SDValue Chain = N->getChain();
SmallVector<StoreSDNode *, 8> Stores = {N};
while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
// All stores must be the same size to ensure that we are writing all of the
// bytes in the wide value.
// TODO: We could allow multiple sizes by tracking each stored byte.
if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();

View File

@ -1691,9 +1691,9 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
/// terminator, but additionally the copies that move the vregs into the
/// physical registers.
static MachineBasicBlock::iterator
FindSplitPointForStackProtector(MachineBasicBlock *BB) {
FindSplitPointForStackProtector(MachineBasicBlock *BB,
const TargetInstrInfo &TII) {
MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
//
if (SplitPoint == BB->begin())
return SplitPoint;
@ -1701,6 +1701,31 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) {
MachineBasicBlock::iterator Previous = SplitPoint;
--Previous;
if (TII.isTailCall(*SplitPoint) &&
Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
// Call frames cannot be nested, so if this frame is describing the tail
// call itself, then we must insert before the sequence even starts. For

// example:
// <split point>
// ADJCALLSTACKDOWN ...
// <Moves>
// ADJCALLSTACKUP ...
// TAILJMP somewhere
// On the other hand, it could be an unrelated call in which case this tail call
// has to register moves of its own and should be the split point. For example:
// ADJCALLSTACKDOWN
// CALL something_else
// ADJCALLSTACKUP
// <split point>
// TAILJMP somewhere
do {
--Previous;
if (Previous->isCall())
return SplitPoint;
} while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
return Previous;
}
while (MIIsInTerminatorSequence(*Previous)) {
SplitPoint = Previous;
if (Previous == Start)
@ -1740,7 +1765,7 @@ SelectionDAGISel::FinishBasicBlock() {
// Add load and check to the basicblock.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
FindSplitPointForStackProtector(ParentMBB);
FindSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
@ -1759,7 +1784,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
FindSplitPointForStackProtector(ParentMBB);
FindSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,

View File

@ -470,21 +470,36 @@ bool StackProtector::InsertStackProtectors() {
// instrumentation has already been generated.
HasIRCheck = true;
// If we're instrumenting a block with a musttail call, the check has to be
// inserted before the call rather than between it and the return. The
// verifier guarantees that a musttail call is either directly before the
// return or with a single correct bitcast of the return value in between so
// we don't need to worry about many situations here.
Instruction *CheckLoc = RI;
Instruction *Prev = RI->getPrevNonDebugInstruction();
if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
CheckLoc = Prev;
else if (Prev) {
Prev = Prev->getPrevNonDebugInstruction();
if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
CheckLoc = Prev;
}
// Generate epilogue instrumentation. The epilogue intrumentation can be
// function-based or inlined depending on which mechanism the target is
// providing.
if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
IRBuilder<> B(RI);
IRBuilder<> B(CheckLoc);
LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
Call->setAttributes(GuardCheck->getAttributes());
Call->setCallingConv(GuardCheck->getCallingConv());
} else {
// Generate the epilogue with inline instrumentation.
// If we do not support SelectionDAG based tail calls, generate IR level
// tail calls.
// If we do not support SelectionDAG based calls, generate IR level
// calls.
//
// For each block with a return instruction, convert this:
//
@ -514,7 +529,8 @@ bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = CreateFailBB();
// Split the basic block before the return instruction.
BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
BasicBlock *NewBB =
BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
// Update the dominator tree if we need to.
if (DT && DT->isReachableFromEntry(BB)) {

View File

@ -803,6 +803,18 @@ Constant *Constant::mergeUndefsWith(Constant *C, Constant *Other) {
return C;
}
bool Constant::isManifestConstant() const {
if (isa<ConstantData>(this))
return true;
if (isa<ConstantAggregate>(this) || isa<ConstantExpr>(this)) {
for (const Value *Op : operand_values())
if (!cast<Constant>(Op)->isManifestConstant())
return false;
return true;
}
return false;
}
//===----------------------------------------------------------------------===//
// ConstantInt
//===----------------------------------------------------------------------===//

View File

@ -1397,6 +1397,17 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
if (TargetObjectWriter->getEMachine() == ELF::EM_386 &&
Type == ELF::R_386_GOTOFF)
return true;
// ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so
// it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an
// R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in
// range of a MergeInputSection. We could introduce a new RelExpr member
// (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12)
// but the complexity is unnecessary given that GNU as keeps the original
// symbol for this case as well.
if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS &&
!hasRelocationAddend())
return true;
}
// Most TLS relocations use a got, so they need the symbol. Even those that

View File

@ -1729,7 +1729,7 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent,
size_t FirstLineIndentedBy) {
const StringRef ValHelpPrefix = " ";
assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size());
assert(BaseIndent >= FirstLineIndentedBy);
std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
outs().indent(BaseIndent - FirstLineIndentedBy)
<< ArgHelpPrefix << ValHelpPrefix << Split.first << "\n";

View File

@ -16335,25 +16335,36 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
// Nand not supported in LSE.
if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
// Leave 128 bits to LLSC.
if (Subtarget->hasLSE() && Size < 128)
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics() && Size < 128) {
// [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
// (1) high level <atomic> support approved:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
// (2) low level libgcc and compiler-rt support implemented by:
// min/max outline atomics helpers
if (AI->getOperation() != AtomicRMWInst::Min &&
AI->getOperation() != AtomicRMWInst::Max &&
AI->getOperation() != AtomicRMWInst::UMin &&
AI->getOperation() != AtomicRMWInst::UMax) {
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics()) {
// [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
// (1) high level <atomic> support approved:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
// (2) low level libgcc and compiler-rt support implemented by:
// min/max outline atomics helpers
if (AI->getOperation() != AtomicRMWInst::Min &&
AI->getOperation() != AtomicRMWInst::Max &&
AI->getOperation() != AtomicRMWInst::UMin &&
AI->getOperation() != AtomicRMWInst::UMax) {
return AtomicExpansionKind::None;
}
}
}
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
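
As a rough illustration (not from the diff) of the dispatch above: ordinary RMW operations can use LSE atomics when the subtarget has them, while NAND has no LSE encoding and keeps the LL/SC expansion (or, after this change, a CAS loop at -O0). Actual lowering depends on -march/-mcpu (+lse) and the optimization level.

// Illustrative only; uses the GCC/Clang __atomic builtins.
int rmw_add(int *p)  { return __atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST); }  // LSE form when available
int rmw_nand(int *p) { return __atomic_fetch_nand(p, 1, __ATOMIC_SEQ_CST); } // no LSE form: LL/SC, or CAS at -O0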

View File

@ -1791,7 +1791,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
NegOpc = AArch64::NEGv8i16;
} else if (Ty == LLT::vector(16, 8)) {
Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
NegOpc = AArch64::NEGv8i16;
NegOpc = AArch64::NEGv16i8;
} else if (Ty == LLT::vector(8, 8)) {
Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
NegOpc = AArch64::NEGv8i8;

View File

@ -5934,6 +5934,9 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
|| AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register
|| AddrMode == ARMII::AddrModeT2_pc // PCrel access
|| AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST
|| AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE
|| AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE
|| AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR
|| AddrMode == ARMII::AddrModeNone)
return false;
@ -5976,6 +5979,10 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
NumBits = 8;
break;
case ARMII::AddrModeT2_i8s4:
// FIXME: Values are already scaled in this addressing mode.
assert((Fixup & 3) == 0 && "Can't encode this offset!");
NumBits = 10;
break;
case ARMII::AddrModeT2_ldrex:
NumBits = 8;
Scale = 4;
@ -5984,17 +5991,6 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
case ARMII::AddrMode_i12:
NumBits = 12;
break;
case ARMII::AddrModeT2_i7:
NumBits = 7;
break;
case ARMII::AddrModeT2_i7s2:
NumBits = 7;
Scale = 2;
break;
case ARMII::AddrModeT2_i7s4:
NumBits = 7;
Scale = 4;
break;
case ARMII::AddrModeT1_s: // SP-relative LD/ST
NumBits = 8;
Scale = 4;
@ -6004,8 +6000,8 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
}
// Make sure the offset is encodable for instructions that scale the
// immediate.
if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0)
return false;
assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
"Can't encode this offset!");
OffVal += Fixup / Scale;
unsigned Mask = (1 << NumBits) - 1;

View File

@ -475,6 +475,9 @@ bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
if (MI.getOpcode() == BPF::SRL_ri &&
MI.getOperand(2).getImm() == 32) {
SrcReg = MI.getOperand(1).getReg();
if (!MRI->hasOneNonDBGUse(SrcReg))
continue;
MI2 = MRI->getVRegDef(SrcReg);
DstReg = MI.getOperand(0).getReg();

View File

@ -85,8 +85,17 @@ static bool BPFPreserveDITypeImpl(Function &F) {
} else {
Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE;
DIType *Ty = cast<DIType>(MD);
while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
unsigned Tag = DTy->getTag();
if (Tag != dwarf::DW_TAG_const_type &&
Tag != dwarf::DW_TAG_volatile_type)
break;
Ty = DTy->getBaseType();
}
if (Ty->getName().empty())
report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc");
MD = Ty;
}
BasicBlock *BB = Call->getParent();

View File

@ -12,6 +12,7 @@
#include "BPFTargetMachine.h"
#include "BPF.h"
#include "BPFTargetTransformInfo.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
@ -145,6 +146,11 @@ void BPFPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
}
TargetTransformInfo
BPFTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(BPFTTIImpl(this, F));
}
// Install an instruction selector pass using
// the ISelDag to gen BPF code.
bool BPFPassConfig::addInstSelector() {

View File

@ -34,6 +34,8 @@ class BPFTargetMachine : public LLVMTargetMachine {
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}

View File

@ -0,0 +1,61 @@
//===------ BPFTargetTransformInfo.h - BPF specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file uses the target's specific information to
// provide more precise answers to certain TTI queries, while letting the
// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
#include "BPFTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
namespace llvm {
class BPFTTIImpl : public BasicTTIImplBase<BPFTTIImpl> {
typedef BasicTTIImplBase<BPFTTIImpl> BaseT;
typedef TargetTransformInfo TTI;
friend BaseT;
const BPFSubtarget *ST;
const BPFTargetLowering *TLI;
const BPFSubtarget *getST() const { return ST; }
const BPFTargetLowering *getTLI() const { return TLI; }
public:
explicit BPFTTIImpl(const BPFTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) {
if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
return TTI::TCC_Free;
return TTI::TCC_Basic;
}
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const llvm::Instruction *I = nullptr) {
if (Opcode == Instruction::Select)
return SCEVCheapExpansionBudget;
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
}
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H

View File

@ -30,5 +30,6 @@ HANDLE_BTF_KIND(12, FUNC)
HANDLE_BTF_KIND(13, FUNC_PROTO)
HANDLE_BTF_KIND(14, VAR)
HANDLE_BTF_KIND(15, DATASEC)
HANDLE_BTF_KIND(16, FLOAT)
#undef HANDLE_BTF_KIND

View File

@ -371,6 +371,21 @@ void BTFKindDataSec::emitType(MCStreamer &OS) {
}
}
BTFTypeFloat::BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName)
: Name(TypeName) {
Kind = BTF::BTF_KIND_FLOAT;
BTFType.Info = Kind << 24;
BTFType.Size = roundupToBytes(SizeInBits);
}
void BTFTypeFloat::completeType(BTFDebug &BDebug) {
if (IsCompleted)
return;
IsCompleted = true;
BTFType.NameOff = BDebug.addString(Name);
}
uint32_t BTFStringTable::addString(StringRef S) {
// Check whether the string already exists.
for (auto &OffsetM : OffsetToIdMap) {
@ -409,18 +424,28 @@ uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
}
void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) {
// Only int types are supported in BTF.
// Only int and binary floating point types are supported in BTF.
uint32_t Encoding = BTy->getEncoding();
if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
Encoding != dwarf::DW_ATE_signed_char &&
Encoding != dwarf::DW_ATE_unsigned &&
Encoding != dwarf::DW_ATE_unsigned_char)
std::unique_ptr<BTFTypeBase> TypeEntry;
switch (Encoding) {
case dwarf::DW_ATE_boolean:
case dwarf::DW_ATE_signed:
case dwarf::DW_ATE_signed_char:
case dwarf::DW_ATE_unsigned:
case dwarf::DW_ATE_unsigned_char:
// Create a BTF type instance for this DIBasicType and put it into
// DIToIdMap for cross-type reference check.
TypeEntry = std::make_unique<BTFTypeInt>(
Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
break;
case dwarf::DW_ATE_float:
TypeEntry =
std::make_unique<BTFTypeFloat>(BTy->getSizeInBits(), BTy->getName());
break;
default:
return;
}
// Create a BTF type instance for this DIBasicType and put it into
// DIToIdMap for cross-type reference check.
auto TypeEntry = std::make_unique<BTFTypeInt>(
Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
TypeId = addType(std::move(TypeEntry), BTy);
}
@ -1171,6 +1196,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
if (Linkage != GlobalValue::InternalLinkage &&
Linkage != GlobalValue::ExternalLinkage &&
Linkage != GlobalValue::WeakAnyLinkage &&
Linkage != GlobalValue::WeakODRLinkage &&
Linkage != GlobalValue::ExternalWeakLinkage)
continue;
@ -1199,8 +1225,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
const DataLayout &DL = Global.getParent()->getDataLayout();
uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
DataSecEntries[std::string(SecName)]->addVar(VarId, Asm->getSymbol(&Global),
Size);
DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId,
Asm->getSymbol(&Global), Size);
}
}
@ -1278,7 +1304,19 @@ void BTFDebug::processFuncPrototypes(const Function *F) {
uint8_t Scope = BTF::FUNC_EXTERN;
auto FuncTypeEntry =
std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
addType(std::move(FuncTypeEntry));
uint32_t FuncId = addType(std::move(FuncTypeEntry));
if (F->hasSection()) {
StringRef SecName = F->getSection();
if (DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) {
DataSecEntries[std::string(SecName)] =
std::make_unique<BTFKindDataSec>(Asm, std::string(SecName));
}
// We really don't know func size, set it to 0.
DataSecEntries[std::string(SecName)]->addDataSecEntry(FuncId,
Asm->getSymbol(F), 0);
}
}
void BTFDebug::endModule() {

View File

@ -187,7 +187,7 @@ class BTFKindDataSec : public BTFTypeBase {
uint32_t getSize() override {
return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size();
}
void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
void addDataSecEntry(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
Vars.push_back(std::make_tuple(Id, Sym, Size));
}
std::string getName() { return Name; }
@ -195,6 +195,15 @@ class BTFKindDataSec : public BTFTypeBase {
void emitType(MCStreamer &OS) override;
};
/// Handle binary floating point type.
class BTFTypeFloat : public BTFTypeBase {
StringRef Name;
public:
BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName);
void completeType(BTFDebug &BDebug) override;
};
/// String table.
class BTFStringTable {
/// String table size in bytes.

View File

@ -321,6 +321,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
O << "0, ";
printOperand(MI, OpNo, O);
return false;
case 'I':
// Write 'i' if an integer constant, otherwise nothing. Used to print
// addi vs add, etc.
if (MI->getOperand(OpNo).isImm())
O << "i";
return false;
case 'U': // Print 'u' for update form.
case 'X': // Print 'x' for indexed form.
// FIXME: Currently for PowerPC memory operands are always loaded
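
A hedged sketch (not from the diff) of the inline-asm pattern the new 'I' modifier serves, per the comment above: with an "ri" constraint, %I prints an 'i' when the operand is an integer constant, selecting addi over add. Function and parameter names here are invented for illustration.

// Illustrative only; GCC-style extended asm, operand modifier syntax %I<n>.
long add_reg_or_imm(long a, long b) {
  long out;
  asm("add%I2 %0,%1,%2" : "=r"(out) : "r"(a), "ri"(b));
  return out;
}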

View File

@ -859,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl,
TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
: PPC::PROBED_STACKALLOC_32))
.addDef(ScratchReg)
.addDef(TempReg) // TempReg stores the old sp.
.addDef(TempReg)
.addDef(ScratchReg) // ScratchReg stores the old sp.
.addImm(NegFrameSize);
// FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
// update the ScratchReg to meet the assumption that ScratchReg contains
// the NegFrameSize. This solution is rather tricky.
if (!HasRedZone) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
.addReg(TempReg)
.addReg(ScratchReg)
.addReg(SPReg);
HasSTUX = true;
}
@ -1187,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
// TODO: Generate CFI instructions.
bool isPPC64 = Subtarget.isPPC64();
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@ -1219,6 +1218,7 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
bool HasBP = RegInfo->hasBasePointer(MF);
Register BPReg = RegInfo->getBaseRegister(MF);
Align MaxAlign = MFI.getMaxAlign();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
// Subroutines to generate .cfi_* directives.
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
@ -1272,212 +1272,221 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
.addReg(SPReg)
.addReg(NegSizeReg);
};
// Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
// when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
// available and r1 is already copied to r30 which is BPReg. So BPReg stores
// the value of stackptr.
// First we have to probe tail interval whose size is less than probesize,
// i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
// ScratchReg stores the value of ((stackptr % align) % probesize). Then we
// probe each block sized probesize until stackptr meets
// (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
// as negprobesize. At both stages, TempReg stores the value of
// (stackptr - (stackptr % align)).
auto dynamicProbe = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, Register ScratchReg,
Register TempReg) {
assert(HasBP && isPPC64 && "Probe alignment part not available");
// Used to probe stack when realignment is required.
// Note that, according to ABI's requirement, *sp must always equals the
// value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
// Following is pseudo code:
// final_sp = (sp & align) + negframesize;
// neg_gap = final_sp - sp;
// while (neg_gap < negprobesize) {
// stdu fp, negprobesize(sp);
// neg_gap -= negprobesize;
// }
// stdux fp, sp, neg_gap
//
// When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
// before probe code, we don't need to save it, so we get one additional reg
// that can be used to materialize the probeside if needed to use xform.
// Otherwise, we can NOT materialize probeside, so we can only use Dform for
// now.
//
// The allocations are:
// if (HasBP && HasRedzone) {
// r0: materialize the probesize if needed so that we can use xform.
// r12: `neg_gap`
// } else {
// r0: back-chain pointer
// r12: `neg_gap`.
// }
auto probeRealignedStack = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Register ScratchReg, Register TempReg) {
assert(HasBP && "The function is supposed to have base pointer when its "
"stack is realigned.");
assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
// ScratchReg = stackptr % align
BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
.addReg(BPReg)
.addImm(0)
.addImm(64 - Log2(MaxAlign));
// TempReg = stackptr - (stackptr % align)
BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
.addReg(ScratchReg)
.addReg(BPReg);
// ScratchReg = (stackptr % align) % probesize
BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
.addReg(ScratchReg)
.addImm(0)
.addImm(64 - Log2(ProbeSize));
// FIXME: We can eliminate this limitation if we get more infomation about
// which part of redzone are already used. Used redzone can be treated
// probed. But there might be `holes' in redzone probed, this could
// complicate the implementation.
assert(ProbeSize >= Subtarget.getRedZoneSize() &&
"Probe size should be larger or equal to the size of red-zone so "
"that red-zone is not clobbered by probing.");
Register &FinalStackPtr = TempReg;
// FIXME: We only support NegProbeSize materializable by DForm currently.
// When HasBP && HasRedzone, we can use xform if we have an additional idle
// register.
NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
assert(isInt<16>(NegProbeSize) &&
"NegProbeSize should be materializable by DForm");
Register CRReg = PPC::CR0;
// If (stackptr % align) % probesize == 0, we should not generate probe
// code. Layout of output assembly kinda like:
// Layout of output assembly kinda like:
// bb.0:
// ...
// cmpldi $scratchreg, 0
// beq bb.2
// bb.1: # Probe tail interval
// neg $scratchreg, $scratchreg
// stdux $bpreg, r1, $scratchreg
// sub $scratchreg, $finalsp, r1
// cmpdi $scratchreg, <negprobesize>
// bge bb.2
// bb.1:
// stdu <backchain>, <negprobesize>(r1)
// sub $scratchreg, $scratchreg, negprobesize
// cmpdi $scratchreg, <negprobesize>
// blt bb.1
// bb.2:
// <materialize negprobesize into $scratchreg>
// cmpd r1, $tempreg
// beq bb.4
// bb.3: # Loop to probe each block
// stdux $bpreg, r1, $scratchreg
// cmpd r1, $tempreg
// bne bb.3
// bb.4:
// ...
// stdux <backchain>, r1, $scratchreg
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeResidualMBB);
MachineBasicBlock *ProbeLoopPreHeaderMBB =
MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeExitMBB);
// bb.4
ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
// bb.0
BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
BuildMI(&MBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_EQ)
.addReg(CRReg)
.addMBB(ProbeLoopPreHeaderMBB);
MBB.addSuccessor(ProbeResidualMBB);
MBB.addSuccessor(ProbeLoopPreHeaderMBB);
// bb.1
BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
.addReg(ScratchReg);
allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
false, BPReg);
ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
// bb.2
MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
NegProbeSize, ScratchReg);
BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
.addReg(SPReg)
.addReg(TempReg);
BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_EQ)
.addReg(CRReg)
.addMBB(ProbeExitMBB);
ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
// bb.3
allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
false, BPReg);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
.addReg(SPReg)
.addReg(TempReg);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(CRReg)
.addMBB(ProbeLoopBodyMBB);
ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
{
Register BackChainPointer = HasRedZone ? BPReg : TempReg;
allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
BackChainPointer);
if (HasRedZone)
// PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
// to TempReg to satisfy it.
BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
.addReg(BPReg)
.addReg(BPReg);
ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
}
// bb.0
{
BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
.addReg(SPReg)
.addReg(FinalStackPtr);
if (!HasRedZone)
BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
.addReg(ScratchReg)
.addImm(NegProbeSize);
BuildMI(&MBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_GE)
.addReg(CRReg)
.addMBB(ProbeExitMBB);
MBB.addSuccessor(ProbeLoopBodyMBB);
MBB.addSuccessor(ProbeExitMBB);
}
// bb.1
{
Register BackChainPointer = HasRedZone ? BPReg : TempReg;
allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
0, true /*UseDForm*/, BackChainPointer);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
ScratchReg)
.addReg(ScratchReg)
.addImm(-NegProbeSize);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
CRReg)
.addReg(ScratchReg)
.addImm(NegProbeSize);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_LT)
.addReg(CRReg)
.addMBB(ProbeLoopBodyMBB);
ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
}
// Update liveins.
recomputeLiveIns(*ProbeResidualMBB);
recomputeLiveIns(*ProbeLoopPreHeaderMBB);
recomputeLiveIns(*ProbeLoopBodyMBB);
recomputeLiveIns(*ProbeExitMBB);
return ProbeExitMBB;
};
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
// SP = SP - SP % MaxAlign.
// SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
// the offset subtracted from SP is determined by SP's runtime value.
if (HasBP && MaxAlign > 1) {
// FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
// 64-bit mode.
if (isPPC64) {
// Use BPReg to calculate CFA.
if (needsCFI)
buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
// Since we have SPReg copied to BPReg at the moment, FPReg can be used as
// TempReg.
Register TempReg = FPReg;
CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
// Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
.addReg(BPReg)
.addReg(BPReg);
} else {
// Initialize current frame pointer.
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
// Calculate final stack pointer.
if (isPPC64)
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
.addReg(SPReg)
.addReg(SPReg);
// Use FPReg to calculate CFA.
if (needsCFI)
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
.addImm(0)
.addImm(64 - Log2(MaxAlign));
else
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
.addReg(FPReg)
.addReg(SPReg)
.addImm(0)
.addImm(32 - Log2(MaxAlign))
.addImm(31);
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
.addReg(ScratchReg)
.addReg(SPReg);
}
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
FPReg)
.addReg(ScratchReg)
.addReg(SPReg);
MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
FPReg)
.addReg(ScratchReg)
.addReg(FPReg);
CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
if (needsCFI)
buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
} else {
// Initialize current frame pointer.
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
// Use FPReg to calculate CFA.
if (needsCFI)
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
}
// Probe residual part.
if (NegResidualSize) {
bool ResidualUseDForm = CanUseDForm(NegResidualSize);
if (!ResidualUseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
ResidualUseDForm, FPReg);
}
bool UseDForm = CanUseDForm(NegProbeSize);
// If number of blocks is small, just probe them directly.
if (NumBlocks < 3) {
if (!UseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
for (int i = 0; i < NumBlocks; ++i)
allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
FPReg);
if (needsCFI) {
// Restore using SPReg to calculate CFA.
buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
}
} else {
// Since CTR is a volatile register and current shrinkwrap implementation
// won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
// CTR loop to probe.
// Calculate the trip count and store it in CTRReg.
MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
.addReg(ScratchReg, RegState::Kill);
if (!UseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
// Create MBBs of the loop.
MachineFunction::iterator MBBInsertPoint =
std::next(CurrentMBB->getIterator());
MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, LoopMBB);
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ExitMBB);
// Synthesize the loop body.
allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
UseDForm, FPReg);
BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
.addMBB(LoopMBB);
LoopMBB->addSuccessor(ExitMBB);
LoopMBB->addSuccessor(LoopMBB);
// Synthesize the exit MBB.
ExitMBB->splice(ExitMBB->end(), CurrentMBB,
std::next(MachineBasicBlock::iterator(MI)),
CurrentMBB->end());
ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
CurrentMBB->addSuccessor(LoopMBB);
if (needsCFI) {
// Restore using SPReg to calculate CFA.
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
}
// Update liveins.
recomputeLiveIns(*LoopMBB);
recomputeLiveIns(*ExitMBB);
}
++NumPrologProbed;
MI.eraseFromParent();
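// Illustrative note, not from the imported sources: NumBlocks and
// NegResidualSize used above correspond to splitting the frame into full
// probe-size blocks plus a remainder, roughly as in this hypothetical helper.
struct ProbePlan {
unsigned long long NumBlocks; // probed inline (NumBlocks < 3) or via the CTR loop
unsigned long long Residual; // probed separately when non-zero
};
inline ProbePlan splitFrame(unsigned long long FrameSize,
unsigned long long ProbeSize) {
return {FrameSize / ProbeSize, FrameSize % ProbeSize};
}
// The MTCTR/BDNZ pair above forms a counted loop that runs exactly NumBlocks times.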

View File

@ -167,6 +167,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
// Custom lower inline assembly to check for special registers.
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@ -3461,6 +3465,57 @@ SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
return Op.getOperand(0);
}
SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
assert((Op.getOpcode() == ISD::INLINEASM ||
Op.getOpcode() == ISD::INLINEASM_BR) &&
"Expecting Inline ASM node.");
// If an LR store is already known to be required then there is no point in
// checking this ASM as well.
if (MFI.isLRStoreRequired())
return Op;
// Inline ASM nodes have an optional last operand that is an incoming Flag of
// type MVT::Glue. We want to ignore this last operand if that is the case.
unsigned NumOps = Op.getNumOperands();
if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
--NumOps;
// Check all operands that may contain the LR.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
++i; // Skip the ID value.
switch (InlineAsm::getKind(Flags)) {
default:
llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegUse:
case InlineAsm::Kind_Imm:
case InlineAsm::Kind_Mem:
i += NumVals;
break;
case InlineAsm::Kind_Clobber:
case InlineAsm::Kind_RegDef:
case InlineAsm::Kind_RegDefEarlyClobber: {
for (; NumVals; --NumVals, ++i) {
Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
if (Reg != PPC::LR && Reg != PPC::LR8)
continue;
MFI.setLRStoreRequired();
return Op;
}
break;
}
}
}
return Op;
}
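// Illustrative example, not from the imported sources: the kind of user code
// this scan is meant to catch. Naming the link register in the clobber list
// yields an InlineAsm clobber operand for LR/LR8, so the prologue must save LR.
static unsigned long readAndClobberLR(void) {
unsigned long Val;
asm volatile("mflr %0" : "=r"(Val) : : "lr");
return Val;
}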
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget.isAIXABI())
@ -10316,6 +10371,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::INLINEASM:
case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
// Variable argument lowering.
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
@ -15090,6 +15147,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
return std::make_pair(0U, &PPC::VSFRCRegClass);
} else if (Constraint == "lr") {
if (VT == MVT::i64)
return std::make_pair(0U, &PPC::LR8RCRegClass);
else
return std::make_pair(0U, &PPC::LRRCRegClass);
}
// If we name a VSX register, we can't defer to the base class because it

View File

@ -1128,6 +1128,7 @@ namespace llvm {
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;

View File

@ -173,7 +173,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in {
foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
def VSRp#!srl(Index, 1) : VSRPair<!srl(Index, 1), "vsp"#Index,
[!cast<VSRL>("VSL"#Index), !cast<VSRL>("VSL"#!add(Index, 1))]>,
DwarfRegNum<[0, 0]>;
DwarfRegNum<[-1, -1]>;
}
// VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63).
@ -181,7 +181,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in {
def VSRp#!add(!srl(Index, 1), 16) :
VSRPair<!add(!srl(Index, 1), 16), "vsp"#!add(Index, 32),
[!cast<VR>("V"#Index), !cast<VR>("V"#!add(Index, 1))]>,
DwarfRegNum<[0, 0]>;
DwarfRegNum<[-1, -1]>;
}
}
@ -409,20 +409,27 @@ def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
let isAllocatable = 0;
}
def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> {
let isAllocatable = 0;
}
def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> {
let isAllocatable = 0;
}
def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
let CopyCost = -1;
}
let SubRegIndices = [sub_pair0, sub_pair1] in {
def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>;
def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>;
def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>;
def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>;
def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>;
def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>;
def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>;
def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>;
def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
ACC4, ACC5, ACC6, ACC7)> {
@ -430,14 +437,14 @@ def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
}
let SubRegIndices = [sub_pair0, sub_pair1] in {
def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>;
def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>;
def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>;
def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>;
def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>;
def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>;
def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>;
def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>;
def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def UACCRC : RegisterClass<"PPC", [v512i1], 128,
(add UACC0, UACC1, UACC2, UACC3,

View File

@ -1212,6 +1212,27 @@ unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
bool PPCTTIImpl::areFunctionArgsABICompatible(
const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const {
// We need to ensure that argument promotion does not
// attempt to promote pointers to MMA types (__vector_pair
// and __vector_quad) since these types explicitly cannot be
// passed as arguments. Both of these types are larger than
// the 128-bit Altivec vectors and have a scalar size of 1 bit.
if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
return false;
return llvm::none_of(Args, [](Argument *A) {
auto *EltTy = cast<PointerType>(A->getType())->getElementType();
if (EltTy->isSized())
return (EltTy->isIntOrIntVectorTy(1) &&
EltTy->getPrimitiveSizeInBits() > 128);
return false;
});
}
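// Illustrative sketch, not from the imported sources, with hypothetical field
// names: the shape of the filter above in isolation. MMA types are modelled as
// wide i1 vectors (v256i1 for __vector_pair, v512i1 for __vector_quad), so an
// i1 vector wider than 128 bits is exactly the signature being rejected.
struct PointeeInfo {
bool IsSized; // Type::isSized()
bool IsI1Vector; // Type::isIntOrIntVectorTy(1)
unsigned SizeInBits; // Type::getPrimitiveSizeInBits()
};
inline bool blocksArgPromotion(const PointeeInfo &P) {
return P.IsSized && P.IsI1Vector && P.SizeInBits > 128;
}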
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {

View File

@ -129,6 +129,9 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
bool areFunctionArgsABICompatible(const Function *Caller,
const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const;
/// @}
};

View File

@ -3909,10 +3909,10 @@ foreach vti = AllIntegerVectors in {
(DecImm simm5_plus1:$rs2),
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0),
def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(vti.Mask VR:$merge),
(vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK")
VR:$merge,
@ -3922,17 +3922,17 @@ foreach vti = AllIntegerVectors in {
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
(DecImm simm5_plus1:$rs2),
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(vti.Mask VR:$merge),
(vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK")
VR:$merge,
@ -3950,11 +3950,11 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rs1,
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar 0),
(vti.Mask VR:$merge),
(XLenVT (VLOp GPR:$vl)))),
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar 0),
(vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK")
VR:$merge,
vti.RegClass:$rs1,

View File

@ -285,10 +285,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
// FIXME: Can we support these natively?
// Expand 128 bit shifts without using a libcall.
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
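// Illustrative example, not from the imported sources: with the libcall names
// cleared and the *_PARTS nodes expanded, a 128-bit shift such as this one is
// lowered to an inline instruction sequence instead of a runtime library call.
unsigned __int128 shiftLeft128(unsigned __int128 Value, unsigned Amount) {
return Value << (Amount & 127);
}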
// We have native instructions for i8, i16 and i32 extensions, but not i1.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

View File

@ -885,16 +885,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
SmallVector<Value *, 16> FMCArgs;
for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) {
Constant *Clause = LPI->getClause(I);
// As a temporary workaround for the lack of aggregate varargs support
// in the interface between JS and wasm, break out filter operands into
// their component elements.
if (LPI->isFilter(I)) {
auto *ATy = cast<ArrayType>(Clause->getType());
for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) {
Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter");
FMCArgs.push_back(EV);
}
} else
// TODO Handle filters (= exception specifications).
// https://bugs.llvm.org/show_bug.cgi?id=50396
if (LPI->isCatch(I))
FMCArgs.push_back(Clause);
}

View File

@ -37889,6 +37889,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
// replicating low and high halves (and without changing the type/length of
// the vector), we don't need the shuffle.
if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
if (Opcode == X86ISD::VBROADCAST && !VT.is128BitVector())
return SDValue();
if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) {
// movddup (hadd X, X) --> hadd X, X
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X

View File

@ -1344,15 +1344,18 @@ def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;
// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. Any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
// be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying
// anything about the upper 32 bits, they're probably just qualifying a
// CopyFromReg. FREEZE may be coming from a truncate. Any other 32-bit
// operation will zero-extend up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
N->getOpcode() != ISD::AssertSext &&
N->getOpcode() != ISD::AssertZext;
N->getOpcode() != ISD::AssertZext &&
N->getOpcode() != ISD::AssertAlign &&
N->getOpcode() != ISD::FREEZE;
}]>;
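// Illustrative example, not from the imported sources: the def32 predicate
// above backs implicit zero-extension, e.g. the 32-bit add below already
// clears the upper 32 bits on x86-64, so widening needs no extra instruction.
unsigned long long addThenWiden(unsigned A, unsigned B) {
return (unsigned long long)(A + B); // no separate zero-extend is emitted
}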
// In the case of a 32-bit def that is known to implicitly zero-extend,

View File

@ -3221,11 +3221,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
}
}
// ~(X - Y) --> ~X + Y
if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
if (isa<Constant>(X) || NotVal->hasOneUse())
return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
// ~(~X >>s Y) --> (X >>s Y)
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
return BinaryOperator::CreateAShr(X, Y);
@ -3256,9 +3251,15 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
}
// ~(X + C) --> -(C + 1) - X
if (match(Op0, m_Add(m_Value(X), m_Constant(C))))
return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X);
// ~(X + C) --> ~C - X
if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C))))
return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X);
// ~(X - Y) --> ~X + Y
// FIXME: is it really beneficial to sink the `not` here?
if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
if (isa<Constant>(X) || NotVal->hasOneUse())
return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
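// Illustrative check, not from the imported sources: both folds follow from
// the two's complement identity ~A == -A - 1, so ~(X + C) == (-C - 1) - X ==
// ~C - X and ~(X - Y) == -X - 1 + Y == ~X + Y. Two constant-folded sanity checks:
static_assert(~(7 + 3) == ~3 - 7, "~(X + C) == ~C - X");
static_assert(~(7 - 3) == ~7 + 3, "~(X - Y) == ~X + Y");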
// ~(~X + Y) --> X - Y
if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y))))

View File

@ -1095,7 +1095,10 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
/// TODO: Wrapping flags could be preserved in some cases with better analysis.
Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
ICmpInst &Cmp) {
if (!Cmp.isEquality())
// Value equivalence substitution requires an all-or-nothing replacement.
// It does not make sense for a vector compare where each lane is chosen
// independently.
if (!Cmp.isEquality() || Cmp.getType()->isVectorTy())
return nullptr;
// Canonicalize the pattern to ICMP_EQ by swapping the select operands.

View File

@ -21,6 +21,30 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
Value *ShAmt1) {
// We have two shift amounts from two different shifts. The types of those
// shift amounts may not match. If that's the case, let's bail out now.
if (ShAmt0->getType() != ShAmt1->getType())
return false;
// As input, we have the following pattern:
// Sh0 (Sh1 X, Q), K
// We want to rewrite that as:
// Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
// While we know that originally (Q+K) would not overflow
// (because 2 * (N-1) u<= iN -1), we have looked past extensions of
// shift amounts, so it may now overflow in a smaller bitwidth.
// To ensure that does not happen, we need to ensure that the total maximal
// shift amount is still representable in that smaller bit width.
unsigned MaximalPossibleTotalShiftAmount =
(Sh0->getType()->getScalarSizeInBits() - 1) +
(Sh1->getType()->getScalarSizeInBits() - 1);
APInt MaximalRepresentableShiftAmount =
APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
}
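// Illustrative numbers, not from the imported sources: for two i64 shifts the
// worst-case combined amount is (64 - 1) + (64 - 1) = 126. An i8 shift-amount
// type (max 255) can still represent that, so the fold may proceed; an i6
// amount type (max 63) could not, and the function above would return false.
static_assert((64 - 1) + (64 - 1) <= 255, "fits in an i8 shift-amount type");
static_assert((64 - 1) + (64 - 1) > 63, "would overflow an i6 shift-amount type");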
// Given pattern:
// (x shiftopcode Q) shiftopcode K
// we should rewrite it as
@ -57,26 +81,8 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
return nullptr;
// We have two shift amounts from two different shifts. The types of those
// shift amounts may not match. If that's the case let's bailout now..
if (ShAmt0->getType() != ShAmt1->getType())
return nullptr;
// As input, we have the following pattern:
// Sh0 (Sh1 X, Q), K
// We want to rewrite that as:
// Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
// While we know that originally (Q+K) would not overflow
// (because 2 * (N-1) u<= iN -1), we have looked past extensions of
// shift amounts. so it may now overflow in smaller bitwidth.
// To ensure that does not happen, we need to ensure that the total maximal
// shift amount is still representable in that smaller bit width.
unsigned MaximalPossibleTotalShiftAmount =
(Sh0->getType()->getScalarSizeInBits() - 1) +
(Sh1->getType()->getScalarSizeInBits() - 1);
APInt MaximalRepresentableShiftAmount =
APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
// Verify that it would be safe to try to add those two shift amounts.
if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1))
return nullptr;
// We are only looking for signbit extraction if we have two right shifts.
@ -220,9 +226,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// Peek through an optional zext of the shift amount.
match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
// We have two shift amounts from two different shifts. The types of those
// shift amounts may not match. If that's the case let's bailout now.
if (MaskShAmt->getType() != ShiftShAmt->getType())
// Verify that it would be safe to try to add those two shift amounts.
if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
MaskShAmt))
return nullptr;
// Can we simplify (MaskShAmt+ShiftShAmt) ?
@ -252,9 +258,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// Peek through an optional zext of the shift amount.
match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
// We have two shift amounts from two different shifts. The types of those
// shift amounts may not match. If that's the case let's bailout now.
if (MaskShAmt->getType() != ShiftShAmt->getType())
// Verify that it would be safe to try to add those two shift amounts.
if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
MaskShAmt))
return nullptr;
// Can we simplify (ShiftShAmt-MaskShAmt) ?

View File

@ -1081,6 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
DenseSet<Instruction*> V;
collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
for (auto *I : V) {
if (I->mayHaveSideEffects()) {
LLVM_DEBUG(dbgs() << "LRR: Aborting - "
<< "An instruction which does not belong to any root "
<< "sets must not have side effects: " << *I);
return false;
}
Uses[I].set(IL_All);
}

View File

@ -43,10 +43,10 @@ STATISTIC(ObjectSizeIntrinsicsHandled,
"Number of 'objectsize' intrinsic calls handled");
static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) {
Value *Op = II->getOperand(0);
return isa<Constant>(Op) ? ConstantInt::getTrue(II->getType())
: ConstantInt::getFalse(II->getType());
if (auto *C = dyn_cast<Constant>(II->getOperand(0)))
if (C->isManifestConstant())
return ConstantInt::getTrue(II->getType());
return ConstantInt::getFalse(II->getType());
}
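// Illustrative example, not from the imported sources: __builtin_constant_p is
// one way llvm.is.constant reaches the IR; after this change it only folds to
// true for a manifest constant argument, and to false otherwise.
int scaleBy16(int X) {
return __builtin_constant_p(X) ? X * 16 : X << 4;
}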
static bool replaceConditionalBranchesOnConstant(Instruction *II,

View File

@ -542,9 +542,14 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
auto Iter = AdditionalUsers.find(I);
if (Iter != AdditionalUsers.end()) {
// Copy additional users before notifying them of changes, because new
// users may be added, potentially invalidating the iterator.
SmallVector<Instruction *, 2> ToNotify;
for (User *U : Iter->second)
if (auto *UI = dyn_cast<Instruction>(U))
OperandChangedState(UI);
ToNotify.push_back(UI);
for (Instruction *UI : ToNotify)
OperandChangedState(UI);
}
}
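// Illustrative sketch, not from the imported sources: the same snapshot-then-
// notify pattern on a plain container. A callback that may insert into the set
// while we iterate over it would invalidate the iterators, so snapshot first.
#include <functional>
#include <set>
#include <vector>
inline void notifyAll(std::set<int> &Users,
const std::function<void(int)> &OnChanged) {
std::vector<int> ToNotify(Users.begin(), Users.end()); // snapshot
for (int U : ToNotify)
OnChanged(U); // may safely add new entries to Users now
}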
void handleCallOverdefined(CallBase &CB);

View File

@ -780,7 +780,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
Function::iterator FEnd) {
MDNode *MemParallelLoopAccess =
CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
@ -789,41 +790,33 @@ static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
return;
for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
VMI != VMIE; ++VMI) {
// Check that key is an instruction, to skip the Argument mapping, which
// points to an instruction in the original function, not the inlined one.
if (!VMI->second || !isa<Instruction>(VMI->first))
continue;
for (BasicBlock &BB : make_range(FStart, FEnd)) {
for (Instruction &I : BB) {
// This metadata is only relevant for instructions that access memory.
if (!I.mayReadOrWriteMemory())
continue;
Instruction *NI = dyn_cast<Instruction>(VMI->second);
if (!NI)
continue;
// This metadata is only relevant for instructions that access memory.
if (!NI->mayReadOrWriteMemory())
continue;
if (MemParallelLoopAccess) {
// TODO: This probably should not overwrite MemParallelLoopAccess.
MemParallelLoopAccess = MDNode::concatenate(
NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access),
MemParallelLoopAccess);
NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access,
if (MemParallelLoopAccess) {
// TODO: This probably should not overwrite MemParallelLoopAccess.
MemParallelLoopAccess = MDNode::concatenate(
I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
MemParallelLoopAccess);
I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
MemParallelLoopAccess);
}
if (AccessGroup)
I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
if (AliasScope)
I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));
if (NoAlias)
I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
I.getMetadata(LLVMContext::MD_noalias), NoAlias));
}
if (AccessGroup)
NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
NI->getMetadata(LLVMContext::MD_access_group), AccessGroup));
if (AliasScope)
NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope));
if (NoAlias)
NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
NI->getMetadata(LLVMContext::MD_noalias), NoAlias));
}
}
@ -844,9 +837,9 @@ class ScopedAliasMetadataDeepCloner {
/// subsequent remap() calls.
void clone();
/// Remap instructions in the given VMap from the original to the cloned
/// Remap instructions in the given range from the original to the cloned
/// metadata.
void remap(ValueToValueMapTy &VMap);
void remap(Function::iterator FStart, Function::iterator FEnd);
};
ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
@ -907,34 +900,27 @@ void ScopedAliasMetadataDeepCloner::clone() {
}
}
void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
Function::iterator FEnd) {
if (MDMap.empty())
return; // Nothing to do.
for (auto Entry : VMap) {
// Check that key is an instruction, to skip the Argument mapping, which
// points to an instruction in the original function, not the inlined one.
if (!Entry->second || !isa<Instruction>(Entry->first))
continue;
for (BasicBlock &BB : make_range(FStart, FEnd)) {
for (Instruction &I : BB) {
// TODO: The null checks for the MDMap.lookup() results should no longer
// be necessary.
if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
if (MDNode *MNew = MDMap.lookup(M))
I.setMetadata(LLVMContext::MD_alias_scope, MNew);
Instruction *I = dyn_cast<Instruction>(Entry->second);
if (!I)
continue;
if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
if (MDNode *MNew = MDMap.lookup(M))
I.setMetadata(LLVMContext::MD_noalias, MNew);
// Only update scopes when we find them in the map. If they are not, it is
// because we already handled that instruction before. This is faster than
// tracking which instructions we already updated.
if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
if (MDNode *MNew = MDMap.lookup(M))
I->setMetadata(LLVMContext::MD_alias_scope, MNew);
if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
if (MDNode *MNew = MDMap.lookup(M))
I->setMetadata(LLVMContext::MD_noalias, MNew);
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
Decl->setScopeList(MNew);
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
Decl->setScopeList(MNew);
}
}
}
@ -1926,7 +1912,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Now clone the inlined noalias scope metadata.
SAMetadataCloner.clone();
SAMetadataCloner.remap(VMap);
SAMetadataCloner.remap(FirstNewBlock, Caller->end());
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
@ -1936,7 +1922,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
AddReturnAttributes(CB, VMap);
// Propagate metadata on the callsite if necessary.
PropagateCallSiteMetadata(CB, VMap);
PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
// Register any cloned assumptions.
if (IFI.GetAssumptionCache)