Vendor import of llvm-project branch release/12.x llvmorg-12.0.1-rc2-0-ge7dac564cd0e, a.k.a. 12.0.1 rc2.

commit e4bbddaec8 (parent b4125f7d51)
@@ -57,6 +57,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
    } else if (Feature == "+pcrelative-memops") {
      HasPCRelativeMemops = true;
    } else if (Feature == "+spe" || Feature == "+efpu2") {
      HasStrictFP = false;
      HasSPE = true;
      LongDoubleWidth = LongDoubleAlign = 64;
      LongDoubleFormat = &llvm::APFloat::IEEEdouble();
@@ -409,6 +409,7 @@ class InlinedOpenMPRegionRAII {
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
@@ -416,16 +417,19 @@ class InlinedOpenMPRegionRAII {
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
@@ -434,9 +438,11 @@ class InlinedOpenMPRegionRAII {
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

@@ -3853,7 +3859,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
@@ -6214,7 +6220,9 @@ void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

@@ -6215,15 +6215,17 @@ llvm::SanitizerStatReport &CodeGenModule::getSanStats() {

  return *SanStats;
}

llvm::Value *
CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E,
                                                  CodeGenFunction &CGF) {
  llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType());
  auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr());
  auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false);
  return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy,
                                "__translate_sampler_initializer"),
                                {C});
  auto *SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr());
  auto *FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false);
  auto *Call = CGF.Builder.CreateCall(
      CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C});
  Call->setCallingConv(Call->getCalledFunction()->getCallingConv());
  return Call;
}

CharUnits CodeGenModule::getNaturalPointeeTypeAlignment(
@ -1917,12 +1917,12 @@ class AnnotatingParser {
|
||||
if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
|
||||
return true;
|
||||
|
||||
if (Tok.Next->is(tok::l_paren) &&
|
||||
!(Tok.Previous && Tok.Previous->is(tok::identifier) &&
|
||||
Tok.Previous->Previous &&
|
||||
Tok.Previous->Previous->isOneOf(tok::arrowstar, tok::arrow,
|
||||
tok::star)))
|
||||
return true;
|
||||
// Look for a cast `( x ) (`.
|
||||
if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
|
||||
if (Tok.Previous->is(tok::identifier) &&
|
||||
Tok.Previous->Previous->is(tok::l_paren))
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!Tok.Next->Next)
|
||||
return false;
|
||||
|
@ -1281,13 +1281,6 @@ void UnwrappedLineFormatter::formatFirstToken(
|
||||
if (Newlines)
|
||||
Indent = NewlineIndent;
|
||||
|
||||
// If in Whitemsmiths mode, indent start and end of blocks
|
||||
if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
|
||||
if (RootToken.isOneOf(tok::l_brace, tok::r_brace, tok::kw_case,
|
||||
tok::kw_default))
|
||||
Indent += Style.IndentWidth;
|
||||
}
|
||||
|
||||
// Preprocessor directives get indented before the hash only if specified
|
||||
if (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
|
||||
(Line.Type == LT_PreprocessorDirective ||
|
||||
|
@ -579,17 +579,23 @@ size_t UnwrappedLineParser::computePPHash() const {
|
||||
return h;
|
||||
}
|
||||
|
||||
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
|
||||
bool MunchSemi) {
|
||||
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
|
||||
bool MunchSemi,
|
||||
bool UnindentWhitesmithsBraces) {
|
||||
assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
|
||||
"'{' or macro block token expected");
|
||||
const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
|
||||
FormatTok->setBlockKind(BK_Block);
|
||||
|
||||
// For Whitesmiths mode, jump to the next level prior to skipping over the
|
||||
// braces.
|
||||
if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
|
||||
++Line->Level;
|
||||
|
||||
size_t PPStartHash = computePPHash();
|
||||
|
||||
unsigned InitialLevel = Line->Level;
|
||||
nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
|
||||
nextToken(/*LevelDifference=*/AddLevels);
|
||||
|
||||
if (MacroBlock && FormatTok->is(tok::l_paren))
|
||||
parseParens();
|
||||
@ -602,10 +608,16 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
|
||||
? (UnwrappedLine::kInvalidIndex)
|
||||
: (CurrentLines->size() - 1 - NbPreprocessorDirectives);
|
||||
|
||||
// Whitesmiths is weird here. The brace needs to be indented for the namespace
|
||||
// block, but the block itself may not be indented depending on the style
|
||||
// settings. This allows the format to back up one level in those cases.
|
||||
if (UnindentWhitesmithsBraces)
|
||||
--Line->Level;
|
||||
|
||||
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
|
||||
MustBeDeclaration);
|
||||
if (AddLevel)
|
||||
++Line->Level;
|
||||
if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
|
||||
Line->Level += AddLevels;
|
||||
parseLevel(/*HasOpeningBrace=*/true);
|
||||
|
||||
if (eof())
|
||||
@ -621,7 +633,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
|
||||
size_t PPEndHash = computePPHash();
|
||||
|
||||
// Munch the closing brace.
|
||||
nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
|
||||
nextToken(/*LevelDifference=*/-AddLevels);
|
||||
|
||||
if (MacroBlock && FormatTok->is(tok::l_paren))
|
||||
parseParens();
|
||||
@ -637,6 +649,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
|
||||
nextToken();
|
||||
|
||||
Line->Level = InitialLevel;
|
||||
FormatTok->setBlockKind(BK_Block);
|
||||
|
||||
if (PPStartHash == PPEndHash) {
|
||||
Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
|
||||
@ -2128,15 +2141,34 @@ void UnwrappedLineParser::parseNamespace() {
|
||||
if (ShouldBreakBeforeBrace(Style, InitialToken))
|
||||
addUnwrappedLine();
|
||||
|
||||
bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
|
||||
(Style.NamespaceIndentation == FormatStyle::NI_Inner &&
|
||||
DeclarationScopeStack.size() > 1);
|
||||
parseBlock(/*MustBeDeclaration=*/true, AddLevel);
|
||||
unsigned AddLevels =
|
||||
Style.NamespaceIndentation == FormatStyle::NI_All ||
|
||||
(Style.NamespaceIndentation == FormatStyle::NI_Inner &&
|
||||
DeclarationScopeStack.size() > 1)
|
||||
? 1u
|
||||
: 0u;
|
||||
bool ManageWhitesmithsBraces =
|
||||
AddLevels == 0u &&
|
||||
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
|
||||
|
||||
// If we're in Whitesmiths mode, indent the brace if we're not indenting
|
||||
// the whole block.
|
||||
if (ManageWhitesmithsBraces)
|
||||
++Line->Level;
|
||||
|
||||
parseBlock(/*MustBeDeclaration=*/true, AddLevels,
|
||||
/*MunchSemi=*/true,
|
||||
/*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
|
||||
|
||||
// Munch the semicolon after a namespace. This is more common than one would
|
||||
// think. Putting the semicolon into its own line is very ugly.
|
||||
if (FormatTok->Tok.is(tok::semi))
|
||||
nextToken();
|
||||
addUnwrappedLine();
|
||||
|
||||
addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
|
||||
|
||||
if (ManageWhitesmithsBraces)
|
||||
--Line->Level;
|
||||
}
|
||||
// FIXME: Add error handling.
|
||||
}
|
||||
@ -2222,6 +2254,11 @@ void UnwrappedLineParser::parseDoWhile() {
|
||||
return;
|
||||
}
|
||||
|
||||
// If in Whitesmiths mode, the line with the while() needs to be indented
|
||||
// to the same level as the block.
|
||||
if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
|
||||
++Line->Level;
|
||||
|
||||
nextToken();
|
||||
parseStructuralElement();
|
||||
}
|
||||
@ -2234,25 +2271,19 @@ void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
|
||||
if (LeftAlignLabel)
|
||||
Line->Level = 0;
|
||||
|
||||
bool RemoveWhitesmithsCaseIndent =
|
||||
(!Style.IndentCaseBlocks &&
|
||||
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths);
|
||||
|
||||
if (RemoveWhitesmithsCaseIndent)
|
||||
--Line->Level;
|
||||
|
||||
if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
|
||||
FormatTok->Tok.is(tok::l_brace)) {
|
||||
|
||||
CompoundStatementIndenter Indenter(
|
||||
this, Line->Level, Style.BraceWrapping.AfterCaseLabel,
|
||||
Style.BraceWrapping.IndentBraces || RemoveWhitesmithsCaseIndent);
|
||||
CompoundStatementIndenter Indenter(this, Line->Level,
|
||||
Style.BraceWrapping.AfterCaseLabel,
|
||||
Style.BraceWrapping.IndentBraces);
|
||||
parseBlock(/*MustBeDeclaration=*/false);
|
||||
if (FormatTok->Tok.is(tok::kw_break)) {
|
||||
if (Style.BraceWrapping.AfterControlStatement ==
|
||||
FormatStyle::BWACS_Always) {
|
||||
addUnwrappedLine();
|
||||
if (RemoveWhitesmithsCaseIndent) {
|
||||
if (!Style.IndentCaseBlocks &&
|
||||
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
|
||||
Line->Level++;
|
||||
}
|
||||
}
|
||||
@ -2920,17 +2951,29 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
|
||||
llvm::dbgs() << "\n";
|
||||
}
|
||||
|
||||
void UnwrappedLineParser::addUnwrappedLine() {
|
||||
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
|
||||
if (Line->Tokens.empty())
|
||||
return;
|
||||
LLVM_DEBUG({
|
||||
if (CurrentLines == &Lines)
|
||||
printDebugInfo(*Line);
|
||||
});
|
||||
|
||||
// If this line closes a block when in Whitesmiths mode, remember that
|
||||
// information so that the level can be decreased after the line is added.
|
||||
// This has to happen after the addition of the line since the line itself
|
||||
// needs to be indented.
|
||||
bool ClosesWhitesmithsBlock =
|
||||
Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
|
||||
Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
|
||||
|
||||
CurrentLines->push_back(std::move(*Line));
|
||||
Line->Tokens.clear();
|
||||
Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
|
||||
Line->FirstStartColumn = 0;
|
||||
|
||||
if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
|
||||
--Line->Level;
|
||||
if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
|
||||
CurrentLines->append(
|
||||
std::make_move_iterator(PreprocessorDirectives.begin()),
|
||||
|
@ -85,8 +85,9 @@ class UnwrappedLineParser {
|
||||
void reset();
|
||||
void parseFile();
|
||||
void parseLevel(bool HasOpeningBrace);
|
||||
void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
|
||||
bool MunchSemi = true);
|
||||
void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1u,
|
||||
bool MunchSemi = true,
|
||||
bool UnindentWhitesmithsBraces = false);
|
||||
void parseChildBlock();
|
||||
void parsePPDirective();
|
||||
void parsePPDefine();
|
||||
@ -140,7 +141,12 @@ class UnwrappedLineParser {
|
||||
bool tryToParsePropertyAccessor();
|
||||
void tryToParseJSFunction();
|
||||
bool tryToParseSimpleAttribute();
|
||||
void addUnwrappedLine();
|
||||
|
||||
// Used by addUnwrappedLine to denote whether to keep or remove a level
|
||||
// when resetting the line state.
|
||||
enum class LineLevel { Remove, Keep };
|
||||
|
||||
void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove);
|
||||
bool eof() const;
|
||||
// LevelDifference is the difference of levels after and before the current
|
||||
// token. For example:
|
||||
|
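For orientation, a minimal sketch of the brace layout the Whitesmiths-mode changes above are working toward (illustrative only; the exact result depends on the rest of the FormatStyle, e.g. NamespaceIndentation and IndentCaseBlocks):

// Illustrative C++ laid out in Whitesmiths style: each brace goes on its own
// line, indented to the level of the statements it encloses.
namespace demo
    {
    int clamp_positive(int x)
        {
        if (x < 0)
            {
            return 0;
            }
        return x;
        }
    } // namespace demo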
@ -28,7 +28,7 @@
|
||||
Most SSE scalar float intrinsic operations can be performed more
|
||||
efficiently as C language float scalar operations or optimized to
|
||||
use vector SIMD operations. We recommend this for new applications. */
|
||||
#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
||||
#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
|
||||
#endif
|
||||
|
||||
#ifndef _XMMINTRIN_H_INCLUDED
|
||||
@ -62,14 +62,13 @@
|
||||
|
||||
/* The Intel API is flexible enough that we must allow aliasing with other
|
||||
vector types, and their scalar components. */
|
||||
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
|
||||
typedef vector float __m128 __attribute__((__may_alias__));
|
||||
|
||||
/* Unaligned version of the same type. */
|
||||
typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
|
||||
__aligned__ (1)));
|
||||
typedef vector float __m128_u __attribute__((__may_alias__, __aligned__(1)));
|
||||
|
||||
/* Internal data types for implementing the intrinsics. */
|
||||
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
|
||||
typedef vector float __v4sf;
|
||||
|
||||
/* Create an undefined vector. */
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
|
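A small usage sketch of the compatibility header touched above (assumptions: a PowerPC build that uses this wrapper and defines NO_WARN_X86_INTRINSICS; the intrinsics are the standard SSE1 names xmmintrin.h already provides):

// Horizontal add of the four lanes of an __m128, which the wrapper now
// defines as `vector float` with the __may_alias__ attribute.
#define NO_WARN_X86_INTRINSICS 1
#include <xmmintrin.h>

float sum4(__m128 v) {
  __m128 hi = _mm_movehl_ps(v, v);            // lanes {2,3,2,3}
  __m128 s = _mm_add_ps(v, hi);               // lane0 = v0+v2, lane1 = v1+v3
  s = _mm_add_ss(s, _mm_shuffle_ps(s, s, 1)); // fold lane 1 into lane 0
  return _mm_cvtss_f32(s);                    // extract lane 0
}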
@ -2623,7 +2623,10 @@ static bool isValidBPFPreserveEnumValueArg(Expr *Arg) {
|
||||
return false;
|
||||
|
||||
const auto *CE = dyn_cast<CStyleCastExpr>(UO->getSubExpr());
|
||||
if (!CE || CE->getCastKind() != CK_IntegralToPointer)
|
||||
if (!CE)
|
||||
return false;
|
||||
if (CE->getCastKind() != CK_IntegralToPointer &&
|
||||
CE->getCastKind() != CK_NullToPointer)
|
||||
return false;
|
||||
|
||||
// The integer must be from an EnumConstantDecl.
|
||||
|
@ -99,14 +99,14 @@ struct allocator_traits
|
||||
};
|
||||
|
||||
template <>
|
||||
class allocator<void> // deprecated in C++17, removed in C++20
|
||||
class allocator<void> // removed in C++20
|
||||
{
|
||||
public:
|
||||
typedef void* pointer;
|
||||
typedef const void* const_pointer;
|
||||
typedef void value_type;
|
||||
typedef void* pointer; // deprecated in C++17
|
||||
typedef const void* const_pointer; // deprecated in C++17
|
||||
typedef void value_type; // deprecated in C++17
|
||||
|
||||
template <class _Up> struct rebind {typedef allocator<_Up> other;};
|
||||
template <class _Up> struct rebind {typedef allocator<_Up> other;}; // deprecated in C++17
|
||||
};
|
||||
|
||||
template <class T>
|
||||
@ -786,27 +786,27 @@ to_address(const _Pointer& __p) _NOEXCEPT
|
||||
|
||||
template <class _Tp> class allocator;
|
||||
|
||||
#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS)
|
||||
#if _LIBCPP_STD_VER <= 17
|
||||
template <>
|
||||
class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator<void>
|
||||
class _LIBCPP_TEMPLATE_VIS allocator<void>
|
||||
{
|
||||
public:
|
||||
typedef void* pointer;
|
||||
typedef const void* const_pointer;
|
||||
typedef void value_type;
|
||||
_LIBCPP_DEPRECATED_IN_CXX17 typedef void* pointer;
|
||||
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer;
|
||||
_LIBCPP_DEPRECATED_IN_CXX17 typedef void value_type;
|
||||
|
||||
template <class _Up> struct rebind {typedef allocator<_Up> other;};
|
||||
template <class _Up> struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;};
|
||||
};
|
||||
|
||||
template <>
|
||||
class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator<const void>
|
||||
class _LIBCPP_TEMPLATE_VIS allocator<const void>
|
||||
{
|
||||
public:
|
||||
typedef const void* pointer;
|
||||
typedef const void* const_pointer;
|
||||
typedef const void value_type;
|
||||
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer;
|
||||
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer;
|
||||
_LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type;
|
||||
|
||||
template <class _Up> struct rebind {typedef allocator<_Up> other;};
|
||||
template <class _Up> struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;};
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@ -3110,7 +3110,9 @@ size_t VersionTableSection::getSize() const {
|
||||
void VersionTableSection::writeTo(uint8_t *buf) {
|
||||
buf += 2;
|
||||
for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) {
|
||||
write16(buf, s.sym->versionId);
|
||||
// Use the original versionId for an unfetched lazy symbol (undefined weak),
|
||||
// which must be VER_NDX_GLOBAL (an undefined versioned symbol is an error).
|
||||
write16(buf, s.sym->isLazy() ? VER_NDX_GLOBAL : s.sym->versionId);
|
||||
buf += 2;
|
||||
}
|
||||
}
|
||||
|
@ -256,11 +256,11 @@ executable. To disambiguate between arguments passed to lldb and arguments
|
||||
passed to the debugged executable, arguments starting with a - must be passed
|
||||
after --.
|
||||
|
||||
lldb --arch x86_64 /path/to/program program argument -- --arch arvm7
|
||||
lldb --arch x86_64 /path/to/program program argument -- --arch armv7
|
||||
|
||||
For convenience, passing the executable after -- is also supported.
|
||||
|
||||
lldb --arch x86_64 -- /path/to/program program argument --arch arvm7
|
||||
lldb --arch x86_64 -- /path/to/program program argument --arch armv7
|
||||
|
||||
Passing one of the attach options causes :program:`lldb` to immediately attach
|
||||
to the given process.
|
||||
|
@ -751,11 +751,11 @@ static void printHelp(LLDBOptTable &table, llvm::StringRef tool_name) {
|
||||
arguments passed to the debugged executable, arguments starting with a - must
|
||||
be passed after --.
|
||||
|
||||
lldb --arch x86_64 /path/to/program program argument -- --arch arvm7
|
||||
lldb --arch x86_64 /path/to/program program argument -- --arch armv7
|
||||
|
||||
For convenience, passing the executable after -- is also supported.
|
||||
|
||||
lldb --arch x86_64 -- /path/to/program program argument --arch arvm7
|
||||
lldb --arch x86_64 -- /path/to/program program argument --arch armv7
|
||||
|
||||
Passing one of the attach options causes lldb to immediately attach to the
|
||||
given process.
|
||||
|
@ -23,7 +23,12 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class Any {
|
||||
class LLVM_EXTERNAL_VISIBILITY Any {
|
||||
|
||||
// The `Typeid<T>::Id` static data member below is a globally unique
|
||||
// identifier for the type `T`. It is explicitly marked with default
|
||||
// visibility so that when `-fvisibility=hidden` is used, the loader still
|
||||
// merges duplicate definitions across DSO boundaries.
|
||||
template <typename T> struct TypeId { static const char Id; };
|
||||
|
||||
struct StorageBase {
|
||||
|
@ -316,6 +316,7 @@ class LegalizerHelper {
|
||||
|
||||
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
|
||||
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
|
||||
|
||||
|
@ -214,6 +214,10 @@ class Constant : public User {
|
||||
/// both must either be scalars or vectors with the same element count. If no
|
||||
/// changes are made, the constant C is returned.
|
||||
static Constant *mergeUndefsWith(Constant *C, Constant *Other);
|
||||
|
||||
/// Return true if a constant is ConstantData or a ConstantAggregate or
|
||||
/// ConstantExpr that contain only ConstantData.
|
||||
bool isManifestConstant() const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -1808,19 +1808,6 @@ double getValueAsDouble(ConstantFP *Op) {
|
||||
return APF.convertToDouble();
|
||||
}
|
||||
|
||||
static bool isManifestConstant(const Constant *c) {
|
||||
if (isa<ConstantData>(c)) {
|
||||
return true;
|
||||
} else if (isa<ConstantAggregate>(c) || isa<ConstantExpr>(c)) {
|
||||
for (const Value *subc : c->operand_values()) {
|
||||
if (!isManifestConstant(cast<Constant>(subc)))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
|
||||
if (auto *CI = dyn_cast<ConstantInt>(Op)) {
|
||||
C = &CI->getValue();
|
||||
@ -1845,7 +1832,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
|
||||
// We know we have a "Constant" argument. But we want to only
|
||||
// return true for manifest constants, not those that depend on
|
||||
// constants with unknowable values, e.g. GlobalValue or BlockAddress.
|
||||
if (isManifestConstant(Operands[0]))
|
||||
if (Operands[0]->isManifestConstant())
|
||||
return ConstantInt::getTrue(Ty->getContext());
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -4127,10 +4127,12 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
|
||||
TrueVal, FalseVal))
|
||||
return V;
|
||||
|
||||
// If we have an equality comparison, then we know the value in one of the
|
||||
// arms of the select. See if substituting this value into the arm and
|
||||
// If we have a scalar equality comparison, then we know the value in one of
|
||||
// the arms of the select. See if substituting this value into the arm and
|
||||
// simplifying the result yields the same value as the other arm.
|
||||
if (Pred == ICmpInst::ICMP_EQ) {
|
||||
// Note that the equivalence/replacement opportunity does not hold for vectors
|
||||
// because each element of a vector select is chosen independently.
|
||||
if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) {
|
||||
if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
|
||||
/* AllowRefinement */ false, MaxRecurse) ==
|
||||
TrueVal ||
|
||||
|
@ -344,7 +344,9 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
|
||||
// If we hit load/store with the same invariant.group metadata (and the
|
||||
// same pointer operand) we can assume that value pointed by pointer
|
||||
// operand didn't change.
|
||||
if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
|
||||
if ((isa<LoadInst>(U) ||
|
||||
(isa<StoreInst>(U) &&
|
||||
cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
|
||||
U->hasMetadata(LLVMContext::MD_invariant_group))
|
||||
ClosestDependency = GetClosestDependency(ClosestDependency, U);
|
||||
}
|
||||
|
@ -10622,6 +10622,10 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
|
||||
if (!dominates(RHS, IncBB))
|
||||
return false;
|
||||
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
|
||||
// Make sure L does not refer to a value from a potentially previous
|
||||
// iteration of a loop.
|
||||
if (!properlyDominates(L, IncBB))
|
||||
return false;
|
||||
if (!ProvedEasily(L, RHS))
|
||||
return false;
|
||||
}
|
||||
|
@ -5150,6 +5150,9 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Limit number of instructions we look at, to avoid scanning through large
|
||||
// blocks. The current limit is chosen arbitrarily.
|
||||
unsigned ScanLimit = 32;
|
||||
BasicBlock::const_iterator End = BB->end();
|
||||
|
||||
if (!PoisonOnly) {
|
||||
@ -5160,6 +5163,11 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
|
||||
// For example, 'udiv x, (undef | 1)' isn't UB.
|
||||
|
||||
for (auto &I : make_range(Begin, End)) {
|
||||
if (isa<DbgInfoIntrinsic>(I))
|
||||
continue;
|
||||
if (--ScanLimit == 0)
|
||||
break;
|
||||
|
||||
if (const auto *CB = dyn_cast<CallBase>(&I)) {
|
||||
for (unsigned i = 0; i < CB->arg_size(); ++i) {
|
||||
if (CB->paramHasAttr(i, Attribute::NoUndef) &&
|
||||
@ -5186,9 +5194,12 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
|
||||
for_each(V->users(), Propagate);
|
||||
Visited.insert(BB);
|
||||
|
||||
unsigned Iter = 0;
|
||||
while (Iter++ < MaxAnalysisRecursionDepth) {
|
||||
while (true) {
|
||||
for (auto &I : make_range(Begin, End)) {
|
||||
if (isa<DbgInfoIntrinsic>(I))
|
||||
continue;
|
||||
if (--ScanLimit == 0)
|
||||
return false;
|
||||
if (mustTriggerUB(&I, YieldsPoison))
|
||||
return true;
|
||||
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
|
||||
|
@ -840,9 +840,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
|
||||
// For conditional branch lowering, we might try to do something silly like
|
||||
// emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
|
||||
// just re-use the existing condition vreg.
|
||||
if (CI && CI->getZExtValue() == 1 &&
|
||||
MRI->getType(CondLHS).getSizeInBits() == 1 &&
|
||||
CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
|
||||
if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
|
||||
CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
|
||||
Cond = CondLHS;
|
||||
} else {
|
||||
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
|
||||
|
@ -1257,22 +1257,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
|
||||
Observer.changedInstr(MI);
|
||||
return Legalized;
|
||||
}
|
||||
case TargetOpcode::G_FPTOUI: {
|
||||
if (TypeIdx != 0)
|
||||
return UnableToLegalize;
|
||||
Observer.changingInstr(MI);
|
||||
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
|
||||
Observer.changedInstr(MI);
|
||||
return Legalized;
|
||||
}
|
||||
case TargetOpcode::G_FPTOSI: {
|
||||
if (TypeIdx != 0)
|
||||
return UnableToLegalize;
|
||||
Observer.changingInstr(MI);
|
||||
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
|
||||
Observer.changedInstr(MI);
|
||||
return Legalized;
|
||||
}
|
||||
case TargetOpcode::G_FPTOUI:
|
||||
case TargetOpcode::G_FPTOSI:
|
||||
return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
|
||||
case TargetOpcode::G_FPEXT:
|
||||
if (TypeIdx != 0)
|
||||
return UnableToLegalize;
|
||||
@ -4496,6 +4483,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
|
||||
return Legalized;
|
||||
}
|
||||
|
||||
LegalizerHelper::LegalizeResult
|
||||
LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
|
||||
LLT NarrowTy) {
|
||||
if (TypeIdx != 0)
|
||||
return UnableToLegalize;
|
||||
|
||||
bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
|
||||
|
||||
Register Src = MI.getOperand(1).getReg();
|
||||
LLT SrcTy = MRI.getType(Src);
|
||||
|
||||
// If all finite floats fit into the narrowed integer type, we can just swap
|
||||
// out the result type. This is practically only useful for conversions from
|
||||
// half to at least 16-bits, so just handle the one case.
|
||||
if (SrcTy.getScalarType() != LLT::scalar(16) ||
|
||||
NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16))
|
||||
return UnableToLegalize;
|
||||
|
||||
Observer.changingInstr(MI);
|
||||
narrowScalarDst(MI, NarrowTy, 0,
|
||||
IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
|
||||
Observer.changedInstr(MI);
|
||||
return Legalized;
|
||||
}
|
||||
|
||||
LegalizerHelper::LegalizeResult
|
||||
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
|
||||
LLT NarrowTy) {
|
||||
|
@ -7105,14 +7105,22 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
|
||||
if (LegalOperations)
|
||||
return SDValue();
|
||||
|
||||
// Collect all the stores in the chain.
|
||||
SDValue Chain;
|
||||
SmallVector<StoreSDNode *, 8> Stores;
|
||||
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
|
||||
// TODO: Allow unordered atomics when wider type is legal (see D66309)
|
||||
EVT MemVT = Store->getMemoryVT();
|
||||
if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
|
||||
!Store->isSimple() || Store->isIndexed())
|
||||
// We only handle merging simple stores of 1-4 bytes.
|
||||
// TODO: Allow unordered atomics when wider type is legal (see D66309)
|
||||
EVT MemVT = N->getMemoryVT();
|
||||
if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
|
||||
!N->isSimple() || N->isIndexed())
|
||||
return SDValue();
|
||||
|
||||
// Collect all of the stores in the chain.
|
||||
SDValue Chain = N->getChain();
|
||||
SmallVector<StoreSDNode *, 8> Stores = {N};
|
||||
while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
|
||||
// All stores must be the same size to ensure that we are writing all of the
|
||||
// bytes in the wide value.
|
||||
// TODO: We could allow multiple sizes by tracking each stored byte.
|
||||
if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
|
||||
Store->isIndexed())
|
||||
return SDValue();
|
||||
Stores.push_back(Store);
|
||||
Chain = Store->getChain();
|
||||
|
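For context, a source-level sketch of the pattern this combine targets (illustrative assumption, not taken from the commit): several adjacent narrow stores that together write one wider value, which the DAG combiner may merge into a single store when the target allows it.

#include <cstdint>

// On a little-endian target these four byte stores write the 32-bit value v
// to p[0..3]; after the combine they can become one 32-bit store.
void store_u32(uint8_t *p, uint32_t v) {
  p[0] = static_cast<uint8_t>(v);
  p[1] = static_cast<uint8_t>(v >> 8);
  p[2] = static_cast<uint8_t>(v >> 16);
  p[3] = static_cast<uint8_t>(v >> 24);
}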
@ -1691,9 +1691,9 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
|
||||
/// terminator, but additionally the copies that move the vregs into the
|
||||
/// physical registers.
|
||||
static MachineBasicBlock::iterator
|
||||
FindSplitPointForStackProtector(MachineBasicBlock *BB) {
|
||||
FindSplitPointForStackProtector(MachineBasicBlock *BB,
|
||||
const TargetInstrInfo &TII) {
|
||||
MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
|
||||
//
|
||||
if (SplitPoint == BB->begin())
|
||||
return SplitPoint;
|
||||
|
||||
@ -1701,6 +1701,31 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) {
|
||||
MachineBasicBlock::iterator Previous = SplitPoint;
|
||||
--Previous;
|
||||
|
||||
if (TII.isTailCall(*SplitPoint) &&
|
||||
Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
|
||||
// call itself, then we must insert before the sequence even starts. For
|
||||
// example:
|
||||
// <split point>
|
||||
// ADJCALLSTACKDOWN ...
|
||||
// <Moves>
|
||||
// ADJCALLSTACKUP ...
|
||||
// TAILJMP somewhere
|
||||
// On the other hand, it could be an unrelated call in which case this tail call
|
||||
// has to register moves of its own and should be the split point. For example:
|
||||
// ADJCALLSTACKDOWN
|
||||
// CALL something_else
|
||||
// ADJCALLSTACKUP
|
||||
// <split point>
|
||||
// TAILJMP somewhere
|
||||
do {
|
||||
--Previous;
|
||||
if (Previous->isCall())
|
||||
return SplitPoint;
|
||||
} while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
|
||||
|
||||
return Previous;
|
||||
}
|
||||
|
||||
while (MIIsInTerminatorSequence(*Previous)) {
|
||||
SplitPoint = Previous;
|
||||
if (Previous == Start)
|
||||
@ -1740,7 +1765,7 @@ SelectionDAGISel::FinishBasicBlock() {
|
||||
// Add load and check to the basicblock.
|
||||
FuncInfo->MBB = ParentMBB;
|
||||
FuncInfo->InsertPt =
|
||||
FindSplitPointForStackProtector(ParentMBB);
|
||||
FindSplitPointForStackProtector(ParentMBB, *TII);
|
||||
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
|
||||
CurDAG->setRoot(SDB->getRoot());
|
||||
SDB->clear();
|
||||
@ -1759,7 +1784,7 @@ SelectionDAGISel::FinishBasicBlock() {
|
||||
// register allocation issues caused by us splitting the parent mbb. The
|
||||
// register allocator will clean up said virtual copies later on.
|
||||
MachineBasicBlock::iterator SplitPoint =
|
||||
FindSplitPointForStackProtector(ParentMBB);
|
||||
FindSplitPointForStackProtector(ParentMBB, *TII);
|
||||
|
||||
// Splice the terminator of ParentMBB into SuccessMBB.
|
||||
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
|
||||
|
@ -470,21 +470,36 @@ bool StackProtector::InsertStackProtectors() {
|
||||
// instrumentation has already been generated.
|
||||
HasIRCheck = true;
|
||||
|
||||
// If we're instrumenting a block with a musttail call, the check has to be
|
||||
// inserted before the call rather than between it and the return. The
|
||||
// verifier guarantees that a musttail call is either directly before the
|
||||
// return or with a single correct bitcast of the return value in between so
|
||||
// we don't need to worry about many situations here.
|
||||
Instruction *CheckLoc = RI;
|
||||
Instruction *Prev = RI->getPrevNonDebugInstruction();
|
||||
if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
|
||||
CheckLoc = Prev;
|
||||
else if (Prev) {
|
||||
Prev = Prev->getPrevNonDebugInstruction();
|
||||
if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
|
||||
CheckLoc = Prev;
|
||||
}
|
||||
|
||||
// Generate epilogue instrumentation. The epilogue intrumentation can be
|
||||
// function-based or inlined depending on which mechanism the target is
|
||||
// providing.
|
||||
if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
|
||||
// Generate the function-based epilogue instrumentation.
|
||||
// The target provides a guard check function, generate a call to it.
|
||||
IRBuilder<> B(RI);
|
||||
IRBuilder<> B(CheckLoc);
|
||||
LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
|
||||
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
|
||||
Call->setAttributes(GuardCheck->getAttributes());
|
||||
Call->setCallingConv(GuardCheck->getCallingConv());
|
||||
} else {
|
||||
// Generate the epilogue with inline instrumentation.
|
||||
// If we do not support SelectionDAG based tail calls, generate IR level
|
||||
// tail calls.
|
||||
// If we do not support SelectionDAG based calls, generate IR level
|
||||
// calls.
|
||||
//
|
||||
// For each block with a return instruction, convert this:
|
||||
//
|
||||
@ -514,7 +529,8 @@ bool StackProtector::InsertStackProtectors() {
|
||||
BasicBlock *FailBB = CreateFailBB();
|
||||
|
||||
// Split the basic block before the return instruction.
|
||||
BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
|
||||
BasicBlock *NewBB =
|
||||
BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
|
||||
|
||||
// Update the dominator tree if we need to.
|
||||
if (DT && DT->isReachableFromEntry(BB)) {
|
||||
|
@ -803,6 +803,18 @@ Constant *Constant::mergeUndefsWith(Constant *C, Constant *Other) {
|
||||
return C;
|
||||
}
|
||||
|
||||
bool Constant::isManifestConstant() const {
|
||||
if (isa<ConstantData>(this))
|
||||
return true;
|
||||
if (isa<ConstantAggregate>(this) || isa<ConstantExpr>(this)) {
|
||||
for (const Value *Op : operand_values())
|
||||
if (!cast<Constant>(Op)->isManifestConstant())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ConstantInt
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
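A hedged sketch of how the new Constant::isManifestConstant() helper can be consumed; the surrounding function foldIsConstantCall and its plumbing are hypothetical, only the isManifestConstant() call itself comes from this change.

#include "llvm/IR/Constants.h"

// Answer "true" only for constants built purely from ConstantData, i.e. values
// fully known at compile time (no GlobalValue or BlockAddress involved).
static llvm::Constant *foldIsConstantCall(llvm::Constant *Arg,
                                          llvm::LLVMContext &Ctx) {
  if (Arg->isManifestConstant())
    return llvm::ConstantInt::getTrue(Ctx);
  return nullptr; // unknown: leave the query for later passes
}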
@ -1397,6 +1397,17 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
|
||||
if (TargetObjectWriter->getEMachine() == ELF::EM_386 &&
|
||||
Type == ELF::R_386_GOTOFF)
|
||||
return true;
|
||||
|
||||
// ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so
|
||||
// it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an
|
||||
// R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in
|
||||
// range of a MergeInputSection. We could introduce a new RelExpr member
|
||||
// (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12)
|
||||
// but the complexity is unnecessary given that GNU as keeps the original
|
||||
// symbol for this case as well.
|
||||
if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS &&
|
||||
!hasRelocationAddend())
|
||||
return true;
|
||||
}
|
||||
|
||||
// Most TLS relocations use a got, so they need the symbol. Even those that
|
||||
|
@ -1729,7 +1729,7 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
|
||||
void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent,
|
||||
size_t FirstLineIndentedBy) {
|
||||
const StringRef ValHelpPrefix = " ";
|
||||
assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size());
|
||||
assert(BaseIndent >= FirstLineIndentedBy);
|
||||
std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
|
||||
outs().indent(BaseIndent - FirstLineIndentedBy)
|
||||
<< ArgHelpPrefix << ValHelpPrefix << Split.first << "\n";
|
||||
|
@ -16335,25 +16335,36 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
|
||||
|
||||
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
|
||||
if (Size > 128) return AtomicExpansionKind::None;
|
||||
// Nand not supported in LSE.
|
||||
if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
|
||||
// Leave 128 bits to LLSC.
|
||||
if (Subtarget->hasLSE() && Size < 128)
|
||||
return AtomicExpansionKind::None;
|
||||
if (Subtarget->outlineAtomics() && Size < 128) {
|
||||
// [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
|
||||
// Don't outline them unless
|
||||
// (1) high level <atomic> support approved:
|
||||
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
|
||||
// (2) low level libgcc and compiler-rt support implemented by:
|
||||
// min/max outline atomics helpers
|
||||
if (AI->getOperation() != AtomicRMWInst::Min &&
|
||||
AI->getOperation() != AtomicRMWInst::Max &&
|
||||
AI->getOperation() != AtomicRMWInst::UMin &&
|
||||
AI->getOperation() != AtomicRMWInst::UMax) {
|
||||
|
||||
// Nand is not supported in LSE.
|
||||
// Leave 128 bits to LLSC or CmpXChg.
|
||||
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
|
||||
if (Subtarget->hasLSE())
|
||||
return AtomicExpansionKind::None;
|
||||
if (Subtarget->outlineAtomics()) {
|
||||
// [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
|
||||
// Don't outline them unless
|
||||
// (1) high level <atomic> support approved:
|
||||
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
|
||||
// (2) low level libgcc and compiler-rt support implemented by:
|
||||
// min/max outline atomics helpers
|
||||
if (AI->getOperation() != AtomicRMWInst::Min &&
|
||||
AI->getOperation() != AtomicRMWInst::Max &&
|
||||
AI->getOperation() != AtomicRMWInst::UMin &&
|
||||
AI->getOperation() != AtomicRMWInst::UMax) {
|
||||
return AtomicExpansionKind::None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
|
||||
// implement atomicrmw without spilling. If the target address is also on the
|
||||
// stack and close enough to the spill slot, this can lead to a situation
|
||||
// where the monitor always gets cleared and the atomic operation can never
|
||||
// succeed. So at -O0 lower this operation to a CAS loop.
|
||||
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
|
||||
return AtomicExpansionKind::CmpXChg;
|
||||
|
||||
return AtomicExpansionKind::LLSC;
|
||||
}
|
||||
|
||||
|
@ -1791,7 +1791,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
|
||||
NegOpc = AArch64::NEGv8i16;
|
||||
} else if (Ty == LLT::vector(16, 8)) {
|
||||
Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
|
||||
NegOpc = AArch64::NEGv8i16;
|
||||
NegOpc = AArch64::NEGv16i8;
|
||||
} else if (Ty == LLT::vector(8, 8)) {
|
||||
Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
|
||||
NegOpc = AArch64::NEGv8i8;
|
||||
|
@ -5934,6 +5934,9 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
|
||||
|| AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register
|
||||
|| AddrMode == ARMII::AddrModeT2_pc // PCrel access
|
||||
|| AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST
|
||||
|| AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE
|
||||
|| AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE
|
||||
|| AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR
|
||||
|| AddrMode == ARMII::AddrModeNone)
|
||||
return false;
|
||||
|
||||
@ -5976,6 +5979,10 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
|
||||
NumBits = 8;
|
||||
break;
|
||||
case ARMII::AddrModeT2_i8s4:
|
||||
// FIXME: Values are already scaled in this addressing mode.
|
||||
assert((Fixup & 3) == 0 && "Can't encode this offset!");
|
||||
NumBits = 10;
|
||||
break;
|
||||
case ARMII::AddrModeT2_ldrex:
|
||||
NumBits = 8;
|
||||
Scale = 4;
|
||||
@ -5984,17 +5991,6 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
|
||||
case ARMII::AddrMode_i12:
|
||||
NumBits = 12;
|
||||
break;
|
||||
case ARMII::AddrModeT2_i7:
|
||||
NumBits = 7;
|
||||
break;
|
||||
case ARMII::AddrModeT2_i7s2:
|
||||
NumBits = 7;
|
||||
Scale = 2;
|
||||
break;
|
||||
case ARMII::AddrModeT2_i7s4:
|
||||
NumBits = 7;
|
||||
Scale = 4;
|
||||
break;
|
||||
case ARMII::AddrModeT1_s: // SP-relative LD/ST
|
||||
NumBits = 8;
|
||||
Scale = 4;
|
||||
@ -6004,8 +6000,8 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
|
||||
}
|
||||
// Make sure the offset is encodable for instructions that scale the
|
||||
// immediate.
|
||||
if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0)
|
||||
return false;
|
||||
assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
|
||||
"Can't encode this offset!");
|
||||
OffVal += Fixup / Scale;
|
||||
|
||||
unsigned Mask = (1 << NumBits) - 1;
|
||||
|
@ -475,6 +475,9 @@ bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
|
||||
if (MI.getOpcode() == BPF::SRL_ri &&
|
||||
MI.getOperand(2).getImm() == 32) {
|
||||
SrcReg = MI.getOperand(1).getReg();
|
||||
if (!MRI->hasOneNonDBGUse(SrcReg))
|
||||
continue;
|
||||
|
||||
MI2 = MRI->getVRegDef(SrcReg);
|
||||
DstReg = MI.getOperand(0).getReg();
|
||||
|
||||
|
@ -85,8 +85,17 @@ static bool BPFPreserveDITypeImpl(Function &F) {
|
||||
} else {
|
||||
Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE;
|
||||
DIType *Ty = cast<DIType>(MD);
|
||||
while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
|
||||
unsigned Tag = DTy->getTag();
|
||||
if (Tag != dwarf::DW_TAG_const_type &&
|
||||
Tag != dwarf::DW_TAG_volatile_type)
|
||||
break;
|
||||
Ty = DTy->getBaseType();
|
||||
}
|
||||
|
||||
if (Ty->getName().empty())
|
||||
report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc");
|
||||
MD = Ty;
|
||||
}
|
||||
|
||||
BasicBlock *BB = Call->getParent();
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include "BPFTargetMachine.h"
|
||||
#include "BPF.h"
|
||||
#include "BPFTargetTransformInfo.h"
|
||||
#include "MCTargetDesc/BPFMCAsmInfo.h"
|
||||
#include "TargetInfo/BPFTargetInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
@ -145,6 +146,11 @@ void BPFPassConfig::addIRPasses() {
|
||||
TargetPassConfig::addIRPasses();
|
||||
}
|
||||
|
||||
TargetTransformInfo
|
||||
BPFTargetMachine::getTargetTransformInfo(const Function &F) {
|
||||
return TargetTransformInfo(BPFTTIImpl(this, F));
|
||||
}
|
||||
|
||||
// Install an instruction selector pass using
|
||||
// the ISelDag to gen BPF code.
|
||||
bool BPFPassConfig::addInstSelector() {
|
||||
|
@ -34,6 +34,8 @@ class BPFTargetMachine : public LLVMTargetMachine {
|
||||
|
||||
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
|
||||
|
||||
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
|
||||
|
||||
TargetLoweringObjectFile *getObjFileLowering() const override {
|
||||
return TLOF.get();
|
||||
}
|
||||
|
llvm/lib/Target/BPF/BPFTargetTransformInfo.h (new file, 61 lines)
@@ -0,0 +1,61 @@
|
||||
//===------ BPFTargetTransformInfo.h - BPF specific TTI ---------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file uses the target's specific information to
|
||||
// provide more precise answers to certain TTI queries, while letting the
|
||||
// target independent and default TTI implementations handle the rest.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
|
||||
#define LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
|
||||
|
||||
#include "BPFTargetMachine.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/CodeGen/BasicTTIImpl.h"
|
||||
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
|
||||
|
||||
namespace llvm {
|
||||
class BPFTTIImpl : public BasicTTIImplBase<BPFTTIImpl> {
|
||||
typedef BasicTTIImplBase<BPFTTIImpl> BaseT;
|
||||
typedef TargetTransformInfo TTI;
|
||||
friend BaseT;
|
||||
|
||||
const BPFSubtarget *ST;
|
||||
const BPFTargetLowering *TLI;
|
||||
|
||||
const BPFSubtarget *getST() const { return ST; }
|
||||
const BPFTargetLowering *getTLI() const { return TLI; }
|
||||
|
||||
public:
|
||||
explicit BPFTTIImpl(const BPFTargetMachine *TM, const Function &F)
|
||||
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
|
||||
TLI(ST->getTargetLowering()) {}
|
||||
|
||||
int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) {
|
||||
if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
|
||||
return TTI::TCC_Free;
|
||||
|
||||
return TTI::TCC_Basic;
|
||||
}
|
||||
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
CmpInst::Predicate VecPred,
|
||||
TTI::TargetCostKind CostKind,
|
||||
const llvm::Instruction *I = nullptr) {
|
||||
if (Opcode == Instruction::Select)
|
||||
return SCEVCheapExpansionBudget;
|
||||
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
|
||||
I);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
|
@@ -30,5 +30,6 @@ HANDLE_BTF_KIND(12, FUNC)
HANDLE_BTF_KIND(13, FUNC_PROTO)
HANDLE_BTF_KIND(14, VAR)
HANDLE_BTF_KIND(15, DATASEC)
HANDLE_BTF_KIND(16, FLOAT)

#undef HANDLE_BTF_KIND
@ -371,6 +371,21 @@ void BTFKindDataSec::emitType(MCStreamer &OS) {
|
||||
}
|
||||
}
|
||||
|
||||
BTFTypeFloat::BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName)
|
||||
: Name(TypeName) {
|
||||
Kind = BTF::BTF_KIND_FLOAT;
|
||||
BTFType.Info = Kind << 24;
|
||||
BTFType.Size = roundupToBytes(SizeInBits);
|
||||
}
|
||||
|
||||
void BTFTypeFloat::completeType(BTFDebug &BDebug) {
|
||||
if (IsCompleted)
|
||||
return;
|
||||
IsCompleted = true;
|
||||
|
||||
BTFType.NameOff = BDebug.addString(Name);
|
||||
}
|
||||
|
||||
uint32_t BTFStringTable::addString(StringRef S) {
|
||||
// Check whether the string already exists.
|
||||
for (auto &OffsetM : OffsetToIdMap) {
|
||||
@ -409,18 +424,28 @@ uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
|
||||
}
|
||||
|
||||
void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) {
|
||||
// Only int types are supported in BTF.
|
||||
// Only int and binary floating point types are supported in BTF.
|
||||
uint32_t Encoding = BTy->getEncoding();
|
||||
if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
|
||||
Encoding != dwarf::DW_ATE_signed_char &&
|
||||
Encoding != dwarf::DW_ATE_unsigned &&
|
||||
Encoding != dwarf::DW_ATE_unsigned_char)
|
||||
std::unique_ptr<BTFTypeBase> TypeEntry;
|
||||
switch (Encoding) {
|
||||
case dwarf::DW_ATE_boolean:
|
||||
case dwarf::DW_ATE_signed:
|
||||
case dwarf::DW_ATE_signed_char:
|
||||
case dwarf::DW_ATE_unsigned:
|
||||
case dwarf::DW_ATE_unsigned_char:
|
||||
// Create a BTF type instance for this DIBasicType and put it into
|
||||
// DIToIdMap for cross-type reference check.
|
||||
TypeEntry = std::make_unique<BTFTypeInt>(
|
||||
Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
|
||||
break;
|
||||
case dwarf::DW_ATE_float:
|
||||
TypeEntry =
|
||||
std::make_unique<BTFTypeFloat>(BTy->getSizeInBits(), BTy->getName());
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
// Create a BTF type instance for this DIBasicType and put it into
|
||||
// DIToIdMap for cross-type reference check.
|
||||
auto TypeEntry = std::make_unique<BTFTypeInt>(
|
||||
Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
|
||||
TypeId = addType(std::move(TypeEntry), BTy);
|
||||
}
|
||||
|
||||
@ -1171,6 +1196,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
|
||||
if (Linkage != GlobalValue::InternalLinkage &&
|
||||
Linkage != GlobalValue::ExternalLinkage &&
|
||||
Linkage != GlobalValue::WeakAnyLinkage &&
|
||||
Linkage != GlobalValue::WeakODRLinkage &&
|
||||
Linkage != GlobalValue::ExternalWeakLinkage)
|
||||
continue;
|
||||
|
||||
@ -1199,8 +1225,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
|
||||
const DataLayout &DL = Global.getParent()->getDataLayout();
|
||||
uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
|
||||
|
||||
DataSecEntries[std::string(SecName)]->addVar(VarId, Asm->getSymbol(&Global),
|
||||
Size);
|
||||
DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId,
|
||||
Asm->getSymbol(&Global), Size);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1278,7 +1304,19 @@ void BTFDebug::processFuncPrototypes(const Function *F) {
|
||||
uint8_t Scope = BTF::FUNC_EXTERN;
|
||||
auto FuncTypeEntry =
|
||||
std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
|
||||
addType(std::move(FuncTypeEntry));
|
||||
uint32_t FuncId = addType(std::move(FuncTypeEntry));
|
||||
if (F->hasSection()) {
|
||||
StringRef SecName = F->getSection();
|
||||
|
||||
if (DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) {
|
||||
DataSecEntries[std::string(SecName)] =
|
||||
std::make_unique<BTFKindDataSec>(Asm, std::string(SecName));
|
||||
}
|
||||
|
||||
// We really don't know func size, set it to 0.
|
||||
DataSecEntries[std::string(SecName)]->addDataSecEntry(FuncId,
|
||||
Asm->getSymbol(F), 0);
|
||||
}
|
||||
}
|
||||
|
||||
void BTFDebug::endModule() {
|
||||
|
@ -187,7 +187,7 @@ class BTFKindDataSec : public BTFTypeBase {
|
||||
uint32_t getSize() override {
|
||||
return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size();
|
||||
}
|
||||
void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
|
||||
void addDataSecEntry(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
|
||||
Vars.push_back(std::make_tuple(Id, Sym, Size));
|
||||
}
|
||||
std::string getName() { return Name; }
|
||||
@ -195,6 +195,15 @@ class BTFKindDataSec : public BTFTypeBase {
|
||||
void emitType(MCStreamer &OS) override;
|
||||
};
|
||||
|
||||
/// Handle binary floating point type.
|
||||
class BTFTypeFloat : public BTFTypeBase {
|
||||
StringRef Name;
|
||||
|
||||
public:
|
||||
BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName);
|
||||
void completeType(BTFDebug &BDebug) override;
|
||||
};
|
||||
|
||||
/// String table.
|
||||
class BTFStringTable {
|
||||
/// String table size in bytes.
|
||||
|
@ -321,6 +321,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
|
||||
O << "0, ";
|
||||
printOperand(MI, OpNo, O);
|
||||
return false;
|
||||
case 'I':
|
||||
// Write 'i' if an integer constant, otherwise nothing. Used to print
|
||||
// addi vs add, etc.
|
||||
if (MI->getOperand(OpNo).isImm())
|
||||
O << "i";
|
||||
return false;
|
||||
case 'U': // Print 'u' for update form.
|
||||
case 'X': // Print 'x' for indexed form.
|
||||
// FIXME: Currently for PowerPC memory operands are always loaded
|
||||
|
@ -859,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
BuildMI(MBB, MBBI, dl,
|
||||
TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
|
||||
: PPC::PROBED_STACKALLOC_32))
|
||||
.addDef(ScratchReg)
|
||||
.addDef(TempReg) // TempReg stores the old sp.
|
||||
.addDef(TempReg)
|
||||
.addDef(ScratchReg) // ScratchReg stores the old sp.
|
||||
.addImm(NegFrameSize);
|
||||
// FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
|
||||
// update the ScratchReg to meet the assumption that ScratchReg contains
|
||||
// the NegFrameSize. This solution is rather tricky.
|
||||
if (!HasRedZone) {
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
|
||||
.addReg(TempReg)
|
||||
.addReg(ScratchReg)
|
||||
.addReg(SPReg);
|
||||
HasSTUX = true;
|
||||
}
|
||||
@ -1187,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
|
||||
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
// TODO: Generate CFI instructions.
bool isPPC64 = Subtarget.isPPC64();
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@ -1219,6 +1218,7 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
bool HasBP = RegInfo->hasBasePointer(MF);
Register BPReg = RegInfo->getBaseRegister(MF);
Align MaxAlign = MFI.getMaxAlign();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
// Subroutines to generate .cfi_* directives.
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
@ -1272,212 +1272,221 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
.addReg(SPReg)
.addReg(NegSizeReg);
};
// Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
// when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
// available and r1 is already copied to r30 which is BPReg. So BPReg stores
// the value of stackptr.
// First we have to probe tail interval whose size is less than probesize,
// i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
// ScratchReg stores the value of ((stackptr % align) % probesize). Then we
// probe each block sized probesize until stackptr meets
// (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
// as negprobesize. At both stages, TempReg stores the value of
// (stackptr - (stackptr % align)).
auto dynamicProbe = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, Register ScratchReg,
Register TempReg) {
assert(HasBP && isPPC64 && "Probe alignment part not available");
// Used to probe stack when realignment is required.
// Note that, according to ABI's requirement, *sp must always equals the
// value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
// Following is pseudo code:
// final_sp = (sp & align) + negframesize;
// neg_gap = final_sp - sp;
// while (neg_gap < negprobesize) {
// stdu fp, negprobesize(sp);
// neg_gap -= negprobesize;
// }
// stdux fp, sp, neg_gap
//
// When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
// before probe code, we don't need to save it, so we get one additional reg
// that can be used to materialize the probeside if needed to use xform.
// Otherwise, we can NOT materialize probeside, so we can only use Dform for
// now.
//
// The allocations are:
// if (HasBP && HasRedzone) {
// r0: materialize the probesize if needed so that we can use xform.
// r12: `neg_gap`
// } else {
// r0: back-chain pointer
// r12: `neg_gap`.
// }
auto probeRealignedStack = [&](MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Register ScratchReg, Register TempReg) {
assert(HasBP && "The function is supposed to have base pointer when its "
"stack is realigned.");
assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
// ScratchReg = stackptr % align
BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
.addReg(BPReg)
.addImm(0)
.addImm(64 - Log2(MaxAlign));
// TempReg = stackptr - (stackptr % align)
BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
.addReg(ScratchReg)
.addReg(BPReg);
// ScratchReg = (stackptr % align) % probesize
BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
.addReg(ScratchReg)
.addImm(0)
.addImm(64 - Log2(ProbeSize));

// FIXME: We can eliminate this limitation if we get more infomation about
// which part of redzone are already used. Used redzone can be treated
// probed. But there might be `holes' in redzone probed, this could
// complicate the implementation.
assert(ProbeSize >= Subtarget.getRedZoneSize() &&
"Probe size should be larger or equal to the size of red-zone so "
"that red-zone is not clobbered by probing.");

Register &FinalStackPtr = TempReg;
// FIXME: We only support NegProbeSize materializable by DForm currently.
// When HasBP && HasRedzone, we can use xform if we have an additional idle
// register.
NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
assert(isInt<16>(NegProbeSize) &&
"NegProbeSize should be materializable by DForm");
Register CRReg = PPC::CR0;
// If (stackptr % align) % probesize == 0, we should not generate probe
// code. Layout of output assembly kinda like:
// Layout of output assembly kinda like:
// bb.0:
// ...
// cmpldi $scratchreg, 0
// beq bb.2
// bb.1: # Probe tail interval
// neg $scratchreg, $scratchreg
// stdux $bpreg, r1, $scratchreg
// sub $scratchreg, $finalsp, r1
// cmpdi $scratchreg, <negprobesize>
// bge bb.2
// bb.1:
// stdu <backchain>, <negprobesize>(r1)
// sub $scratchreg, $scratchreg, negprobesize
// cmpdi $scratchreg, <negprobesize>
// blt bb.1
// bb.2:
// <materialize negprobesize into $scratchreg>
// cmpd r1, $tempreg
// beq bb.4
// bb.3: # Loop to probe each block
// stdux $bpreg, r1, $scratchreg
// cmpd r1, $tempreg
// bne bb.3
// bb.4:
// ...
// stdux <backchain>, r1, $scratchreg
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeResidualMBB);
MachineBasicBlock *ProbeLoopPreHeaderMBB =
MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeExitMBB);
// bb.4
ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
// bb.0
BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
BuildMI(&MBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_EQ)
.addReg(CRReg)
.addMBB(ProbeLoopPreHeaderMBB);
MBB.addSuccessor(ProbeResidualMBB);
MBB.addSuccessor(ProbeLoopPreHeaderMBB);
// bb.1
BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
.addReg(ScratchReg);
allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
false, BPReg);
ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
// bb.2
MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
NegProbeSize, ScratchReg);
BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
.addReg(SPReg)
.addReg(TempReg);
BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_EQ)
.addReg(CRReg)
.addMBB(ProbeExitMBB);
ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
// bb.3
allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
false, BPReg);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
.addReg(SPReg)
.addReg(TempReg);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(CRReg)
.addMBB(ProbeLoopBodyMBB);
ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
{
Register BackChainPointer = HasRedZone ? BPReg : TempReg;
allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
BackChainPointer);
if (HasRedZone)
// PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
// to TempReg to satisfy it.
BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
.addReg(BPReg)
.addReg(BPReg);
ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
}
// bb.0
{
BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
.addReg(SPReg)
.addReg(FinalStackPtr);
if (!HasRedZone)
BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
.addReg(ScratchReg)
.addImm(NegProbeSize);
BuildMI(&MBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_GE)
.addReg(CRReg)
.addMBB(ProbeExitMBB);
MBB.addSuccessor(ProbeLoopBodyMBB);
MBB.addSuccessor(ProbeExitMBB);
}
// bb.1
{
Register BackChainPointer = HasRedZone ? BPReg : TempReg;
allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
0, true /*UseDForm*/, BackChainPointer);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
ScratchReg)
.addReg(ScratchReg)
.addImm(-NegProbeSize);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
CRReg)
.addReg(ScratchReg)
.addImm(NegProbeSize);
BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
.addImm(PPC::PRED_LT)
.addReg(CRReg)
.addMBB(ProbeLoopBodyMBB);
ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
}
// Update liveins.
recomputeLiveIns(*ProbeResidualMBB);
recomputeLiveIns(*ProbeLoopPreHeaderMBB);
recomputeLiveIns(*ProbeLoopBodyMBB);
recomputeLiveIns(*ProbeExitMBB);
return ProbeExitMBB;
};
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
// SP = SP - SP % MaxAlign.
// SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
// the offset subtracted from SP is determined by SP's runtime value.
if (HasBP && MaxAlign > 1) {
// FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
// 64-bit mode.
if (isPPC64) {
// Use BPReg to calculate CFA.
if (needsCFI)
buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
// Since we have SPReg copied to BPReg at the moment, FPReg can be used as
// TempReg.
Register TempReg = FPReg;
CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
// Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
.addReg(BPReg)
.addReg(BPReg);
} else {
// Initialize current frame pointer.
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
// Calculate final stack pointer.
if (isPPC64)
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
.addReg(SPReg)
.addReg(SPReg);
// Use FPReg to calculate CFA.
if (needsCFI)
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
.addImm(0)
.addImm(64 - Log2(MaxAlign));
else
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
.addReg(FPReg)
.addReg(SPReg)
.addImm(0)
.addImm(32 - Log2(MaxAlign))
.addImm(31);
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
.addReg(ScratchReg)
.addReg(SPReg);
}
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
FPReg)
.addReg(ScratchReg)
.addReg(SPReg);
MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
FPReg)
.addReg(ScratchReg)
.addReg(FPReg);
CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
if (needsCFI)
buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
} else {
// Initialize current frame pointer.
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
// Use FPReg to calculate CFA.
if (needsCFI)
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
}
// Probe residual part.
if (NegResidualSize) {
bool ResidualUseDForm = CanUseDForm(NegResidualSize);
if (!ResidualUseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
ResidualUseDForm, FPReg);
}
bool UseDForm = CanUseDForm(NegProbeSize);
// If number of blocks is small, just probe them directly.
if (NumBlocks < 3) {
if (!UseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
for (int i = 0; i < NumBlocks; ++i)
allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
FPReg);
if (needsCFI) {
// Restore using SPReg to calculate CFA.
buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
// Probe residual part.
if (NegResidualSize) {
bool ResidualUseDForm = CanUseDForm(NegResidualSize);
if (!ResidualUseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
ResidualUseDForm, FPReg);
}
} else {
// Since CTR is a volatile register and current shrinkwrap implementation
// won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
// CTR loop to probe.
// Calculate trip count and stores it in CTRReg.
MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
.addReg(ScratchReg, RegState::Kill);
if (!UseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
// Create MBBs of the loop.
MachineFunction::iterator MBBInsertPoint =
std::next(CurrentMBB->getIterator());
MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, LoopMBB);
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ExitMBB);
// Synthesize the loop body.
allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
UseDForm, FPReg);
BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
.addMBB(LoopMBB);
LoopMBB->addSuccessor(ExitMBB);
LoopMBB->addSuccessor(LoopMBB);
// Synthesize the exit MBB.
ExitMBB->splice(ExitMBB->end(), CurrentMBB,
std::next(MachineBasicBlock::iterator(MI)),
CurrentMBB->end());
ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
CurrentMBB->addSuccessor(LoopMBB);
if (needsCFI) {
// Restore using SPReg to calculate CFA.
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
bool UseDForm = CanUseDForm(NegProbeSize);
// If number of blocks is small, just probe them directly.
if (NumBlocks < 3) {
if (!UseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
for (int i = 0; i < NumBlocks; ++i)
allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
FPReg);
if (needsCFI) {
// Restore using SPReg to calculate CFA.
buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
}
} else {
// Since CTR is a volatile register and current shrinkwrap implementation
// won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
// CTR loop to probe.
// Calculate trip count and stores it in CTRReg.
MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
.addReg(ScratchReg, RegState::Kill);
if (!UseDForm)
MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
// Create MBBs of the loop.
MachineFunction::iterator MBBInsertPoint =
std::next(CurrentMBB->getIterator());
MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, LoopMBB);
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ExitMBB);
// Synthesize the loop body.
allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
UseDForm, FPReg);
BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
.addMBB(LoopMBB);
LoopMBB->addSuccessor(ExitMBB);
LoopMBB->addSuccessor(LoopMBB);
// Synthesize the exit MBB.
ExitMBB->splice(ExitMBB->end(), CurrentMBB,
std::next(MachineBasicBlock::iterator(MI)),
CurrentMBB->end());
ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
CurrentMBB->addSuccessor(LoopMBB);
if (needsCFI) {
// Restore using SPReg to calculate CFA.
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
}
// Update liveins.
recomputeLiveIns(*LoopMBB);
recomputeLiveIns(*ExitMBB);
}
// Update liveins.
recomputeLiveIns(*LoopMBB);
recomputeLiveIns(*ExitMBB);
}
++NumPrologProbed;
MI.eraseFromParent();

@ -167,6 +167,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

// Custom lower inline assembly to check for special registers.
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);

// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@ -3461,6 +3465,57 @@ SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
return Op.getOperand(0);
}

SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();

assert((Op.getOpcode() == ISD::INLINEASM ||
Op.getOpcode() == ISD::INLINEASM_BR) &&
"Expecting Inline ASM node.");

// If an LR store is already known to be required then there is not point in
// checking this ASM as well.
if (MFI.isLRStoreRequired())
return Op;

// Inline ASM nodes have an optional last operand that is an incoming Flag of
// type MVT::Glue. We want to ignore this last operand if that is the case.
unsigned NumOps = Op.getNumOperands();
if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
--NumOps;

// Check all operands that may contain the LR.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
++i; // Skip the ID value.

switch (InlineAsm::getKind(Flags)) {
default:
llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegUse:
case InlineAsm::Kind_Imm:
case InlineAsm::Kind_Mem:
i += NumVals;
break;
case InlineAsm::Kind_Clobber:
case InlineAsm::Kind_RegDef:
case InlineAsm::Kind_RegDefEarlyClobber: {
for (; NumVals; --NumVals, ++i) {
Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
if (Reg != PPC::LR && Reg != PPC::LR8)
continue;
MFI.setLRStoreRequired();
return Op;
}
break;
}
}
}

return Op;
}

SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget.isAIXABI())
@ -10316,6 +10371,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);

case ISD::INLINEASM:
case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
// Variable argument lowering.
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
@ -15090,6 +15147,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
return std::make_pair(0U, &PPC::VSFRCRegClass);
} else if (Constraint == "lr") {
if (VT == MVT::i64)
return std::make_pair(0U, &PPC::LR8RCRegClass);
else
return std::make_pair(0U, &PPC::LRRCRegClass);
}

// If we name a VSX register, we can't defer to the base class because it

@ -1128,6 +1128,7 @@ namespace llvm {
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;

@ -173,7 +173,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in {
foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
def VSRp#!srl(Index, 1) : VSRPair<!srl(Index, 1), "vsp"#Index,
[!cast<VSRL>("VSL"#Index), !cast<VSRL>("VSL"#!add(Index, 1))]>,
DwarfRegNum<[0, 0]>;
DwarfRegNum<[-1, -1]>;
}

// VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63).
@ -181,7 +181,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in {
def VSRp#!add(!srl(Index, 1), 16) :
VSRPair<!add(!srl(Index, 1), 16), "vsp"#!add(Index, 32),
[!cast<VR>("V"#Index), !cast<VR>("V"#!add(Index, 1))]>,
DwarfRegNum<[0, 0]>;
DwarfRegNum<[-1, -1]>;
}
}

@ -409,20 +409,27 @@ def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
let isAllocatable = 0;
}

def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> {
let isAllocatable = 0;
}
def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> {
let isAllocatable = 0;
}

def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
let CopyCost = -1;
}

let SubRegIndices = [sub_pair0, sub_pair1] in {
def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>;
def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>;
def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>;
def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>;
def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>;
def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>;
def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>;
def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>;
def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
ACC4, ACC5, ACC6, ACC7)> {
@ -430,14 +437,14 @@ def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
}

let SubRegIndices = [sub_pair0, sub_pair1] in {
def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>;
def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>;
def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>;
def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>;
def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>;
def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>;
def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>;
def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>;
def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def UACCRC : RegisterClass<"PPC", [v512i1], 128,
(add UACC0, UACC1, UACC2, UACC3,

@ -1212,6 +1212,27 @@ unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

bool PPCTTIImpl::areFunctionArgsABICompatible(
const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const {

// We need to ensure that argument promotion does not
// attempt to promote pointers to MMA types (__vector_pair
// and __vector_quad) since these types explicitly cannot be
// passed as arguments. Both of these types are larger than
// the 128-bit Altivec vectors and have a scalar size of 1 bit.
if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
return false;

return llvm::none_of(Args, [](Argument *A) {
auto *EltTy = cast<PointerType>(A->getType())->getElementType();
if (EltTy->isSized())
return (EltTy->isIntOrIntVectorTy(1) &&
EltTy->getPrimitiveSizeInBits() > 128);
return false;
});
}

bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {

@ -129,6 +129,9 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);

bool areFunctionArgsABICompatible(const Function *Caller,
const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const;
/// @}
};

@ -3909,10 +3909,10 @@ foreach vti = AllIntegerVectors in {
(DecImm simm5_plus1:$rs2),
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0),
def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(vti.Mask VR:$merge),
(vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK")
VR:$merge,
@ -3922,17 +3922,17 @@ foreach vti = AllIntegerVectors in {
GPR:$vl,
vti.SEW)>;

def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
(DecImm simm5_plus1:$rs2),
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(vti.Mask VR:$merge),
(vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK")
VR:$merge,
@ -3950,11 +3950,11 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rs1,
GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar 0),
(vti.Mask VR:$merge),
(XLenVT (VLOp GPR:$vl)))),
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar 0),
(vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK")
VR:$merge,
vti.RegClass:$rs1,

@ -285,10 +285,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);

// FIXME: Can we support these natively?
// Expand 128 bit shifts without using a libcall.
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);

// We have native instructions for i8, i16 and i32 extensions, but not i1.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

@ -885,16 +885,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
SmallVector<Value *, 16> FMCArgs;
for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) {
Constant *Clause = LPI->getClause(I);
// As a temporary workaround for the lack of aggregate varargs support
// in the interface between JS and wasm, break out filter operands into
// their component elements.
if (LPI->isFilter(I)) {
auto *ATy = cast<ArrayType>(Clause->getType());
for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) {
Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter");
FMCArgs.push_back(EV);
}
} else
// TODO Handle filters (= exception specifications).
// https://bugs.llvm.org/show_bug.cgi?id=50396
if (LPI->isCatch(I))
FMCArgs.push_back(Clause);
}

@ -37889,6 +37889,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
// replicating low and high halves (and without changing the type/length of
// the vector), we don't need the shuffle.
if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
if (Opcode == X86ISD::VBROADCAST && !VT.is128BitVector())
return SDValue();
if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) {
// movddup (hadd X, X) --> hadd X, X
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X

@ -1344,15 +1344,18 @@ def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;

// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. Any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits, they're probably just qualifying a CopyFromReg.
// be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying
// anything about the upper 32 bits, they're probably just qualifying a
// CopyFromReg. FREEZE may be coming from a a truncate. Any other 32-bit
// operation will zero-extend up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
N->getOpcode() != ISD::AssertSext &&
N->getOpcode() != ISD::AssertZext;
N->getOpcode() != ISD::AssertZext &&
N->getOpcode() != ISD::AssertAlign &&
N->getOpcode() != ISD::FREEZE;
}]>;

// In the case of a 32-bit def that is known to implicitly zero-extend,

@ -3221,11 +3221,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
}
}

// ~(X - Y) --> ~X + Y
if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
if (isa<Constant>(X) || NotVal->hasOneUse())
return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);

// ~(~X >>s Y) --> (X >>s Y)
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
return BinaryOperator::CreateAShr(X, Y);
@ -3256,9 +3251,15 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
}

// ~(X + C) --> -(C + 1) - X
if (match(Op0, m_Add(m_Value(X), m_Constant(C))))
return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X);
// ~(X + C) --> ~C - X
if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C))))
return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X);

// ~(X - Y) --> ~X + Y
// FIXME: is it really beneficial to sink the `not` here?
if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
if (isa<Constant>(X) || NotVal->hasOneUse())
return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);

// ~(~X + Y) --> X - Y
if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y))))

@ -1095,7 +1095,10 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
/// TODO: Wrapping flags could be preserved in some cases with better analysis.
Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
ICmpInst &Cmp) {
if (!Cmp.isEquality())
// Value equivalence substitution requires an all-or-nothing replacement.
// It does not make sense for a vector compare where each lane is chosen
// independently.
if (!Cmp.isEquality() || Cmp.getType()->isVectorTy())
return nullptr;

// Canonicalize the pattern to ICMP_EQ by swapping the select operands.

@ -21,6 +21,30 @@ using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
Value *ShAmt1) {
// We have two shift amounts from two different shifts. The types of those
// shift amounts may not match. If that's the case let's bailout now..
if (ShAmt0->getType() != ShAmt1->getType())
return false;

// As input, we have the following pattern:
// Sh0 (Sh1 X, Q), K
// We want to rewrite that as:
// Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
// While we know that originally (Q+K) would not overflow
// (because 2 * (N-1) u<= iN -1), we have looked past extensions of
// shift amounts. so it may now overflow in smaller bitwidth.
// To ensure that does not happen, we need to ensure that the total maximal
// shift amount is still representable in that smaller bit width.
unsigned MaximalPossibleTotalShiftAmount =
(Sh0->getType()->getScalarSizeInBits() - 1) +
(Sh1->getType()->getScalarSizeInBits() - 1);
APInt MaximalRepresentableShiftAmount =
APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
}

// Given pattern:
// (x shiftopcode Q) shiftopcode K
// we should rewrite it as
@ -57,26 +81,8 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
return nullptr;

// We have two shift amounts from two different shifts. The types of those
// shift amounts may not match. If that's the case let's bailout now..
if (ShAmt0->getType() != ShAmt1->getType())
return nullptr;

// As input, we have the following pattern:
// Sh0 (Sh1 X, Q), K
// We want to rewrite that as:
// Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
// While we know that originally (Q+K) would not overflow
// (because 2 * (N-1) u<= iN -1), we have looked past extensions of
// shift amounts. so it may now overflow in smaller bitwidth.
// To ensure that does not happen, we need to ensure that the total maximal
// shift amount is still representable in that smaller bit width.
unsigned MaximalPossibleTotalShiftAmount =
(Sh0->getType()->getScalarSizeInBits() - 1) +
(Sh1->getType()->getScalarSizeInBits() - 1);
APInt MaximalRepresentableShiftAmount =
APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
// Verify that it would be safe to try to add those two shift amounts.
if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1))
return nullptr;

// We are only looking for signbit extraction if we have two right shifts.
@ -220,9 +226,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// Peek through an optional zext of the shift amount.
match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

// We have two shift amounts from two different shifts. The types of those
// shift amounts may not match. If that's the case let's bailout now.
if (MaskShAmt->getType() != ShiftShAmt->getType())
// Verify that it would be safe to try to add those two shift amounts.
if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
MaskShAmt))
return nullptr;

// Can we simplify (MaskShAmt+ShiftShAmt) ?
@ -252,9 +258,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// Peek through an optional zext of the shift amount.
match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));

// We have two shift amounts from two different shifts. The types of those
// shift amounts may not match. If that's the case let's bailout now.
if (MaskShAmt->getType() != ShiftShAmt->getType())
// Verify that it would be safe to try to add those two shift amounts.
if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
MaskShAmt))
return nullptr;

// Can we simplify (ShiftShAmt-MaskShAmt) ?

@ -1081,6 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
DenseSet<Instruction*> V;
collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
for (auto *I : V) {
if (I->mayHaveSideEffects()) {
LLVM_DEBUG(dbgs() << "LRR: Aborting - "
<< "An instruction which does not belong to any root "
<< "sets must not have side effects: " << *I);
return false;
}
Uses[I].set(IL_All);
}

@ -43,10 +43,10 @@ STATISTIC(ObjectSizeIntrinsicsHandled,
"Number of 'objectsize' intrinsic calls handled");

static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) {
Value *Op = II->getOperand(0);

return isa<Constant>(Op) ? ConstantInt::getTrue(II->getType())
: ConstantInt::getFalse(II->getType());
if (auto *C = dyn_cast<Constant>(II->getOperand(0)))
if (C->isManifestConstant())
return ConstantInt::getTrue(II->getType());
return ConstantInt::getFalse(II->getType());
}

static bool replaceConditionalBranchesOnConstant(Instruction *II,

@ -542,9 +542,14 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {

auto Iter = AdditionalUsers.find(I);
if (Iter != AdditionalUsers.end()) {
// Copy additional users before notifying them of changes, because new
// users may be added, potentially invalidating the iterator.
SmallVector<Instruction *, 2> ToNotify;
for (User *U : Iter->second)
if (auto *UI = dyn_cast<Instruction>(U))
OperandChangedState(UI);
ToNotify.push_back(UI);
for (Instruction *UI : ToNotify)
OperandChangedState(UI);
}
}
void handleCallOverdefined(CallBase &CB);

@ -780,7 +780,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
Function::iterator FEnd) {
MDNode *MemParallelLoopAccess =
CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
@ -789,41 +790,33 @@ static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
return;

for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
VMI != VMIE; ++VMI) {
// Check that key is an instruction, to skip the Argument mapping, which
// points to an instruction in the original function, not the inlined one.
if (!VMI->second || !isa<Instruction>(VMI->first))
continue;
for (BasicBlock &BB : make_range(FStart, FEnd)) {
for (Instruction &I : BB) {
// This metadata is only relevant for instructions that access memory.
if (!I.mayReadOrWriteMemory())
continue;

Instruction *NI = dyn_cast<Instruction>(VMI->second);
if (!NI)
continue;

// This metadata is only relevant for instructions that access memory.
if (!NI->mayReadOrWriteMemory())
continue;

if (MemParallelLoopAccess) {
// TODO: This probably should not overwrite MemParalleLoopAccess.
MemParallelLoopAccess = MDNode::concatenate(
NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access),
MemParallelLoopAccess);
NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access,
if (MemParallelLoopAccess) {
// TODO: This probably should not overwrite MemParalleLoopAccess.
MemParallelLoopAccess = MDNode::concatenate(
I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
MemParallelLoopAccess);
I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
MemParallelLoopAccess);
}

if (AccessGroup)
I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
I.getMetadata(LLVMContext::MD_access_group), AccessGroup));

if (AliasScope)
I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));

if (NoAlias)
I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
I.getMetadata(LLVMContext::MD_noalias), NoAlias));
}

if (AccessGroup)
NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
NI->getMetadata(LLVMContext::MD_access_group), AccessGroup));

if (AliasScope)
NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope));

if (NoAlias)
NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
NI->getMetadata(LLVMContext::MD_noalias), NoAlias));
}
}

||||
@ -844,9 +837,9 @@ class ScopedAliasMetadataDeepCloner {
|
||||
/// subsequent remap() calls.
|
||||
void clone();
|
||||
|
||||
/// Remap instructions in the given VMap from the original to the cloned
|
||||
/// Remap instructions in the given range from the original to the cloned
|
||||
/// metadata.
|
||||
void remap(ValueToValueMapTy &VMap);
|
||||
void remap(Function::iterator FStart, Function::iterator FEnd);
|
||||
};
|
||||
|
||||
ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
|
||||
@ -907,34 +900,27 @@ void ScopedAliasMetadataDeepCloner::clone() {
|
||||
}
|
||||
}
|
||||
|
||||
void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
|
||||
void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
|
||||
Function::iterator FEnd) {
|
||||
if (MDMap.empty())
|
||||
return; // Nothing to do.
|
||||
|
||||
for (auto Entry : VMap) {
|
||||
// Check that key is an instruction, to skip the Argument mapping, which
|
||||
// points to an instruction in the original function, not the inlined one.
|
||||
if (!Entry->second || !isa<Instruction>(Entry->first))
|
||||
continue;
|
||||
for (BasicBlock &BB : make_range(FStart, FEnd)) {
|
||||
for (Instruction &I : BB) {
|
||||
// TODO: The null checks for the MDMap.lookup() results should no longer
|
||||
// be necessary.
|
||||
if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
|
||||
if (MDNode *MNew = MDMap.lookup(M))
|
||||
I.setMetadata(LLVMContext::MD_alias_scope, MNew);
|
||||
|
||||
Instruction *I = dyn_cast<Instruction>(Entry->second);
|
||||
if (!I)
|
||||
continue;
|
||||
if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
|
||||
if (MDNode *MNew = MDMap.lookup(M))
|
||||
I.setMetadata(LLVMContext::MD_noalias, MNew);
|
||||
|
||||
// Only update scopes when we find them in the map. If they are not, it is
|
||||
// because we already handled that instruction before. This is faster than
|
||||
// tracking which instructions we already updated.
|
||||
if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
|
||||
if (MDNode *MNew = MDMap.lookup(M))
|
||||
I->setMetadata(LLVMContext::MD_alias_scope, MNew);
|
||||
|
||||
if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
|
||||
if (MDNode *MNew = MDMap.lookup(M))
|
||||
I->setMetadata(LLVMContext::MD_noalias, MNew);
|
||||
|
||||
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
|
||||
if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
|
||||
Decl->setScopeList(MNew);
|
||||
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
|
||||
if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
|
||||
Decl->setScopeList(MNew);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1926,7 +1912,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,

// Now clone the inlined noalias scope metadata.
SAMetadataCloner.clone();
SAMetadataCloner.remap(VMap);
SAMetadataCloner.remap(FirstNewBlock, Caller->end());

// Add noalias metadata if necessary.
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
@ -1936,7 +1922,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
AddReturnAttributes(CB, VMap);

// Propagate metadata on the callsite if necessary.
PropagateCallSiteMetadata(CB, VMap);
PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());

// Register any cloned assumptions.
if (IFI.GetAssumptionCache)