From e4bbddaec8689e1b24f25e88958bea700e989542 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Fri, 18 Jun 2021 21:08:25 +0200 Subject: [PATCH] Vendor import of llvm-project branch release/12.x llvmorg-12.0.1-rc2-0-ge7dac564cd0e, a.k.a. 12.0.1 rc2. --- clang/lib/Basic/Targets/PPC.cpp | 1 + clang/lib/CodeGen/CGOpenMPRuntime.cpp | 32 +- clang/lib/CodeGen/CodeGenModule.cpp | 12 +- clang/lib/Format/TokenAnnotator.cpp | 12 +- clang/lib/Format/UnwrappedLineFormatter.cpp | 7 - clang/lib/Format/UnwrappedLineParser.cpp | 89 +++-- clang/lib/Format/UnwrappedLineParser.h | 12 +- clang/lib/Headers/ppc_wrappers/xmmintrin.h | 9 +- clang/lib/Sema/SemaChecking.cpp | 5 +- libcxx/include/memory | 32 +- lld/ELF/SyntheticSections.cpp | 4 +- lldb/docs/man/lldb.rst | 4 +- lldb/tools/driver/Driver.cpp | 4 +- llvm/include/llvm/ADT/Any.h | 7 +- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1 + llvm/include/llvm/IR/Constant.h | 4 + llvm/lib/Analysis/ConstantFolding.cpp | 15 +- llvm/lib/Analysis/InstructionSimplify.cpp | 8 +- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 4 +- llvm/lib/Analysis/ScalarEvolution.cpp | 4 + llvm/lib/Analysis/ValueTracking.cpp | 15 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 5 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 44 ++- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24 +- .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 33 +- llvm/lib/CodeGen/StackProtector.cpp | 24 +- llvm/lib/IR/Constants.cpp | 12 + llvm/lib/MC/ELFObjectWriter.cpp | 11 + llvm/lib/Support/CommandLine.cpp | 2 +- .../Target/AArch64/AArch64ISelLowering.cpp | 43 +- .../GISel/AArch64InstructionSelector.cpp | 2 +- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 22 +- llvm/lib/Target/BPF/BPFMIPeephole.cpp | 3 + llvm/lib/Target/BPF/BPFPreserveDIType.cpp | 9 + llvm/lib/Target/BPF/BPFTargetMachine.cpp | 6 + llvm/lib/Target/BPF/BPFTargetMachine.h | 2 + llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 61 +++ llvm/lib/Target/BPF/BTF.def | 1 + llvm/lib/Target/BPF/BTFDebug.cpp | 62 ++- llvm/lib/Target/BPF/BTFDebug.h | 11 +- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 6 + llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 367 +++++++++--------- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 62 +++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 + llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 43 +- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 21 + .../Target/PowerPC/PPCTargetTransformInfo.h | 3 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 20 +- .../Target/SystemZ/SystemZISelLowering.cpp | 5 +- .../WebAssemblyLowerEmscriptenEHSjLj.cpp | 13 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 + llvm/lib/Target/X86/X86InstrCompiler.td | 11 +- .../InstCombine/InstCombineAndOrXor.cpp | 17 +- .../InstCombine/InstCombineSelect.cpp | 5 +- .../InstCombine/InstCombineShifts.cpp | 58 +-- llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 6 + .../Scalar/LowerConstantIntrinsics.cpp | 8 +- llvm/lib/Transforms/Scalar/SCCP.cpp | 7 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 106 +++-- 59 files changed, 914 insertions(+), 505 deletions(-) create mode 100644 llvm/lib/Target/BPF/BPFTargetTransformInfo.h diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index ff09c0fa2a23..6c3036836c6d 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -57,6 +57,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector &Features, } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; } else if (Feature == "+spe" || Feature == "+efpu2") { + HasStrictFP = false; HasSPE = true; 
LongDoubleWidth = LongDoubleAlign = 64; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 83dfa0780547..caa5291ff6fa 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -409,6 +409,7 @@ class InlinedOpenMPRegionRAII { llvm::DenseMap LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; const CodeGen::CGBlockInfo *BlockInfo = nullptr; + bool NoInheritance = false; public: /// Constructs region for combined constructs. @@ -416,16 +417,19 @@ class InlinedOpenMPRegionRAII { /// a list of functions used for code generation of implicitly inlined /// regions. InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) - : CGF(CGF) { + OpenMPDirectiveKind Kind, bool HasCancel, + bool NoInheritance = true) + : CGF(CGF), NoInheritance(NoInheritance) { // Start emission for the construct. CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); - std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); - LambdaThisCaptureField = CGF.LambdaThisCaptureField; - CGF.LambdaThisCaptureField = nullptr; - BlockInfo = CGF.BlockInfo; - CGF.BlockInfo = nullptr; + if (NoInheritance) { + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + LambdaThisCaptureField = CGF.LambdaThisCaptureField; + CGF.LambdaThisCaptureField = nullptr; + BlockInfo = CGF.BlockInfo; + CGF.BlockInfo = nullptr; + } } ~InlinedOpenMPRegionRAII() { @@ -434,9 +438,11 @@ class InlinedOpenMPRegionRAII { cast(CGF.CapturedStmtInfo)->getOldCSI(); delete CGF.CapturedStmtInfo; CGF.CapturedStmtInfo = OldCSI; - std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); - CGF.LambdaThisCaptureField = LambdaThisCaptureField; - CGF.BlockInfo = BlockInfo; + if (NoInheritance) { + std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); + CGF.LambdaThisCaptureField = LambdaThisCaptureField; + CGF.BlockInfo = BlockInfo; + } } }; @@ -3853,7 +3859,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Processing for implicitly captured variables. 
InlinedOpenMPRegionRAII Region( CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, - /*HasCancel=*/false); + /*HasCancel=*/false, /*NoInheritance=*/true); SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); } if (Type->isArrayType()) { @@ -6214,7 +6220,9 @@ void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, bool HasCancel) { if (!CGF.HaveInsertPoint()) return; - InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); + InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, + InnerKind != OMPD_critical && + InnerKind != OMPD_master); CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 31afbc6b4262..9c9bd4e374af 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -6215,15 +6215,17 @@ llvm::SanitizerStatReport &CodeGenModule::getSanStats() { return *SanStats; } + llvm::Value * CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF) { llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType()); - auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); - auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); - return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy, - "__translate_sampler_initializer"), - {C}); + auto *SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); + auto *FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); + auto *Call = CGF.Builder.CreateCall( + CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C}); + Call->setCallingConv(Call->getCalledFunction()->getCallingConv()); + return Call; } CharUnits CodeGenModule::getNaturalPointeeTypeAlignment( diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 34c291ecc492..82d6cfed308d 100755 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1917,12 +1917,12 @@ class AnnotatingParser { if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) return true; - if (Tok.Next->is(tok::l_paren) && - !(Tok.Previous && Tok.Previous->is(tok::identifier) && - Tok.Previous->Previous && - Tok.Previous->Previous->isOneOf(tok::arrowstar, tok::arrow, - tok::star))) - return true; + // Look for a cast `( x ) (`. 
+ if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) { + if (Tok.Previous->is(tok::identifier) && + Tok.Previous->Previous->is(tok::l_paren)) + return true; + } if (!Tok.Next->Next) return false; diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 5dd0ccdfa6fd..7d197310e65b 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -1281,13 +1281,6 @@ void UnwrappedLineFormatter::formatFirstToken( if (Newlines) Indent = NewlineIndent; - // If in Whitemsmiths mode, indent start and end of blocks - if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { - if (RootToken.isOneOf(tok::l_brace, tok::r_brace, tok::kw_case, - tok::kw_default)) - Indent += Style.IndentWidth; - } - // Preprocessor directives get indented before the hash only if specified if (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash && (Line.Type == LT_PreprocessorDirective || diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index f689a6361a3a..bec18bd5d8df 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -579,17 +579,23 @@ size_t UnwrappedLineParser::computePPHash() const { return h; } -void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, - bool MunchSemi) { +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, + bool MunchSemi, + bool UnindentWhitesmithsBraces) { assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && "'{' or macro block token expected"); const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); FormatTok->setBlockKind(BK_Block); + // For Whitesmiths mode, jump to the next level prior to skipping over the + // braces. + if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) + ++Line->Level; + size_t PPStartHash = computePPHash(); unsigned InitialLevel = Line->Level; - nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); + nextToken(/*LevelDifference=*/AddLevels); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -602,10 +608,16 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, ? (UnwrappedLine::kInvalidIndex) : (CurrentLines->size() - 1 - NbPreprocessorDirectives); + // Whitesmiths is weird here. The brace needs to be indented for the namespace + // block, but the block itself may not be indented depending on the style + // settings. This allows the format to back up one level in those cases. + if (UnindentWhitesmithsBraces) + --Line->Level; + ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); - if (AddLevel) - ++Line->Level; + if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) + Line->Level += AddLevels; parseLevel(/*HasOpeningBrace=*/true); if (eof()) @@ -621,7 +633,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, size_t PPEndHash = computePPHash(); // Munch the closing brace. - nextToken(/*LevelDifference=*/AddLevel ? 
-1 : 0); + nextToken(/*LevelDifference=*/-AddLevels); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); @@ -637,6 +649,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, nextToken(); Line->Level = InitialLevel; + FormatTok->setBlockKind(BK_Block); if (PPStartHash == PPEndHash) { Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; @@ -2128,15 +2141,34 @@ void UnwrappedLineParser::parseNamespace() { if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); - bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || - (Style.NamespaceIndentation == FormatStyle::NI_Inner && - DeclarationScopeStack.size() > 1); - parseBlock(/*MustBeDeclaration=*/true, AddLevel); + unsigned AddLevels = + Style.NamespaceIndentation == FormatStyle::NI_All || + (Style.NamespaceIndentation == FormatStyle::NI_Inner && + DeclarationScopeStack.size() > 1) + ? 1u + : 0u; + bool ManageWhitesmithsBraces = + AddLevels == 0u && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; + + // If we're in Whitesmiths mode, indent the brace if we're not indenting + // the whole block. + if (ManageWhitesmithsBraces) + ++Line->Level; + + parseBlock(/*MustBeDeclaration=*/true, AddLevels, + /*MunchSemi=*/true, + /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); + // Munch the semicolon after a namespace. This is more common than one would // think. Putting the semicolon into its own line is very ugly. if (FormatTok->Tok.is(tok::semi)) nextToken(); - addUnwrappedLine(); + + addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); + + if (ManageWhitesmithsBraces) + --Line->Level; } // FIXME: Add error handling. } @@ -2222,6 +2254,11 @@ void UnwrappedLineParser::parseDoWhile() { return; } + // If in Whitesmiths mode, the line with the while() needs to be indented + // to the same level as the block. + if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) + ++Line->Level; + nextToken(); parseStructuralElement(); } @@ -2234,25 +2271,19 @@ void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { if (LeftAlignLabel) Line->Level = 0; - bool RemoveWhitesmithsCaseIndent = - (!Style.IndentCaseBlocks && - Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths); - - if (RemoveWhitesmithsCaseIndent) - --Line->Level; - if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter( - this, Line->Level, Style.BraceWrapping.AfterCaseLabel, - Style.BraceWrapping.IndentBraces || RemoveWhitesmithsCaseIndent); + CompoundStatementIndenter Indenter(this, Line->Level, + Style.BraceWrapping.AfterCaseLabel, + Style.BraceWrapping.IndentBraces); parseBlock(/*MustBeDeclaration=*/false); if (FormatTok->Tok.is(tok::kw_break)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); - if (RemoveWhitesmithsCaseIndent) { + if (!Style.IndentCaseBlocks && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { Line->Level++; } } @@ -2920,17 +2951,29 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, llvm::dbgs() << "\n"; } -void UnwrappedLineParser::addUnwrappedLine() { +void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { if (Line->Tokens.empty()) return; LLVM_DEBUG({ if (CurrentLines == &Lines) printDebugInfo(*Line); }); + + // If this line closes a block when in Whitesmiths mode, remember that + // information so that the level can be decreased after the line is added. 
+ // This has to happen after the addition of the line since the line itself + // needs to be indented. + bool ClosesWhitesmithsBlock = + Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && + Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; + CurrentLines->push_back(std::move(*Line)); Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; + + if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) + --Line->Level; if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 02b328cb72de..ce135fac5e57 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -85,8 +85,9 @@ class UnwrappedLineParser { void reset(); void parseFile(); void parseLevel(bool HasOpeningBrace); - void parseBlock(bool MustBeDeclaration, bool AddLevel = true, - bool MunchSemi = true); + void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1u, + bool MunchSemi = true, + bool UnindentWhitesmithsBraces = false); void parseChildBlock(); void parsePPDirective(); void parsePPDefine(); @@ -140,7 +141,12 @@ class UnwrappedLineParser { bool tryToParsePropertyAccessor(); void tryToParseJSFunction(); bool tryToParseSimpleAttribute(); - void addUnwrappedLine(); + + // Used by addUnwrappedLine to denote whether to keep or remove a level + // when resetting the line state. + enum class LineLevel { Remove, Keep }; + + void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove); bool eof() const; // LevelDifference is the difference of levels after and before the current // token. For example: diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h index 0f429fa04081..0e45b96769f8 100644 --- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h +++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h @@ -28,7 +28,7 @@ Most SSE scalar float intrinsic operations can be performed more efficiently as C language float scalar operations or optimized to use vector SIMD operations. We recommend this for new applications. */ -#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." +#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." #endif #ifndef _XMMINTRIN_H_INCLUDED @@ -62,14 +62,13 @@ /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ -typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef vector float __m128 __attribute__((__may_alias__)); /* Unaligned version of the same type. */ -typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, - __aligned__ (1))); +typedef vector float __m128_u __attribute__((__may_alias__, __aligned__(1))); /* Internal data types for implementing the intrinsics. */ -typedef float __v4sf __attribute__ ((__vector_size__ (16))); +typedef vector float __v4sf; /* Create an undefined vector. 
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2d3d36f4adad..2b55712d44c2 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2623,7 +2623,10 @@ static bool isValidBPFPreserveEnumValueArg(Expr *Arg) { return false; const auto *CE = dyn_cast(UO->getSubExpr()); - if (!CE || CE->getCastKind() != CK_IntegralToPointer) + if (!CE) + return false; + if (CE->getCastKind() != CK_IntegralToPointer && + CE->getCastKind() != CK_NullToPointer) return false; // The integer must be from an EnumConstantDecl. diff --git a/libcxx/include/memory b/libcxx/include/memory index 39d0f5bee6a5..efb10c8fd25b 100644 --- a/libcxx/include/memory +++ b/libcxx/include/memory @@ -99,14 +99,14 @@ struct allocator_traits }; template <> -class allocator // deprecated in C++17, removed in C++20 +class allocator // removed in C++20 { public: - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; + typedef void* pointer; // deprecated in C++17 + typedef const void* const_pointer; // deprecated in C++17 + typedef void value_type; // deprecated in C++17 - template struct rebind {typedef allocator<_Up> other;}; + template struct rebind {typedef allocator<_Up> other;}; // deprecated in C++17 }; template @@ -786,27 +786,27 @@ to_address(const _Pointer& __p) _NOEXCEPT template class allocator; -#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS) +#if _LIBCPP_STD_VER <= 17 template <> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator +class _LIBCPP_TEMPLATE_VIS allocator { public: - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef void* pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef void value_type; - template struct rebind {typedef allocator<_Up> other;}; + template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;}; }; template <> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator +class _LIBCPP_TEMPLATE_VIS allocator { public: - typedef const void* pointer; - typedef const void* const_pointer; - typedef const void value_type; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; + _LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type; - template struct rebind {typedef allocator<_Up> other;}; + template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;}; }; #endif diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 9a875bd7ec3e..70c36c63d101 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3110,7 +3110,9 @@ size_t VersionTableSection::getSize() const { void VersionTableSection::writeTo(uint8_t *buf) { buf += 2; for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) { - write16(buf, s.sym->versionId); + // Use the original versionId for an unfetched lazy symbol (undefined weak), + // which must be VER_NDX_GLOBAL (an undefined versioned symbol is an error). + write16(buf, s.sym->isLazy() ? VER_NDX_GLOBAL : s.sym->versionId); buf += 2; } } diff --git a/lldb/docs/man/lldb.rst b/lldb/docs/man/lldb.rst index 6dca15fa35dc..b75288db380d 100644 --- a/lldb/docs/man/lldb.rst +++ b/lldb/docs/man/lldb.rst @@ -256,11 +256,11 @@ executable. 
To disambiguate between arguments passed to lldb and arguments passed to the debugged executable, arguments starting with a - must be passed after --. - lldb --arch x86_64 /path/to/program program argument -- --arch arvm7 + lldb --arch x86_64 /path/to/program program argument -- --arch armv7 For convenience, passing the executable after -- is also supported. - lldb --arch x86_64 -- /path/to/program program argument --arch arvm7 + lldb --arch x86_64 -- /path/to/program program argument --arch armv7 Passing one of the attach options causes :program:`lldb` to immediately attach to the given process. diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index e4a60127b65e..210a712f9741 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -751,11 +751,11 @@ static void printHelp(LLDBOptTable &table, llvm::StringRef tool_name) { arguments passed to the debugged executable, arguments starting with a - must be passed after --. - lldb --arch x86_64 /path/to/program program argument -- --arch arvm7 + lldb --arch x86_64 /path/to/program program argument -- --arch armv7 For convenience, passing the executable after -- is also supported. - lldb --arch x86_64 -- /path/to/program program argument --arch arvm7 + lldb --arch x86_64 -- /path/to/program program argument --arch armv7 Passing one of the attach options causes lldb to immediately attach to the given process. diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h index 0aded628cda4..1e3abca70679 100644 --- a/llvm/include/llvm/ADT/Any.h +++ b/llvm/include/llvm/ADT/Any.h @@ -23,7 +23,12 @@ namespace llvm { -class Any { +class LLVM_EXTERNAL_VISIBILITY Any { + + // The `Typeid::Id` static data member below is a globally unique + // identifier for the type `T`. It is explicitly marked with default + // visibility so that when `-fvisibility=hidden` is used, the loader still + // merges duplicate definitions across DSO boundaries. template struct TypeId { static const char Id; }; struct StorageBase { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index c3b494e94ff1..4a982b00125d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -316,6 +316,7 @@ class LegalizerHelper { LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty); + LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty); diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h index 0190aca27b72..71692c746015 100644 --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -214,6 +214,10 @@ class Constant : public User { /// both must either be scalars or vectors with the same element count. If no /// changes are made, the constant C is returned. static Constant *mergeUndefsWith(Constant *C, Constant *Other); + + /// Return true if a constant is ConstantData or a ConstantAggregate or + /// ConstantExpr that contain only ConstantData. 
+ bool isManifestConstant() const; }; } // end namespace llvm diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index f73890d548f0..cc1ce4c65821 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1808,19 +1808,6 @@ double getValueAsDouble(ConstantFP *Op) { return APF.convertToDouble(); } -static bool isManifestConstant(const Constant *c) { - if (isa(c)) { - return true; - } else if (isa(c) || isa(c)) { - for (const Value *subc : c->operand_values()) { - if (!isManifestConstant(cast(subc))) - return false; - } - return true; - } - return false; -} - static bool getConstIntOrUndef(Value *Op, const APInt *&C) { if (auto *CI = dyn_cast(Op)) { C = &CI->getValue(); @@ -1845,7 +1832,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, // We know we have a "Constant" argument. But we want to only // return true for manifest constants, not those that depend on // constants with unknowable values, e.g. GlobalValue or BlockAddress. - if (isManifestConstant(Operands[0])) + if (Operands[0]->isManifestConstant()) return ConstantInt::getTrue(Ty->getContext()); return nullptr; } diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index c40e5c36cdc7..a12816885c40 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4127,10 +4127,12 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal, TrueVal, FalseVal)) return V; - // If we have an equality comparison, then we know the value in one of the - // arms of the select. See if substituting this value into the arm and + // If we have a scalar equality comparison, then we know the value in one of + // the arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. - if (Pred == ICmpInst::ICMP_EQ) { + // Note that the equivalence/replacement opportunity does not hold for vectors + // because each element of a vector select is chosen independently. + if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) { if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, /* AllowRefinement */ false, MaxRecurse) == TrueVal || diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 895936d47175..886b5bf4acd3 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -344,7 +344,9 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, // If we hit load/store with the same invariant.group metadata (and the // same pointer operand) we can assume that value pointed by pointer // operand didn't change. - if ((isa(U) || isa(U)) && + if ((isa(U) || + (isa(U) && + cast(U)->getPointerOperand() == Ptr)) && U->hasMetadata(LLVMContext::MD_invariant_group)) ClosestDependency = GetClosestDependency(ClosestDependency, U); } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index fe9d8297d679..1a9ae68573e9 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -10622,6 +10622,10 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, if (!dominates(RHS, IncBB)) return false; const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); + // Make sure L does not refer to a value from a potentially previous + // iteration of a loop. 
+ if (!properlyDominates(L, IncBB)) + return false; if (!ProvedEasily(L, RHS)) return false; } diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index e174c5efe424..75486d3c80e7 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -5150,6 +5150,9 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, return false; } + // Limit number of instructions we look at, to avoid scanning through large + // blocks. The current limit is chosen arbitrarily. + unsigned ScanLimit = 32; BasicBlock::const_iterator End = BB->end(); if (!PoisonOnly) { @@ -5160,6 +5163,11 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, // For example, 'udiv x, (undef | 1)' isn't UB. for (auto &I : make_range(Begin, End)) { + if (isa(I)) + continue; + if (--ScanLimit == 0) + break; + if (const auto *CB = dyn_cast(&I)) { for (unsigned i = 0; i < CB->arg_size(); ++i) { if (CB->paramHasAttr(i, Attribute::NoUndef) && @@ -5186,9 +5194,12 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, for_each(V->users(), Propagate); Visited.insert(BB); - unsigned Iter = 0; - while (Iter++ < MaxAnalysisRecursionDepth) { + while (true) { for (auto &I : make_range(Begin, End)) { + if (isa(I)) + continue; + if (--ScanLimit == 0) + return false; if (mustTriggerUB(&I, YieldsPoison)) return true; if (!isGuaranteedToTransferExecutionToSuccessor(&I)) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b97c369b832d..b7883cbc3120 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -840,9 +840,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, // just re-use the existing condition vreg. 
- if (CI && CI->getZExtValue() == 1 && - MRI->getType(CondLHS).getSizeInBits() == 1 && - CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && + CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { Cond = CondLHS; } else { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 3178ee16af2b..66871ca3b926 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1257,22 +1257,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_FPTOUI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); - Observer.changedInstr(MI); - return Legalized; - } - case TargetOpcode::G_FPTOSI: { - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_FPTOSI: + return narrowScalarFPTOI(MI, TypeIdx, NarrowTy); case TargetOpcode::G_FPEXT: if (TypeIdx != 0) return UnableToLegalize; @@ -4496,6 +4483,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI; + + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // If all finite floats fit into the narrowed integer type, we can just swap + // out the result type. This is practically only useful for conversions from + // half to at least 16-bits, so just handle the one case. + if (SrcTy.getScalarType() != LLT::scalar(16) || + NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16)) + return UnableToLegalize; + + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, + IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6a6f83827f72..7f2add81e80d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7105,14 +7105,22 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { if (LegalOperations) return SDValue(); - // Collect all the stores in the chain. - SDValue Chain; - SmallVector Stores; - for (StoreSDNode *Store = N; Store; Store = dyn_cast(Chain)) { - // TODO: Allow unordered atomics when wider type is legal (see D66309) - EVT MemVT = Store->getMemoryVT(); - if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || - !Store->isSimple() || Store->isIndexed()) + // We only handle merging simple stores of 1-4 bytes. + // TODO: Allow unordered atomics when wider type is legal (see D66309) + EVT MemVT = N->getMemoryVT(); + if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) || + !N->isSimple() || N->isIndexed()) + return SDValue(); + + // Collect all of the stores in the chain. 
+ SDValue Chain = N->getChain(); + SmallVector Stores = {N}; + while (auto *Store = dyn_cast(Chain)) { + // All stores must be the same size to ensure that we are writing all of the + // bytes in the wide value. + // TODO: We could allow multiple sizes by tracking each stored byte. + if (Store->getMemoryVT() != MemVT || !Store->isSimple() || + Store->isIndexed()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7bae5048fc0e..d17dd1c5eccb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1691,9 +1691,9 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { /// terminator, but additionally the copies that move the vregs into the /// physical registers. static MachineBasicBlock::iterator -FindSplitPointForStackProtector(MachineBasicBlock *BB) { +FindSplitPointForStackProtector(MachineBasicBlock *BB, + const TargetInstrInfo &TII) { MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); - // if (SplitPoint == BB->begin()) return SplitPoint; @@ -1701,6 +1701,31 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) { MachineBasicBlock::iterator Previous = SplitPoint; --Previous; + if (TII.isTailCall(*SplitPoint) && + Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { + // call itself, then we must insert before the sequence even starts. For + // example: + // + // ADJCALLSTACKDOWN ... + // + // ADJCALLSTACKUP ... + // TAILJMP somewhere + // On the other hand, it could be an unrelated call in which case this tail call + // has to register moves of its own and should be the split point. For example: + // ADJCALLSTACKDOWN + // CALL something_else + // ADJCALLSTACKUP + // + // TAILJMP somewhere + do { + --Previous; + if (Previous->isCall()) + return SplitPoint; + } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode()); + + return Previous; + } + while (MIIsInTerminatorSequence(*Previous)) { SplitPoint = Previous; if (Previous == Start) @@ -1740,7 +1765,7 @@ SelectionDAGISel::FinishBasicBlock() { // Add load and check to the basicblock. FuncInfo->MBB = ParentMBB; FuncInfo->InsertPt = - FindSplitPointForStackProtector(ParentMBB); + FindSplitPointForStackProtector(ParentMBB, *TII); SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); @@ -1759,7 +1784,7 @@ SelectionDAGISel::FinishBasicBlock() { // register allocation issues caused by us splitting the parent mbb. The // register allocator will clean up said virtual copies later on. MachineBasicBlock::iterator SplitPoint = - FindSplitPointForStackProtector(ParentMBB); + FindSplitPointForStackProtector(ParentMBB, *TII); // Splice the terminator of ParentMBB into SuccessMBB. SuccessMBB->splice(SuccessMBB->end(), ParentMBB, diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 8d91afb6e99d..10c6dcbdb049 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -470,21 +470,36 @@ bool StackProtector::InsertStackProtectors() { // instrumentation has already been generated. HasIRCheck = true; + // If we're instrumenting a block with a musttail call, the check has to be + // inserted before the call rather than between it and the return. 
The + // verifier guarantees that a musttail call is either directly before the + // return or with a single correct bitcast of the return value in between so + // we don't need to worry about many situations here. + Instruction *CheckLoc = RI; + Instruction *Prev = RI->getPrevNonDebugInstruction(); + if (Prev && isa(Prev) && cast(Prev)->isMustTailCall()) + CheckLoc = Prev; + else if (Prev) { + Prev = Prev->getPrevNonDebugInstruction(); + if (Prev && isa(Prev) && cast(Prev)->isMustTailCall()) + CheckLoc = Prev; + } + // Generate epilogue instrumentation. The epilogue intrumentation can be // function-based or inlined depending on which mechanism the target is // providing. if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) { // Generate the function-based epilogue instrumentation. // The target provides a guard check function, generate a call to it. - IRBuilder<> B(RI); + IRBuilder<> B(CheckLoc); LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard"); CallInst *Call = B.CreateCall(GuardCheck, {Guard}); Call->setAttributes(GuardCheck->getAttributes()); Call->setCallingConv(GuardCheck->getCallingConv()); } else { // Generate the epilogue with inline instrumentation. - // If we do not support SelectionDAG based tail calls, generate IR level - // tail calls. + // If we do not support SelectionDAG based calls, generate IR level + // calls. // // For each block with a return instruction, convert this: // @@ -514,7 +529,8 @@ bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = CreateFailBB(); // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); + BasicBlock *NewBB = + BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return"); // Update the dominator tree if we need to. if (DT && DT->isReachableFromEntry(BB)) { diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp index 6fd205c654a8..9f05917cf7cc 100644 --- a/llvm/lib/IR/Constants.cpp +++ b/llvm/lib/IR/Constants.cpp @@ -803,6 +803,18 @@ Constant *Constant::mergeUndefsWith(Constant *C, Constant *Other) { return C; } +bool Constant::isManifestConstant() const { + if (isa(this)) + return true; + if (isa(this) || isa(this)) { + for (const Value *Op : operand_values()) + if (!cast(Op)->isManifestConstant()) + return false; + return true; + } + return false; +} + //===----------------------------------------------------------------------===// // ConstantInt //===----------------------------------------------------------------------===// diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 69307b617552..2d810ffd350b 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1397,6 +1397,17 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, if (TargetObjectWriter->getEMachine() == ELF::EM_386 && Type == ELF::R_386_GOTOFF) return true; + + // ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so + // it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an + // R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in + // range of a MergeInputSection. We could introduce a new RelExpr member + // (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12) + // but the complexity is unnecessary given that GNU as keeps the original + // symbol for this case as well. 
+ if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS && + !hasRelocationAddend()) + return true; } // Most TLS relocations use a got, so they need the symbol. Even those that diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index e2f014d1815b..123a23a5242c 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -1729,7 +1729,7 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent, void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent, size_t FirstLineIndentedBy) { const StringRef ValHelpPrefix = " "; - assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size()); + assert(BaseIndent >= FirstLineIndentedBy); std::pair Split = HelpStr.split('\n'); outs().indent(BaseIndent - FirstLineIndentedBy) << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n"; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1451151f4dc5..c522ee76626d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16335,25 +16335,36 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size > 128) return AtomicExpansionKind::None; - // Nand not supported in LSE. - if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC; - // Leave 128 bits to LLSC. - if (Subtarget->hasLSE() && Size < 128) - return AtomicExpansionKind::None; - if (Subtarget->outlineAtomics() && Size < 128) { - // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. - // Don't outline them unless - // (1) high level support approved: - // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf - // (2) low level libgcc and compiler-rt support implemented by: - // min/max outline atomics helpers - if (AI->getOperation() != AtomicRMWInst::Min && - AI->getOperation() != AtomicRMWInst::Max && - AI->getOperation() != AtomicRMWInst::UMin && - AI->getOperation() != AtomicRMWInst::UMax) { + + // Nand is not supported in LSE. + // Leave 128 bits to LLSC or CmpXChg. + if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) { + if (Subtarget->hasLSE()) return AtomicExpansionKind::None; + if (Subtarget->outlineAtomics()) { + // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far. + // Don't outline them unless + // (1) high level support approved: + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf + // (2) low level libgcc and compiler-rt support implemented by: + // min/max outline atomics helpers + if (AI->getOperation() != AtomicRMWInst::Min && + AI->getOperation() != AtomicRMWInst::Max && + AI->getOperation() != AtomicRMWInst::UMin && + AI->getOperation() != AtomicRMWInst::UMax) { + return AtomicExpansionKind::None; + } } } + + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on the + // stack and close enough to the spill slot, this can lead to a situation + // where the monitor always gets cleared and the atomic operation can never + // succeed. So at -O0 lower this operation to a CAS loop. 
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + return AtomicExpansionKind::LLSC; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 5259f4f5a4d0..fc5ef02e8457 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -1791,7 +1791,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr( NegOpc = AArch64::NEGv8i16; } else if (Ty == LLT::vector(16, 8)) { Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; - NegOpc = AArch64::NEGv8i16; + NegOpc = AArch64::NEGv16i8; } else if (Ty == LLT::vector(8, 8)) { Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8; NegOpc = AArch64::NEGv8i8; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 112eb59e173d..e418d53b56a4 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5934,6 +5934,9 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register || AddrMode == ARMII::AddrModeT2_pc // PCrel access || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST + || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE + || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE + || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR || AddrMode == ARMII::AddrModeNone) return false; @@ -5976,6 +5979,10 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, NumBits = 8; break; case ARMII::AddrModeT2_i8s4: + // FIXME: Values are already scaled in this addressing mode. + assert((Fixup & 3) == 0 && "Can't encode this offset!"); + NumBits = 10; + break; case ARMII::AddrModeT2_ldrex: NumBits = 8; Scale = 4; @@ -5984,17 +5991,6 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, case ARMII::AddrMode_i12: NumBits = 12; break; - case ARMII::AddrModeT2_i7: - NumBits = 7; - break; - case ARMII::AddrModeT2_i7s2: - NumBits = 7; - Scale = 2; - break; - case ARMII::AddrModeT2_i7s4: - NumBits = 7; - Scale = 4; - break; case ARMII::AddrModeT1_s: // SP-relative LD/ST NumBits = 8; Scale = 4; @@ -6004,8 +6000,8 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI, } // Make sure the offset is encodable for instructions that scale the // immediate. 
- if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0) - return false; + assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 && + "Can't encode this offset!"); OffVal += Fixup / Scale; unsigned Mask = (1 << NumBits) - 1; diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp index df870314fffe..354980e4bf3c 100644 --- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp +++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp @@ -475,6 +475,9 @@ bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) { if (MI.getOpcode() == BPF::SRL_ri && MI.getOperand(2).getImm() == 32) { SrcReg = MI.getOperand(1).getReg(); + if (!MRI->hasOneNonDBGUse(SrcReg)) + continue; + MI2 = MRI->getVRegDef(SrcReg); DstReg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp index 18a4f60c171a..0348e2200acb 100644 --- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp +++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp @@ -85,8 +85,17 @@ static bool BPFPreserveDITypeImpl(Function &F) { } else { Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE; DIType *Ty = cast(MD); + while (auto *DTy = dyn_cast(Ty)) { + unsigned Tag = DTy->getTag(); + if (Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type) + break; + Ty = DTy->getBaseType(); + } + if (Ty->getName().empty()) report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc"); + MD = Ty; } BasicBlock *BB = Call->getParent(); diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index c0244b9f2c74..a8fef2517b03 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -12,6 +12,7 @@ #include "BPFTargetMachine.h" #include "BPF.h" +#include "BPFTargetTransformInfo.h" #include "MCTargetDesc/BPFMCAsmInfo.h" #include "TargetInfo/BPFTargetInfo.h" #include "llvm/CodeGen/Passes.h" @@ -145,6 +146,11 @@ void BPFPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); } +TargetTransformInfo +BPFTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(BPFTTIImpl(this, F)); +} + // Install an instruction selector pass using // the ISelDag to gen BPF code. bool BPFPassConfig::addInstSelector() { diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h index 5243a15eb7b0..61c8a44cc402 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.h +++ b/llvm/lib/Target/BPF/BPFTargetMachine.h @@ -34,6 +34,8 @@ class BPFTargetMachine : public LLVMTargetMachine { TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; + TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h new file mode 100644 index 000000000000..62055497e685 --- /dev/null +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -0,0 +1,61 @@ +//===------ BPFTargetTransformInfo.h - BPF specific TTI ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file uses the target's specific information to +// provide more precise answers to certain TTI queries, while letting the +// target independent and default TTI implementations handle the rest. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H + +#include "BPFTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" + +namespace llvm { +class BPFTTIImpl : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const BPFSubtarget *ST; + const BPFTargetLowering *TLI; + + const BPFSubtarget *getST() const { return ST; } + const BPFTargetLowering *getTLI() const { return TLI; } + +public: + explicit BPFTTIImpl(const BPFTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { + if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) + return TTI::TCC_Free; + + return TTI::TCC_Basic; + } + + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, + TTI::TargetCostKind CostKind, + const llvm::Instruction *I = nullptr) { + if (Opcode == Instruction::Select) + return SCEVCheapExpansionBudget; + + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/lib/Target/BPF/BTF.def index 2d2e9a04aa6d..66cf2c90ead4 100644 --- a/llvm/lib/Target/BPF/BTF.def +++ b/llvm/lib/Target/BPF/BTF.def @@ -30,5 +30,6 @@ HANDLE_BTF_KIND(12, FUNC) HANDLE_BTF_KIND(13, FUNC_PROTO) HANDLE_BTF_KIND(14, VAR) HANDLE_BTF_KIND(15, DATASEC) +HANDLE_BTF_KIND(16, FLOAT) #undef HANDLE_BTF_KIND diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index f9bdffe7cbae..9249d679c7bd 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -371,6 +371,21 @@ void BTFKindDataSec::emitType(MCStreamer &OS) { } } +BTFTypeFloat::BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName) + : Name(TypeName) { + Kind = BTF::BTF_KIND_FLOAT; + BTFType.Info = Kind << 24; + BTFType.Size = roundupToBytes(SizeInBits); +} + +void BTFTypeFloat::completeType(BTFDebug &BDebug) { + if (IsCompleted) + return; + IsCompleted = true; + + BTFType.NameOff = BDebug.addString(Name); +} + uint32_t BTFStringTable::addString(StringRef S) { // Check whether the string already exists. for (auto &OffsetM : OffsetToIdMap) { @@ -409,18 +424,28 @@ uint32_t BTFDebug::addType(std::unique_ptr TypeEntry) { } void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) { - // Only int types are supported in BTF. + // Only int and binary floating point types are supported in BTF. 
uint32_t Encoding = BTy->getEncoding(); - if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed && - Encoding != dwarf::DW_ATE_signed_char && - Encoding != dwarf::DW_ATE_unsigned && - Encoding != dwarf::DW_ATE_unsigned_char) + std::unique_ptr TypeEntry; + switch (Encoding) { + case dwarf::DW_ATE_boolean: + case dwarf::DW_ATE_signed: + case dwarf::DW_ATE_signed_char: + case dwarf::DW_ATE_unsigned: + case dwarf::DW_ATE_unsigned_char: + // Create a BTF type instance for this DIBasicType and put it into + // DIToIdMap for cross-type reference check. + TypeEntry = std::make_unique( + Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName()); + break; + case dwarf::DW_ATE_float: + TypeEntry = + std::make_unique(BTy->getSizeInBits(), BTy->getName()); + break; + default: return; + } - // Create a BTF type instance for this DIBasicType and put it into - // DIToIdMap for cross-type reference check. - auto TypeEntry = std::make_unique( - Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName()); TypeId = addType(std::move(TypeEntry), BTy); } @@ -1171,6 +1196,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { if (Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::ExternalLinkage && Linkage != GlobalValue::WeakAnyLinkage && + Linkage != GlobalValue::WeakODRLinkage && Linkage != GlobalValue::ExternalWeakLinkage) continue; @@ -1199,8 +1225,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) { const DataLayout &DL = Global.getParent()->getDataLayout(); uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType()); - DataSecEntries[std::string(SecName)]->addVar(VarId, Asm->getSymbol(&Global), - Size); + DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId, + Asm->getSymbol(&Global), Size); } } @@ -1278,7 +1304,19 @@ void BTFDebug::processFuncPrototypes(const Function *F) { uint8_t Scope = BTF::FUNC_EXTERN; auto FuncTypeEntry = std::make_unique(SP->getName(), ProtoTypeId, Scope); - addType(std::move(FuncTypeEntry)); + uint32_t FuncId = addType(std::move(FuncTypeEntry)); + if (F->hasSection()) { + StringRef SecName = F->getSection(); + + if (DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) { + DataSecEntries[std::string(SecName)] = + std::make_unique(Asm, std::string(SecName)); + } + + // We really don't know func size, set it to 0. + DataSecEntries[std::string(SecName)]->addDataSecEntry(FuncId, + Asm->getSymbol(F), 0); + } } void BTFDebug::endModule() { diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index 1bad0d11fee4..76f1901779bb 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -187,7 +187,7 @@ class BTFKindDataSec : public BTFTypeBase { uint32_t getSize() override { return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size(); } - void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) { + void addDataSecEntry(uint32_t Id, const MCSymbol *Sym, uint32_t Size) { Vars.push_back(std::make_tuple(Id, Sym, Size)); } std::string getName() { return Name; } @@ -195,6 +195,15 @@ class BTFKindDataSec : public BTFTypeBase { void emitType(MCStreamer &OS) override; }; +/// Handle binary floating point type. +class BTFTypeFloat : public BTFTypeBase { + StringRef Name; + +public: + BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName); + void completeType(BTFDebug &BDebug) override; +}; + /// String table. class BTFStringTable { /// String table size in bytes. 
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index cce21f32414a..6257709731b9 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -321,6 +321,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, O << "0, "; printOperand(MI, OpNo, O); return false; + case 'I': + // Write 'i' if an integer constant, otherwise nothing. Used to print + // addi vs add, etc. + if (MI->getOperand(OpNo).isImm()) + O << "i"; + return false; case 'U': // Print 'u' for update form. case 'X': // Print 'x' for indexed form. // FIXME: Currently for PowerPC memory operands are always loaded diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp index 50ce11b8374f..16536bf23deb 100644 --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -859,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 : PPC::PROBED_STACKALLOC_32)) - .addDef(ScratchReg) - .addDef(TempReg) // TempReg stores the old sp. + .addDef(TempReg) + .addDef(ScratchReg) // ScratchReg stores the old sp. .addImm(NegFrameSize); // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we // update the ScratchReg to meet the assumption that ScratchReg contains // the NegFrameSize. This solution is rather tricky. if (!HasRedZone) { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) - .addReg(TempReg) + .addReg(ScratchReg) .addReg(SPReg); HasSTUX = true; } @@ -1187,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const { - // TODO: Generate CFI instructions. bool isPPC64 = Subtarget.isPPC64(); const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -1219,6 +1218,7 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, bool HasBP = RegInfo->hasBasePointer(MF); Register BPReg = RegInfo->getBaseRegister(MF); Align MaxAlign = MFI.getMaxAlign(); + bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); // Subroutines to generate .cfi_* directives. auto buildDefCFAReg = [&](MachineBasicBlock &MBB, @@ -1272,212 +1272,221 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, .addReg(SPReg) .addReg(NegSizeReg); }; - // Used to probe realignment gap [stackptr - (stackptr % align), stackptr) - // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30 - // available and r1 is already copied to r30 which is BPReg. So BPReg stores - // the value of stackptr. - // First we have to probe tail interval whose size is less than probesize, - // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage, - // ScratchReg stores the value of ((stackptr % align) % probesize). Then we - // probe each block sized probesize until stackptr meets - // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized - // as negprobesize. At both stages, TempReg stores the value of - // (stackptr - (stackptr % align)). 
- auto dynamicProbe = [&](MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, Register ScratchReg, - Register TempReg) { - assert(HasBP && isPPC64 && "Probe alignment part not available"); + // Used to probe stack when realignment is required. + // Note that, according to the ABI's requirement, *sp must always equal the + // value of the back-chain pointer; only st(w|d)u(x) can be used to update sp. + // The following is pseudo code: + // final_sp = (sp & align) + negframesize; + // neg_gap = final_sp - sp; + // while (neg_gap < negprobesize) { + // stdu fp, negprobesize(sp); + // neg_gap -= negprobesize; + // } + // stdux fp, sp, neg_gap + // + // When HasBP & HasRedzone, the back-chain pointer is already saved in BPReg + // before the probe code, so we don't need to save it and we gain one + // additional register that can be used to materialize the probe size if we + // need to use the XForm. Otherwise, we cannot materialize the probe size, + // so we can only use the DForm for now. + // + // The allocations are: + // if (HasBP && HasRedzone) { + // r0: materialize the probe size if needed so that we can use the XForm. + // r12: `neg_gap` + // } else { + // r0: back-chain pointer + // r12: `neg_gap`. + // } + auto probeRealignedStack = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register ScratchReg, Register TempReg) { + assert(HasBP && "The function is supposed to have a base pointer when its " + "stack is realigned."); assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); - // ScratchReg = stackptr % align - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(BPReg) - .addImm(0) - .addImm(64 - Log2(MaxAlign)); - // TempReg = stackptr - (stackptr % align) - BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg) - .addReg(ScratchReg) - .addReg(BPReg); - // ScratchReg = (stackptr % align) % probesize - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(ScratchReg) - .addImm(0) - .addImm(64 - Log2(ProbeSize)); + + // FIXME: We can eliminate this limitation if we get more information about + // which parts of the red zone are already used. A used red zone can be + // treated as probed, but there might be `holes' in the probed red zone, + // which could complicate the implementation. + assert(ProbeSize >= Subtarget.getRedZoneSize() && + "Probe size should be larger than or equal to the size of red-zone so " + "that red-zone is not clobbered by probing."); + + Register &FinalStackPtr = TempReg; + // FIXME: We only support a NegProbeSize materializable by the DForm + // currently. When HasBP && HasRedzone, we can use the XForm if we have an + // additional idle register. + NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); + assert(isInt<16>(NegProbeSize) && + "NegProbeSize should be materializable by DForm"); Register CRReg = PPC::CR0; - // If (stackptr % align) % probesize == 0, we should not generate probe - // code. Layout of output assembly kinda like: + // The layout of the output assembly looks roughly like: // bb.0: // ... - // cmpldi $scratchreg, 0 - // beq bb.2 - // bb.1: # Probe tail interval - // neg $scratchreg, $scratchreg - // stdux $bpreg, r1, $scratchreg + // sub $scratchreg, $finalsp, r1 + // cmpdi $scratchreg, <negprobesize> + // bge bb.2 + // bb.1: + // stdu <backchain>, <negprobesize>(r1) + // sub $scratchreg, $scratchreg, negprobesize + // cmpdi $scratchreg, <negprobesize> + // blt bb.1 // bb.2: - // - // cmpd r1, $tempreg - // beq bb.4 - // bb.3: # Loop to probe each block - // stdux $bpreg, r1, $scratchreg - // cmpd r1, $tempreg - // bne bb.3 - // bb.4: - // ...
+ // stdux <backchain>, r1, $scratchreg MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); - MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeResidualMBB); - MachineBasicBlock *ProbeLoopPreHeaderMBB = - MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB); MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeExitMBB); - // bb.4 - ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); - ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); - // bb.0 - BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0); - BuildMI(&MBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeLoopPreHeaderMBB); - MBB.addSuccessor(ProbeResidualMBB); - MBB.addSuccessor(ProbeLoopPreHeaderMBB); - // bb.1 - BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg) - .addReg(ScratchReg); - allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg, - false, BPReg); - ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB); // bb.2 - MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(), - NegProbeSize, ScratchReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg) - .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeExitMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB); - // bb.3 - allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg, - false, BPReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg) - .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_NE) - .addReg(CRReg) - .addMBB(ProbeLoopBodyMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, + BackChainPointer); + if (HasRedZone) + // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp; copy BPReg + // to TempReg to satisfy it. + BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) + .addReg(BPReg) + .addReg(BPReg); + ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); + ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + } + // bb.0 + { + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) + .addReg(SPReg) + .addReg(FinalStackPtr); + if (!HasRedZone) + BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(&MBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_GE) + .addReg(CRReg) + .addMBB(ProbeExitMBB); + MBB.addSuccessor(ProbeLoopBodyMBB); + MBB.addSuccessor(ProbeExitMBB); + } + // bb.1 + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, + 0, true /*UseDForm*/, BackChainPointer); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), + ScratchReg) + .addReg(ScratchReg) + .addImm(-NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ?
PPC::CMPDI : PPC::CMPWI), + CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_LT) + .addReg(CRReg) + .addMBB(ProbeLoopBodyMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + } // Update liveins. - recomputeLiveIns(*ProbeResidualMBB); - recomputeLiveIns(*ProbeLoopPreHeaderMBB); recomputeLiveIns(*ProbeLoopBodyMBB); recomputeLiveIns(*ProbeExitMBB); return ProbeExitMBB; }; // For case HasBP && MaxAlign > 1, we have to realign the SP by performing - // SP = SP - SP % MaxAlign. + // SP = SP - SP % MaxAlign, thus making the probe more like a dynamic probe, + // since the offset subtracted from SP is determined by SP's runtime value. if (HasBP && MaxAlign > 1) { - // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in - // 64-bit mode. - if (isPPC64) { - // Use BPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, BPReg, 0); - // Since we have SPReg copied to BPReg at the moment, FPReg can be used as - // TempReg. - Register TempReg = FPReg; - CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg); - // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) - .addReg(BPReg) - .addReg(BPReg); - } else { - // Initialize current frame pointer. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) + // Calculate final stack pointer. + if (isPPC64) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) .addReg(SPReg) - .addReg(SPReg); - // Use FPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); + .addImm(0) + .addImm(64 - Log2(MaxAlign)); + else BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) - .addReg(FPReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg) - .addReg(ScratchReg) - .addReg(SPReg); - } + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), + FPReg) + .addReg(ScratchReg) + .addReg(SPReg); + MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FPReg) + .addReg(ScratchReg) + .addReg(FPReg); + CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); + if (needsCFI) + buildDefCFAReg(*CurrentMBB, {MI}, FPReg); } else { // Initialize current frame pointer. BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); // Use FPReg to calculate CFA. if (needsCFI) buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); - } - // Probe residual part. - if (NegResidualSize) { - bool ResidualUseDForm = CanUseDForm(NegResidualSize); - if (!ResidualUseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); - allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, - ResidualUseDForm, FPReg); - } - bool UseDForm = CanUseDForm(NegProbeSize); - // If number of blocks is small, just probe them directly. - if (NumBlocks < 3) { - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - for (int i = 0; i < NumBlocks; ++i) - allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, - FPReg); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + // Probe residual part.
+ if (NegResidualSize) { + bool ResidualUseDForm = CanUseDForm(NegResidualSize); + if (!ResidualUseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); + allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); } - } else { - // Since CTR is a volatile register and current shrinkwrap implementation - // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a - // CTR loop to probe. - // Calculate trip count and stores it in CTRReg. - MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) - .addReg(ScratchReg, RegState::Kill); - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - // Create MBBs of the loop. - MachineFunction::iterator MBBInsertPoint = - std::next(CurrentMBB->getIterator()); - MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, LoopMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ExitMBB); - // Synthesize the loop body. - allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, - UseDForm, FPReg); - BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) - .addMBB(LoopMBB); - LoopMBB->addSuccessor(ExitMBB); - LoopMBB->addSuccessor(LoopMBB); - // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), CurrentMBB, - std::next(MachineBasicBlock::iterator(MI)), - CurrentMBB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); - CurrentMBB->addSuccessor(LoopMBB); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + bool UseDForm = CanUseDForm(NegProbeSize); + // If the number of blocks is small, just probe them directly. + if (NumBlocks < 3) { + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + for (int i = 0; i < NumBlocks; ++i) + allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, + FPReg); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + } + } else { + // Since CTR is a volatile register and current shrinkwrap implementation + // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a + // CTR loop to probe. + // Calculate the trip count and store it in CTRReg. + MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) + .addReg(ScratchReg, RegState::Kill); + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + // Create MBBs of the loop. + MachineFunction::iterator MBBInsertPoint = + std::next(CurrentMBB->getIterator()); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, LoopMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ExitMBB); + // Synthesize the loop body. + allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, + UseDForm, FPReg); + BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) + .addMBB(LoopMBB); + LoopMBB->addSuccessor(ExitMBB); + LoopMBB->addSuccessor(LoopMBB); + // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), CurrentMBB, + std::next(MachineBasicBlock::iterator(MI)), + CurrentMBB->end()); + ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); + CurrentMBB->addSuccessor(LoopMBB); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + } + // Update liveins. + recomputeLiveIns(*LoopMBB); + recomputeLiveIns(*ExitMBB); } } ++NumPrologProbed; MI.eraseFromParent(); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 929a72ac687e..7833bfc1d1b6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -167,6 +167,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); + // Custom lower inline assembly to check for special registers. + setOperationAction(ISD::INLINEASM, MVT::Other, Custom); + setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); @@ -3461,6 +3465,57 @@ SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, return Op.getOperand(0); } +SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>(); + + assert((Op.getOpcode() == ISD::INLINEASM || + Op.getOpcode() == ISD::INLINEASM_BR) && + "Expecting Inline ASM node."); + + // If an LR store is already known to be required then there is no point in + // checking this ASM as well. + if (MFI.isLRStoreRequired()) + return Op; + + // Inline ASM nodes have an optional last operand that is an incoming Flag of + // type MVT::Glue. We want to ignore this last operand if that is the case. + unsigned NumOps = Op.getNumOperands(); + if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue) + --NumOps; + + // Check all operands that may contain the LR. + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: + llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegUse: + case InlineAsm::Kind_Imm: + case InlineAsm::Kind_Mem: + i += NumVals; + break; + case InlineAsm::Kind_Clobber: + case InlineAsm::Kind_RegDef: + case InlineAsm::Kind_RegDefEarlyClobber: { + for (; NumVals; --NumVals, ++i) { + Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg(); + if (Reg != PPC::LR && Reg != PPC::LR8) + continue; + MFI.setLRStoreRequired(); + return Op; + } + break; + } + } + } + + return Op; +} + SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isAIXABI()) @@ -10316,6 +10371,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::INLINEASM: + case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); // Variable argument lowering.
case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); @@ -15090,6 +15147,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); + } else if (Constraint == "lr") { + if (VT == MVT::i64) + return std::make_pair(0U, &PPC::LR8RCRegClass); + else + return std::make_pair(0U, &PPC::LRRCRegClass); } // If we name a VSX register, we can't defer to the base class because it diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 0dda2c181572..836c52bdff95 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1128,6 +1128,7 @@ namespace llvm { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index e03617aa75ff..551735c85b51 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -173,7 +173,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in { foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { def VSRp#!srl(Index, 1) : VSRPair("VSL"#Index), !cast("VSL"#!add(Index, 1))]>, - DwarfRegNum<[0, 0]>; + DwarfRegNum<[-1, -1]>; } // VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63). 
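Note on the link-register handling added in the PPCISelLowering.cpp hunks above and the LRRC/LR8RC register classes defined in the next hunk: the "lr" inline-asm constraint now resolves to the link register, and the custom INLINEASM lowering scans clobber/def operands so that any asm touching LR forces an LR save in the prologue. A hypothetical user-level example of the kind of code this handles (illustrative only; not taken from the patch):

    // Reads the link register on PowerPC. Declaring "lr" clobbered is what
    // the Kind_Clobber scan in LowerINLINEASM detects; it then calls
    // setLRStoreRequired() so the prologue saves and restores LR.
    unsigned long read_link_register(void) {
      unsigned long lr;
      asm volatile("mflr %0" : "=r"(lr) : : "lr");
      return lr;
    }

Without the forced LR store, an asm statement that clobbers LR could leave the surrounding function unable to return to its caller correctly.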
@@ -181,7 +181,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in { def VSRp#!add(!srl(Index, 1), 16) : VSRPair("V"#Index), !cast("V"#!add(Index, 1))]>, - DwarfRegNum<[0, 0]>; + DwarfRegNum<[-1, -1]>; } } @@ -409,20 +409,27 @@ def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> { let isAllocatable = 0; } +def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> { + let isAllocatable = 0; +} +def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> { + let isAllocatable = 0; +} + def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { let CopyCost = -1; } let SubRegIndices = [sub_pair0, sub_pair1] in { - def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>; - def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>; - def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>; - def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>; - def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>; - def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>; - def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>; - def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>; + def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; } def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, ACC4, ACC5, ACC6, ACC7)> { @@ -430,14 +437,14 @@ def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, } let SubRegIndices = [sub_pair0, sub_pair1] in { - def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>; - def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>; - def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>; - def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>; - def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>; - def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>; - def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>; - def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>; + def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; + def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; + def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; + def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; + def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; + def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; + def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; + def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; } def UACCRC : RegisterClass<"PPC", [v512i1], 128, (add UACC0, UACC1, UACC2, UACC3, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index b3d8100fe016..c90ff8b7d59d 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1212,6 +1212,27 @@ unsigned 
PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return BaseT::getIntrinsicInstrCost(ICA, CostKind); } +bool PPCTTIImpl::areFunctionArgsABICompatible( + const Function *Caller, const Function *Callee, + SmallPtrSetImpl<Argument *> &Args) const { + + // We need to ensure that argument promotion does not + // attempt to promote pointers to MMA types (__vector_pair + // and __vector_quad) since these types explicitly cannot be + // passed as arguments. Both of these types are larger than + // the 128-bit Altivec vectors and have a scalar size of 1 bit. + if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) + return false; + + return llvm::none_of(Args, [](Argument *A) { + auto *EltTy = cast<PointerType>(A->getType())->getElementType(); + if (EltTy->isSized()) + return (EltTy->isIntOrIntVectorTy(1) && + EltTy->getPrimitiveSizeInBits() > 128); + return false; + }); +} + bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) { diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index bc946715156f..c38ae90bc7dc 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -129,6 +129,9 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> { unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); + bool areFunctionArgsABICompatible(const Function *Caller, + const Function *Callee, + SmallPtrSetImpl<Argument *> &Args) const; /// @} }; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 60bd1b24cab8..5c228820f0cc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -3909,10 +3909,10 @@ foreach vti = AllIntegerVectors in { (DecImm simm5_plus1:$rs2), GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0), + def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), - (vti.Mask VR:$merge), + (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK") VR:$merge, @@ -3922,17 +3922,17 @@ foreach vti = AllIntegerVectors in { GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), + def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1, (DecImm simm5_plus1:$rs2), GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0), + def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), - (vti.Mask VR:$merge), + (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK") VR:$merge, @@ -3950,11 +3950,11 @@ foreach vti = AllIntegerVectors in { vti.RegClass:$rs1, GPR:$vl, vti.SEW)>; - def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0), - (vti.Vector vti.RegClass:$rs1), - (vti.Scalar 0), - (vti.Mask VR:$merge), - (XLenVT (VLOp GPR:$vl)))), + def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge), + (vti.Vector vti.RegClass:$rs1), + (vti.Scalar 0), + (vti.Mask V0), + (XLenVT (VLOp GPR:$vl)))), (!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK") VR:$merge, vti.RegClass:$rs1, diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 603446755aaf..9ace36f344a5 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -285,10 +285,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); - // FIXME: Can we support these natively? + // Expand 128-bit shifts without using a libcall. setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); // We have native instructions for i8, i16 and i32 extensions, but not i1. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp index d3bbadf27478..ff6404c30971 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp @@ -885,16 +885,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { SmallVector<Value *, 16> FMCArgs; for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) { Constant *Clause = LPI->getClause(I); - // As a temporary workaround for the lack of aggregate varargs support - // in the interface between JS and wasm, break out filter operands into - // their component elements. - if (LPI->isFilter(I)) { - auto *ATy = cast<ArrayType>(Clause->getType()); - for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) { - Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter"); - FMCArgs.push_back(EV); - } - } else + // TODO Handle filters (= exception specifications). + // https://bugs.llvm.org/show_bug.cgi?id=50396 + if (LPI->isCatch(I)) FMCArgs.push_back(Clause); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6b816c710f98..1e2407c7e7f6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37889,6 +37889,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) { // replicating low and high halves (and without changing the type/length of // the vector), we don't need the shuffle. if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) { + if (Opcode == X86ISD::VBROADCAST && !VT.is128BitVector()) + return SDValue(); if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) { // movddup (hadd X, X) --> hadd X, X // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index 7a2facf226d8..dc6361aecc60 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1344,15 +1344,18 @@ def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>; // Any instruction that defines a 32-bit result leaves the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate.
AssertSext/AssertZext/AssertAlign aren't saying + anything about the upper 32 bits, they're probably just qualifying a + // CopyFromReg. FREEZE may be coming from a truncate. Any other 32-bit + // operation will zero-extend up to 64 bits. def def32 : PatLeaf<(i32 GR32:$src), [{ return N->getOpcode() != ISD::TRUNCATE && N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && N->getOpcode() != ISD::CopyFromReg && N->getOpcode() != ISD::AssertSext && - N->getOpcode() != ISD::AssertZext; + N->getOpcode() != ISD::AssertZext && + N->getOpcode() != ISD::AssertAlign && + N->getOpcode() != ISD::FREEZE; }]>; // In the case of a 32-bit def that is known to implicitly zero-extend, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 68c4156af2c4..85a7abe211b3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3221,11 +3221,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { } } - // ~(X - Y) --> ~X + Y - if (match(NotVal, m_Sub(m_Value(X), m_Value(Y)))) - if (isa<Constant>(X) || NotVal->hasOneUse()) - return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y); - // ~(~X >>s Y) --> (X >>s Y) if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y)))) return BinaryOperator::CreateAShr(X, Y); @@ -3256,9 +3251,15 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y); } - // ~(X + C) --> -(C + 1) - X - if (match(Op0, m_Add(m_Value(X), m_Constant(C)))) - return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X); + // ~(X + C) --> ~C - X + if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C)))) + return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X); + + // ~(X - Y) --> ~X + Y + // FIXME: is it really beneficial to sink the `not` here? + if (match(NotVal, m_Sub(m_Value(X), m_Value(Y)))) + if (isa<Constant>(X) || NotVal->hasOneUse()) + return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y); // ~(~X + Y) --> X - Y if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y)))) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index f26c194d31b9..5f174aae09ec 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1095,7 +1095,10 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp, /// TODO: Wrapping flags could be preserved in some cases with better analysis. Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp) { - if (!Cmp.isEquality()) + // Value equivalence substitution requires an all-or-nothing replacement. + // It does not make sense for a vector compare where each lane is chosen + // independently. + if (!Cmp.isEquality() || Cmp.getType()->isVectorTy()) return nullptr; // Canonicalize the pattern to ICMP_EQ by swapping the select operands. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 7295369365c4..127bf8080959 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -21,6 +21,30 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" +bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1, + Value *ShAmt1) { + // We have two shift amounts from two different shifts.
The types of those + // shift amounts may not match. If that's the case, let's bail out now. + if (ShAmt0->getType() != ShAmt1->getType()) + return false; + + // As input, we have the following pattern: + // Sh0 (Sh1 X, Q), K + // We want to rewrite that as: + // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) + // While we know that originally (Q+K) would not overflow + // (because 2 * (N-1) u<= iN -1), we have looked past extensions of + // shift amounts, so it may now overflow in a smaller bit width. + // To ensure that does not happen, we need to ensure that the total maximal + // shift amount is still representable in that smaller bit width. + unsigned MaximalPossibleTotalShiftAmount = + (Sh0->getType()->getScalarSizeInBits() - 1) + + (Sh1->getType()->getScalarSizeInBits() - 1); + APInt MaximalRepresentableShiftAmount = + APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); + return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount); +} + // Given pattern: // (x shiftopcode Q) shiftopcode K // we should rewrite it as @@ -57,26 +81,8 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts( if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1))))) return nullptr; - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now.. - if (ShAmt0->getType() != ShAmt1->getType()) - return nullptr; - - // As input, we have the following pattern: - // Sh0 (Sh1 X, Q), K - // We want to rewrite that as: - // Sh x, (Q+K) iff (Q+K) u< bitwidth(x) - // While we know that originally (Q+K) would not overflow - // (because 2 * (N-1) u<= iN -1), we have looked past extensions of - // shift amounts. so it may now overflow in smaller bitwidth. - // To ensure that does not happen, we need to ensure that the total maximal - // shift amount is still representable in that smaller bit width. - unsigned MaximalPossibleTotalShiftAmount = - (Sh0->getType()->getScalarSizeInBits() - 1) + - (Sh1->getType()->getScalarSizeInBits() - 1); - APInt MaximalRepresentableShiftAmount = - APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits()); - if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount)) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1)) return nullptr; // We are only looking for signbit extraction if we have two right shifts. @@ -220,9 +226,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // Peek through an optional zext of the shift amount. match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt))); - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now. - if (MaskShAmt->getType() != ShiftShAmt->getType()) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, + MaskShAmt)) return nullptr; // Can we simplify (MaskShAmt+ShiftShAmt) ? @@ -252,9 +258,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift, // Peek through an optional zext of the shift amount. match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt))); - // We have two shift amounts from two different shifts. The types of those - // shift amounts may not match. If that's the case let's bailout now.
- if (MaskShAmt->getType() != ShiftShAmt->getType()) + // Verify that it would be safe to try to add those two shift amounts. + if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked, + MaskShAmt)) return nullptr; // Can we simplify (ShiftShAmt-MaskShAmt) ? diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp index b3bae47e96de..65a6205f0302 100644 --- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -1081,6 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po DenseSet<Instruction *> V; collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V); for (auto *I : V) { + if (I->mayHaveSideEffects()) { + LLVM_DEBUG(dbgs() << "LRR: Aborting - " + << "An instruction which does not belong to any root " + << "sets must not have side effects: " << *I); + return false; + } Uses[I].set(IL_All); } diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp index bfe8db83b027..bb30c48127a0 100644 --- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp @@ -43,10 +43,10 @@ STATISTIC(ObjectSizeIntrinsicsHandled, "Number of 'objectsize' intrinsic calls handled"); static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) { - Value *Op = II->getOperand(0); - - return isa<Constant>(Op) ? ConstantInt::getTrue(II->getType()) - : ConstantInt::getFalse(II->getType()); + if (auto *C = dyn_cast<Constant>(II->getOperand(0))) + if (C->isManifestConstant()) + return ConstantInt::getTrue(II->getType()); + return ConstantInt::getFalse(II->getType()); } static bool replaceConditionalBranchesOnConstant(Instruction *II, diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index de6be52adf21..8feed9e9ebfe 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -542,9 +542,14 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { auto Iter = AdditionalUsers.find(I); if (Iter != AdditionalUsers.end()) { + // Copy additional users before notifying them of changes, because new + // users may be added, potentially invalidating the iterator. + SmallVector<Instruction *> ToNotify; for (User *U : Iter->second) if (auto *UI = dyn_cast<Instruction>(U)) - OperandChangedState(UI); + ToNotify.push_back(UI); + for (Instruction *UI : ToNotify) + OperandChangedState(UI); } } void handleCallOverdefined(CallBase &CB); diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 3026342cc4a6..fb271a2118ba 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -780,7 +780,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, /// When inlining a call site that has !llvm.mem.parallel_loop_access, /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should /// be propagated to all memory-accessing cloned instructions.
-static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) { +static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, + Function::iterator FEnd) { MDNode *MemParallelLoopAccess = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); @@ -789,41 +790,33 @@ static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) { if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) return; - for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); - VMI != VMIE; ++VMI) { - // Check that key is an instruction, to skip the Argument mapping, which - // points to an instruction in the original function, not the inlined one. - if (!VMI->second || !isa<Instruction>(VMI->first)) - continue; + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // This metadata is only relevant for instructions that access memory. + if (!I.mayReadOrWriteMemory()) + continue; - Instruction *NI = dyn_cast<Instruction>(VMI->second); - if (!NI) - continue; - - // This metadata is only relevant for instructions that access memory. - if (!NI->mayReadOrWriteMemory()) - continue; - - if (MemParallelLoopAccess) { - // TODO: This probably should not overwrite MemParalleLoopAccess. - MemParallelLoopAccess = MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access), - MemParallelLoopAccess); - NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, + if (MemParallelLoopAccess) { + // TODO: This probably should not overwrite MemParallelLoopAccess. + MemParallelLoopAccess = MDNode::concatenate( + I.getMetadata(LLVMContext::MD_mem_parallel_loop_access), + MemParallelLoopAccess); + I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, MemParallelLoopAccess); + } + + if (AccessGroup) + I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( + I.getMetadata(LLVMContext::MD_access_group), AccessGroup)); + + if (AliasScope) + I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_alias_scope), AliasScope)); + + if (NoAlias) + I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( + I.getMetadata(LLVMContext::MD_noalias), NoAlias)); } - - if (AccessGroup) - NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( - NI->getMetadata(LLVMContext::MD_access_group), AccessGroup)); - - if (AliasScope) - NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope)); - - if (NoAlias) - NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( - NI->getMetadata(LLVMContext::MD_noalias), NoAlias)); } } @@ -844,9 +837,9 @@ class ScopedAliasMetadataDeepCloner { /// subsequent remap() calls. void clone(); - /// Remap instructions in the given VMap from the original to the cloned + /// Remap instructions in the given range from the original to the cloned /// metadata. - void remap(ValueToValueMapTy &VMap); + void remap(Function::iterator FStart, Function::iterator FEnd); }; ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( @@ -907,34 +900,27 @@ void ScopedAliasMetadataDeepCloner::clone() { } } -void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) { +void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, + Function::iterator FEnd) { if (MDMap.empty()) return; // Nothing to do.
- for (auto Entry : VMap) { - // Check that key is an instruction, to skip the Argument mapping, which - // points to an instruction in the original function, not the inlined one. - if (!Entry->second || !isa<Instruction>(Entry->first)) - continue; + for (BasicBlock &BB : make_range(FStart, FEnd)) { + for (Instruction &I : BB) { + // TODO: The null checks for the MDMap.lookup() results should no longer + // be necessary. + if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_alias_scope, MNew); - Instruction *I = dyn_cast<Instruction>(Entry->second); - if (!I) - continue; + if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + if (MDNode *MNew = MDMap.lookup(M)) + I.setMetadata(LLVMContext::MD_noalias, MNew); - // Only update scopes when we find them in the map. If they are not, it is - // because we already handled that instruction before. This is faster than - // tracking which instructions we already updated. - if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope)) - if (MDNode *MNew = MDMap.lookup(M)) - I->setMetadata(LLVMContext::MD_alias_scope, MNew); - - if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias)) - if (MDNode *MNew = MDMap.lookup(M)) - I->setMetadata(LLVMContext::MD_noalias, MNew); - - if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I)) - if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) - Decl->setScopeList(MNew); + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) + Decl->setScopeList(MNew); + } } } @@ -1926,7 +1912,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Now clone the inlined noalias scope metadata. SAMetadataCloner.clone(); - SAMetadataCloner.remap(VMap); + SAMetadataCloner.remap(FirstNewBlock, Caller->end()); // Add noalias metadata if necessary. AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); @@ -1936,7 +1922,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AddReturnAttributes(CB, VMap); // Propagate metadata on the callsite if necessary. - PropagateCallSiteMetadata(CB, VMap); + PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); // Register any cloned assumptions. if (IFI.GetAssumptionCache)