From 7ed7200811069c513465e0a7867ec0cb24bdb2dc Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Thu, 17 Mar 2022 16:34:17 +0100
Subject: [PATCH] Vendor import of llvm-project branch release/14.x
 llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a.

---
 clang/include/clang/Basic/LangOptions.h       |  4 -
 clang/lib/AST/RecordLayoutBuilder.cpp         |  7 +-
 clang/lib/Driver/ToolChains/PPCLinux.cpp      |  6 +-
 clang/lib/Frontend/CompilerInvocation.cpp     |  4 -
 .../lib/Sema/SemaTemplateInstantiateDecl.cpp  |  4 +-
 libcxx/include/span                           | 50 ++++++++++-
 lld/COFF/Writer.cpp                           |  8 +-
 llvm/include/llvm/Analysis/InlineCost.h       |  5 ++
 llvm/include/llvm/Transforms/Scalar.h         |  3 +-
 llvm/include/llvm/Transforms/Scalar/LICM.h    | 20 +++--
 .../include/llvm/Transforms/Utils/LoopUtils.h |  9 +-
 .../Transforms/Utils/SimplifyCFGOptions.h     |  5 ++
 llvm/lib/Analysis/InlineCost.cpp              | 13 ++-
 llvm/lib/CodeGen/MachineSink.cpp              | 12 +++
 llvm/lib/MC/WasmObjectWriter.cpp              | 62 +++++++++-----
 llvm/lib/Passes/PassBuilder.cpp               |  2 +
 llvm/lib/Passes/PassBuilderPipelines.cpp      | 82 +++++++++++------
 llvm/lib/Passes/PassRegistry.def              |  1 +
 .../Target/AArch64/AArch64TargetMachine.cpp   |  1 +
 .../Target/Hexagon/HexagonISelLoweringHVX.cpp |  8 +-
 .../Target/Hexagon/HexagonTargetMachine.cpp   |  1 +
 llvm/lib/Target/Mips/MipsISelLowering.cpp     | 13 +--
 llvm/lib/Transforms/IPO/Inliner.cpp           | 45 +++++++++-
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp         | 38 ++++-----
 .../lib/Transforms/IPO/PassManagerBuilder.cpp | 55 +++++++----
 llvm/lib/Transforms/Scalar/LICM.cpp           | 71 ++++++--------
 .../lib/Transforms/Scalar/SimplifyCFGPass.cpp |  9 ++
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |  4 +-
 28 files changed, 382 insertions(+), 160 deletions(-)

diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 50c7f038fc6b..09afa641acf9 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -181,10 +181,6 @@ class LangOptions : public LangOptionsBase {
     /// global-scope inline variables incorrectly.
     Ver12,
 
-    /// Attempt to be ABI-compatible with code generated by Clang 13.0.x.
-    /// This causes clang to not pack non-POD members of packed structs.
-    Ver13,
-
     /// Conform to the underlying platform's C and C++ ABIs as closely
     /// as we can.
    Latest
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 709e05716a56..61a30ead165e 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -1887,12 +1887,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
   UnfilledBitsInLastUnit = 0;
   LastBitfieldStorageUnitSize = 0;
 
-  llvm::Triple Target = Context.getTargetInfo().getTriple();
-  bool FieldPacked = (Packed && (!FieldClass || FieldClass->isPOD() ||
-                                 Context.getLangOpts().getClangABICompat() <=
-                                     LangOptions::ClangABI::Ver13 ||
-                                 Target.isPS4() || Target.isOSDarwin())) ||
-                     D->hasAttr<PackedAttr>();
+  bool FieldPacked = Packed || D->hasAttr<PackedAttr>();
 
   AlignRequirementKind AlignRequirement = AlignRequirementKind::None;
   CharUnits FieldSize;
diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp
index e480d8bd8703..2fea262fd109 100644
--- a/clang/lib/Driver/ToolChains/PPCLinux.cpp
+++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp
@@ -76,9 +76,11 @@ bool PPCLinuxToolChain::SupportIEEEFloat128(
   if (Args.hasArg(options::OPT_nostdlib, options::OPT_nostdlibxx))
     return true;
 
+  CXXStdlibType StdLib = ToolChain::GetCXXStdlibType(Args);
   bool HasUnsupportedCXXLib =
-      ToolChain::GetCXXStdlibType(Args) == CST_Libcxx &&
-      GCCInstallation.getVersion().isOlderThan(12, 1, 0);
+      StdLib == CST_Libcxx ||
+      (StdLib == CST_Libstdcxx &&
+       GCCInstallation.getVersion().isOlderThan(12, 1, 0));
 
   return GlibcSupportsFloat128(Linux::getDynamicLinker(Args)) &&
          !(D.CCCIsCXX() && HasUnsupportedCXXLib);
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 553a0b31c0ab..7f1ce3da7e7e 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3560,8 +3560,6 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
     GenerateArg(Args, OPT_fclang_abi_compat_EQ, "11.0", SA);
   else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver12)
     GenerateArg(Args, OPT_fclang_abi_compat_EQ, "12.0", SA);
-  else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver13)
-    GenerateArg(Args, OPT_fclang_abi_compat_EQ, "13.0", SA);
 
   if (Opts.getSignReturnAddressScope() ==
       LangOptions::SignReturnAddressScopeKind::All)
@@ -4064,8 +4062,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
         Opts.setClangABICompat(LangOptions::ClangABI::Ver11);
       else if (Major <= 12)
         Opts.setClangABICompat(LangOptions::ClangABI::Ver12);
-      else if (Major <= 13)
-        Opts.setClangABICompat(LangOptions::ClangABI::Ver13);
     } else if (Ver != "latest") {
       Diags.Report(diag::err_drv_invalid_value)
           << A->getAsString(Args) << A->getValue();
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 1da0dfec3f23..467372c71496 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -6012,7 +6012,9 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
       (ParentDependsOnArgs && (ParentDC->isFunctionOrMethod() ||
                                isa<OMPDeclareReductionDecl>(ParentDC) ||
                                isa<OMPDeclareMapperDecl>(ParentDC))) ||
-      (isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda())) {
+      (isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda() &&
+       cast<CXXRecordDecl>(D)->getTemplateDepth() >
+           TemplateArgs.getNumRetainedOuterLevels())) {
     // D is a local of some kind. Look into the map of local
     // declarations to their instantiations.
     if (CurrentInstantiationScope) {
diff --git a/libcxx/include/span b/libcxx/include/span
index fd95ecca17f7..b8dbc7e01fd6 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span
@@ -170,7 +170,25 @@ struct __is_std_span : false_type {};
 template <class _Tp, size_t _Sz>
 struct __is_std_span<span<_Tp, _Sz>> : true_type {};
 
-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+// This is a temporary workaround until we ship <ranges> -- we've unfortunately been
+// shipping <span> before its API was finalized, and we used to provide a constructor
+// from container types that had the requirements below. To avoid breaking code that
+// has started relying on the range-based constructor until we ship all of <ranges>,
+// we emulate the constructor requirements like this.
+template <class _Range, class _ElementType, class = void>
+struct __span_compatible_range : false_type { };
+
+template <class _Range, class _ElementType>
+struct __span_compatible_range<_Range, _ElementType, void_t<
+    enable_if_t<!__is_std_span<remove_cvref_t<_Range>>::value>,
+    enable_if_t<!__is_std_array<remove_cvref_t<_Range>>::value>,
+    enable_if_t<!is_array_v<remove_cvref_t<_Range>>>,
+    decltype(data(declval<_Range>())),
+    decltype(size(declval<_Range>())),
+    enable_if_t<is_convertible_v<remove_pointer_t<decltype(data(declval<_Range&>()))>(*)[], _ElementType(*)[]>>
+>> : true_type { };
+#else
 template <class _Range, class _ElementType>
 concept __span_compatible_range =
   ranges::contiguous_range<_Range> &&
@@ -248,7 +266,22 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     constexpr span(const array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {}
 
-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+    template <class _Container, class = enable_if_t<
+        __span_compatible_range<_Container, element_type>::value
+    >>
+    _LIBCPP_INLINE_VISIBILITY
+    constexpr explicit span(_Container& __c) : __data{std::data(__c)} {
+      _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)");
+    }
+    template <class _Container, class = enable_if_t<
+        __span_compatible_range<const _Container, element_type>::value
+    >>
+    _LIBCPP_INLINE_VISIBILITY
+    constexpr explicit span(const _Container& __c) : __data{std::data(__c)} {
+      _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)");
+    }
+#else
     template <__span_compatible_range<element_type> _Range>
     _LIBCPP_INLINE_VISIBILITY
     constexpr explicit span(_Range&& __r) : __data{ranges::data(__r)} {
@@ -434,7 +467,18 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     constexpr span(const array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {}
 
-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+    template <class _Container, class = enable_if_t<
+        __span_compatible_range<_Container, element_type>::value
+    >>
+    _LIBCPP_INLINE_VISIBILITY
+    constexpr span(_Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {}
+    template <class _Container, class = enable_if_t<
+        __span_compatible_range<const _Container, element_type>::value
+    >>
+    _LIBCPP_INLINE_VISIBILITY
+    constexpr span(const _Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {}
+#else
     template <__span_compatible_range<element_type> _Range>
     _LIBCPP_INLINE_VISIBILITY
     constexpr span(_Range&& __r) : __data(ranges::data(__r)), __size{ranges::size(__r)} {}
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 12db942f1db5..1ed2327ea630 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -926,8 +926,14 @@ void Writer::createSections() {
     // Move DISCARDABLE (or non-memory-mapped) sections to the end of file
     // because the loader cannot handle holes. Stripping can remove other
     // discardable ones than .reloc, which is first of them (created early).
-    if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
+    if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) {
+      // Move discardable sections named .debug_ to the end, after other
+      // discardable sections. Stripping only removes the sections named
+      // .debug_* - thus try to avoid leaving holes after stripping.
+      if (s->name.startswith(".debug_"))
+        return 3;
       return 2;
+    }
     // .rsrc should come at the end of the non-discardable sections because its
     // size may change by the Win32 UpdateResources() function, causing
     // subsequent sections to move (see https://crbug.com/827082).
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index f86ee5a14874..d3fa3b879125 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -52,6 +52,9 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024;
 /// Do not inline dynamic allocas that have been constant propagated to be
 /// static allocas above this amount in bytes.
 const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
+
+const char FunctionInlineCostMultiplierAttributeName[] =
+    "function-inline-cost-multiplier";
 } // namespace InlineConstants
 
 // The cost-benefit pair computed by cost-benefit analysis.
@@ -217,6 +220,8 @@ struct InlineParams {
   Optional<bool> AllowRecursiveCall = false;
 };
 
+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);
+
 /// Generate the parameters to tune the inline cost analysis based only on the
 /// commandline options.
 InlineParams getInlineParams();
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index d6228700aa9a..4d6874f784ef 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -133,7 +133,8 @@ Pass *createIndVarSimplifyPass();
 //
 Pass *createLICMPass();
 Pass *createLICMPass(unsigned LicmMssaOptCap,
-                     unsigned LicmMssaNoAccForPromotionCap);
+                     unsigned LicmMssaNoAccForPromotionCap,
+                     bool AllowSpeculation);
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h
index 751f75c0ccb2..503c8792d309 100644
--- a/llvm/include/llvm/Transforms/Scalar/LICM.h
+++ b/llvm/include/llvm/Transforms/Scalar/LICM.h
@@ -46,14 +46,18 @@ extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;
 class LICMPass : public PassInfoMixin<LICMPass> {
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 
 public:
   LICMPass()
       : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+           bool LicmAllowSpeculation)
       : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
@@ -62,14 +66,18 @@ class LICMPass : public PassInfoMixin<LICMPass> {
 class LNICMPass : public PassInfoMixin<LNICMPass> {
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 
 public:
   LNICMPass()
       : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+            bool LicmAllowSpeculation)
       : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
   PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 3a712d78df67..134f8bcfd888 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -171,10 +171,13 @@ bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
 /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
 /// instructions of the loop and loop safety information as arguments.
 /// Diagnostics are emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                  BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
                  MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
-                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
+                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
+                 bool AllowSpeculation);
 
 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is in fact dead.
@@ -204,12 +207,14 @@ void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
 /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
 /// of the loop and loop safety information as arguments.
 /// Diagnostics are emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool promoteLoopAccessesToScalars(
     const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
     SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
     PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
     Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-    OptimizationRemarkEmitter *);
+    OptimizationRemarkEmitter *, bool AllowSpeculation);
 
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.
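
The AllowSpeculation flag threaded through hoistRegion and promoteLoopAccessesToScalars above controls whether LICM may hoist an instruction that is safe to speculate but not guaranteed to execute on every loop iteration. A minimal C++ sketch of the distinction the flag draws (illustrative only; the loop and names below are not from this patch):

  // 'a / b' is loop-invariant, but it only executes when 'flag' is true.
  // Hoisting it above the loop would speculate a possible divide-by-zero
  // trap, so with AllowSpeculation=false LICM must leave it in place unless
  // it can prove the division executes on every iteration.
  int sum(int a, int b, int n, bool flag) {
    int s = 0;
    for (int i = 0; i < n; ++i)
      if (flag)
        s += a / b; // hoisting candidate; trapping if b == 0
    return s;
  }
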
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index fb3a7490346f..7af879638a4d 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -23,6 +23,7 @@ class AssumptionCache;
 struct SimplifyCFGOptions {
   int BonusInstThreshold = 1;
   bool ForwardSwitchCondToPhi = false;
+  bool ConvertSwitchRangeToICmp = false;
   bool ConvertSwitchToLookupTable = false;
   bool NeedCanonicalLoop = true;
   bool HoistCommonInsts = false;
@@ -41,6 +42,10 @@ struct SimplifyCFGOptions {
     ForwardSwitchCondToPhi = B;
     return *this;
   }
+  SimplifyCFGOptions &convertSwitchRangeToICmp(bool B) {
+    ConvertSwitchRangeToICmp = B;
+    return *this;
+  }
   SimplifyCFGOptions &convertSwitchToLookupTable(bool B) {
     ConvertSwitchToLookupTable = B;
     return *this;
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d5411d916c77..cd5314e7a17a 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -133,8 +133,6 @@ static cl::opt<bool> DisableGEPConstOperand(
     cl::desc("Disables evaluation of GetElementPtr with constant operands"));
 
 namespace {
-class InlineCostCallAnalyzer;
-
 /// This function behaves more like CallBase::hasFnAttr: when it looks for the
 /// requested attribute, it checks both the call instruction and the called
 /// function (if it's available and operand bundles don't prohibit that).
@@ -151,7 +149,9 @@ Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
   return {};
 }
+} // namespace
 
+namespace llvm {
 Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
   Attribute Attr = getFnAttr(CB, AttrKind);
   int AttrValue;
@@ -159,6 +159,10 @@ Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
     return None;
   return AttrValue;
 }
+} // namespace llvm
+
+namespace {
+class InlineCostCallAnalyzer;
 
 // This struct is used to store information about inline cost of a
 // particular instruction
@@ -904,6 +908,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
             getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
       Cost = *AttrCost;
 
+    if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
+            CandidateCall,
+            InlineConstants::FunctionInlineCostMultiplierAttributeName))
+      Cost *= *AttrCostMult;
+
     if (Optional<int> AttrThreshold =
             getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
       Threshold = *AttrThreshold;
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 0dbbc218e946..bc03776bde19 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -18,12 +18,14 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -429,6 +431,16 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   RegClassInfo.runOnMachineFunction(MF);
 
+  // MachineSink currently uses MachineLoopInfo, which only recognizes natural
+  // loops. As such, we could sink instructions into irreducible cycles, which
+  // would be non-profitable.
+  // WARNING: The current implementation of hasStoreBetween() is incorrect for
+  // sinking into irreducible cycles (PR53990), this bailout is currently
+  // necessary for correctness, not just profitability.
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI))
+    return false;
+
   bool EverMadeChange = false;
 
   while (true) {
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 636c1d238932..a016b7085a00 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -140,36 +140,58 @@ raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {
 }
 #endif
 
-// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
+// Write Value as an (unsigned) LEB value at offset Offset in Stream, padded
 // to allow patching.
-template <int W>
-void writePatchableLEB(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableULEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
   uint8_t Buffer[W];
-  unsigned SizeLen = encodeULEB128(X, Buffer, W);
+  unsigned SizeLen = encodeULEB128(Value, Buffer, W);
   assert(SizeLen == W);
   Stream.pwrite((char *)Buffer, SizeLen, Offset);
 }
 
-// Write X as a signed LEB value at offset Offset in Stream, padded
+// Write Value as a signed LEB value at offset Offset in Stream, padded
 // to allow patching.
-template <int W>
-void writePatchableSLEB(raw_pwrite_stream &Stream, int64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableSLEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
   uint8_t Buffer[W];
-  unsigned SizeLen = encodeSLEB128(X, Buffer, W);
+  unsigned SizeLen = encodeSLEB128(Value, Buffer, W);
   assert(SizeLen == W);
   Stream.pwrite((char *)Buffer, SizeLen, Offset);
 }
 
-// Write X as a plain integer value at offset Offset in Stream.
-static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+static void writePatchableU32(raw_pwrite_stream &Stream, uint32_t Value,
+                              uint64_t Offset) {
+  writePatchableULEB<uint32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableS32(raw_pwrite_stream &Stream, int32_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<int32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableU64(raw_pwrite_stream &Stream, uint64_t Value,
+                              uint64_t Offset) {
+  writePatchableULEB<uint64_t, 10>(Stream, Value, Offset);
+}
+
+static void writePatchableS64(raw_pwrite_stream &Stream, int64_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<int64_t, 10>(Stream, Value, Offset);
+}
+
+// Write Value as a plain integer value at offset Offset in Stream.
+static void patchI32(raw_pwrite_stream &Stream, uint32_t Value,
+                     uint64_t Offset) {
   uint8_t Buffer[4];
-  support::endian::write32le(Buffer, X);
+  support::endian::write32le(Buffer, Value);
   Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
 }
 
-static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+static void patchI64(raw_pwrite_stream &Stream, uint64_t Value,
+                     uint64_t Offset) {
   uint8_t Buffer[8];
-  support::endian::write64le(Buffer, X);
+  support::endian::write64le(Buffer, Value);
   Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
 }
 
@@ -423,8 +445,8 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
 
   // Write the final section size to the payload_len field, which follows
   // the section id byte.
-  writePatchableLEB<5>(static_cast<raw_pwrite_stream &>(W->OS), Size,
-                       Section.SizeOffset);
+  writePatchableU32(static_cast<raw_pwrite_stream &>(W->OS), Size,
+                    Section.SizeOffset);
 }
 
 // Emit the Wasm header.
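
The writePatchable* helpers above rely on LEB128 values padded to a fixed width: every byte except the last keeps the continuation bit set, so a 5-byte field can always hold a uint32_t and can be overwritten in place once the relocated value is known. A rough sketch of the padding idea (a standalone illustration, not the patch's actual encodeULEB128 from llvm/Support/LEB128.h):

  #include <cstdint>

  // Encode Value as ULEB128 padded to exactly W bytes. Bytes 0..W-2 keep the
  // 0x80 continuation bit even after Value runs out of payload bits, which
  // fixes the field width and makes the field patchable later.
  static void encodePaddedULEB128(uint64_t Value, uint8_t *Buf, unsigned W) {
    for (unsigned I = 0; I != W; ++I) {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;
      if (I != W - 1)
        Byte |= 0x80;
      Buf[I] = Byte;
    }
  }
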
@@ -755,7 +777,7 @@ void WasmObjectWriter::applyRelocations(
                       RelEntry.Offset;
 
     LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n");
-    auto Value = getProvisionalValue(RelEntry, Layout);
+    uint64_t Value = getProvisionalValue(RelEntry, Layout);
 
     switch (RelEntry.Type) {
     case wasm::R_WASM_FUNCTION_INDEX_LEB:
@@ -764,10 +786,10 @@ void WasmObjectWriter::applyRelocations(
     case wasm::R_WASM_MEMORY_ADDR_LEB:
     case wasm::R_WASM_TAG_INDEX_LEB:
     case wasm::R_WASM_TABLE_NUMBER_LEB:
-      writePatchableLEB<5>(Stream, Value, Offset);
+      writePatchableU32(Stream, Value, Offset);
       break;
     case wasm::R_WASM_MEMORY_ADDR_LEB64:
-      writePatchableLEB<10>(Stream, Value, Offset);
+      writePatchableU64(Stream, Value, Offset);
       break;
     case wasm::R_WASM_TABLE_INDEX_I32:
     case wasm::R_WASM_MEMORY_ADDR_I32:
@@ -787,14 +809,14 @@ void WasmObjectWriter::applyRelocations(
     case wasm::R_WASM_MEMORY_ADDR_SLEB:
     case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
     case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB:
-      writePatchableSLEB<5>(Stream, Value, Offset);
+      writePatchableS32(Stream, Value, Offset);
       break;
     case wasm::R_WASM_TABLE_INDEX_SLEB64:
     case wasm::R_WASM_TABLE_INDEX_REL_SLEB64:
     case wasm::R_WASM_MEMORY_ADDR_SLEB64:
     case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:
     case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64:
-      writePatchableSLEB<10>(Stream, Value, Offset);
+      writePatchableS64(Stream, Value, Offset);
       break;
     default:
       llvm_unreachable("invalid relocation type");
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 015ca1eec4df..dedfc81f11bb 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -679,6 +679,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
     bool Enable = !ParamName.consume_front("no-");
     if (ParamName == "forward-switch-cond") {
       Result.forwardSwitchCondToPhi(Enable);
+    } else if (ParamName == "switch-range-to-icmp") {
+      Result.convertSwitchRangeToICmp(Enable);
     } else if (ParamName == "switch-to-lookup") {
       Result.convertSwitchToLookupTable(Enable);
     } else if (ParamName == "keep-loops") {
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 93637c890c4f..e838665eb9ce 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -259,14 +259,16 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
 
   FPM.addPass(LibCallsShrinkWrapPass());
 
   invokePeepholeEPCallbacks(FPM, Level);
 
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
 
   // Form canonically associated expression trees, and simplify the trees using
   // basic mathematical properties. For example, this will form (nearly)
@@ -291,14 +293,19 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/false));
   LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
                               isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   LPM1.addPass(SimpleLoopUnswitchPass());
   if (EnableLoopFlatten)
     LPM1.addPass(LoopFlattenPass());
@@ -335,7 +342,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                               /*UseMemorySSA=*/true,
                                               /*UseBlockFrequencyInfo=*/true));
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
   // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
   // *All* loop passes must preserve it, in order to be able to use it.
@@ -373,7 +381,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   // the simplifications and basic cleanup after all the simplifications.
   // TODO: Investigate if this is too expensive.
   FPM.addPass(ADCEPass());
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
@@ -408,7 +417,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   // Global value numbering based sinking.
   if (EnableGVNSink) {
     FPM.addPass(GVNSinkPass());
-    FPM.addPass(SimplifyCFGPass());
+    FPM.addPass(
+        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   }
 
   if (EnableConstraintElimination)
@@ -421,7 +431,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(JumpThreadingPass());
   FPM.addPass(CorrelatedValuePropagationPass());
 
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
   if (Level == OptimizationLevel::O3)
     FPM.addPass(AggressiveInstCombinePass());
@@ -438,7 +449,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
     FPM.addPass(PGOMemOPSizeOpt());
 
   FPM.addPass(TailCallElimPass());
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
 
   // Form canonically associated expression trees, and simplify the trees using
   // basic mathematical properties. For example, this will form (nearly)
@@ -463,15 +475,20 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/false));
 
   // Disable header duplication in loop rotation at -Oz.
   LPM1.addPass(
       LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   LPM1.addPass(
       SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
                              EnableO3NonTrivialUnswitching));
@@ -510,7 +527,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                               /*UseMemorySSA=*/true,
                                               /*UseBlockFrequencyInfo=*/true));
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
   // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
   // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
@@ -567,7 +585,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
     FPM.addPass(DSEPass());
   FPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+               /*AllowSpeculation=*/true),
       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
 
   FPM.addPass(CoroElidePass());
@@ -575,8 +594,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   for (auto &C : ScalarOptimizerLateEPCallbacks)
     C(FPM, Level);
 
-  FPM.addPass(SimplifyCFGPass(
-      SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
+  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+                                  .convertSwitchRangeToICmp(true)
+                                  .hoistCommonInsts(true)
+                                  .sinkCommonInsts(true)));
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
@@ -614,7 +635,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
   FunctionPassManager FPM;
   FPM.addPass(SROAPass());
   FPM.addPass(EarlyCSEPass());    // Catch trivial redundancies.
-  FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+      true)));                    // Merge & remove basic blocks.
   FPM.addPass(InstCombinePass()); // Combine silly sequences.
   invokePeepholeEPCallbacks(FPM, Level);
 
@@ -928,7 +950,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   GlobalCleanupPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
 
-  GlobalCleanupPM.addPass(SimplifyCFGPass());
+  GlobalCleanupPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
                                                 PTO.EagerlyInvalidateAnalyses));
 
@@ -1007,7 +1030,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
     ExtraPasses.addPass(CorrelatedValuePropagationPass());
     ExtraPasses.addPass(InstCombinePass());
     LoopPassManager LPM;
-    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/true));
     LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
                                        OptimizationLevel::O3));
     ExtraPasses.addPass(
         createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
                                         /*UseBlockFrequencyInfo=*/true));
-    ExtraPasses.addPass(SimplifyCFGPass());
+    ExtraPasses.addPass(
+        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
     ExtraPasses.addPass(InstCombinePass());
     FPM.addPass(std::move(ExtraPasses));
   }
@@ -1031,6 +1056,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   // before SLP vectorization.
   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                   .forwardSwitchCondToPhi(true)
+                                  .convertSwitchRangeToICmp(true)
                                   .convertSwitchToLookupTable(true)
                                   .needCanonicalLoops(false)
                                   .hoistCommonInsts(true)
@@ -1073,7 +1099,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
     FPM.addPass(
         RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
     FPM.addPass(createFunctionToLoopPassAdaptor(
-        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                 /*AllowSpeculation=*/true),
         /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
   }
 
@@ -1202,7 +1229,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
 
   // LoopSink (and other loop passes since the last simplifyCFG) might have
   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
-  OptimizePM.addPass(SimplifyCFGPass());
+  OptimizePM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
 
   OptimizePM.addPass(CoroCleanupPass());
 
@@ -1612,7 +1640,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
 
   FunctionPassManager MainFPM;
   MainFPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
 
   if (RunNewGVN)
@@ -1676,8 +1705,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
 
   // Add late LTO optimization passes.
   // Delete basic blocks, which optimization passes may have killed.
-  MPM.addPass(createModuleToFunctionPassAdaptor(
-      SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
+  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(
+      SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
+          true))));
 
   // Drop bodies of available externally objects to improve GlobalDCE.
   MPM.addPass(EliminateAvailableExternallyPass());
 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 8e0af11b854d..69d8d8c43267 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -423,6 +423,7 @@ FUNCTION_PASS_WITH_PARAMS("simplifycfg",
                           },
                           parseSimplifyCFGOptions,
                           "no-forward-switch-cond;forward-switch-cond;"
+                          "no-switch-range-to-icmp;switch-range-to-icmp;"
                           "no-switch-to-lookup;switch-to-lookup;"
                           "no-keep-loops;keep-loops;"
                           "no-hoist-common-insts;hoist-common-insts;"
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 4af28fc070dd..6a751da7ad55 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -531,6 +531,7 @@ void AArch64PassConfig::addIRPasses() {
   if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
     addPass(createCFGSimplificationPass(SimplifyCFGOptions()
                                             .forwardSwitchCondToPhi(true)
+                                            .convertSwitchRangeToICmp(true)
                                             .convertSwitchToLookupTable(true)
                                             .needCanonicalLoops(false)
                                             .hoistCommonInsts(true)
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0ba75a544c04..14b4f7c56c57 100755
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -118,9 +118,10 @@ HexagonTargetLowering::initializeHVXLowering() {
       setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal);
       // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
       // generated.
-      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
-      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
-      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
+      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
+      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
+      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
+      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
 
       // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
       // independent) handling of it would convert it to a load, which is
@@ -780,7 +781,6 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
   SDValue N = HalfV0;
   SDValue M = HalfV1;
   for (unsigned i = 0; i != NumWords/2; ++i) {
-    // Rotate by element count since last insertion.
     if (Words[i] != Words[n] || VecHist[n] <= 1) {
       Sn = DAG.getConstant(Rn, dl, MVT::i32);
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index c6703bb8a62a..08acf81961a3 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -345,6 +345,7 @@ void HexagonPassConfig::addIRPasses() {
     if (EnableInitialCFGCleanup)
       addPass(createCFGSimplificationPass(SimplifyCFGOptions()
                                               .forwardSwitchCondToPhi(true)
+                                              .convertSwitchRangeToICmp(true)
                                               .convertSwitchToLookupTable(true)
                                               .needCanonicalLoops(false)
                                               .hoistCommonInsts(true)
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 0c2e129b8f1f..8534a0ad886e 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -4732,18 +4732,19 @@ MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
 Register
 MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
-  // Named registers is expected to be fairly rare. For now, just support $28
-  // since the linux kernel uses it.
+  // The Linux kernel uses $28 and sp.
   if (Subtarget.isGP64bit()) {
     Register Reg = StringSwitch<Register>(RegName)
-                       .Case("$28", Mips::GP_64)
-                       .Default(Register());
+                       .Case("$28", Mips::GP_64)
+                       .Case("sp", Mips::SP_64)
+                       .Default(Register());
     if (Reg)
       return Reg;
   } else {
     Register Reg = StringSwitch<Register>(RegName)
-                       .Case("$28", Mips::GP)
-                       .Default(Register());
+                       .Case("$28", Mips::GP)
+                       .Case("sp", Mips::SP)
+                       .Default(Register());
     if (Reg)
       return Reg;
   }
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 49babc24cb82..10abea7ebd32 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -92,6 +93,18 @@ static cl::opt<bool>
     DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                 cl::init(false), cl::Hidden);
 
+static cl::opt<int> IntraSCCCostMultiplier(
+    "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
+    cl::desc(
+        "Cost multiplier to multiply onto inlined call sites where the "
+        "new call was previously an intra-SCC call (not relevant when the "
+        "original call was already intra-SCC). This can accumulate over "
+        "multiple inlinings (e.g. if a call site already had a cost "
+        "multiplier and one of its inlined calls was also subject to "
+        "this, the inlined call would have the original multiplier "
+        "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
+        "inlining through a child SCC which can cause terrible compile times"));
+
 /// A flag for test, so we can print the content of the advisor when running it
 /// as part of the default (e.g. -O3) pipeline.
 static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",
@@ -876,8 +889,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
       // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
       // and thus hidden from the full inline history.
-      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
-          UR.InlinedInternalEdges.count({&N, C})) {
+      LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+      if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
         LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                              "previously split out of this SCC by inlining: "
                           << F.getName() << " -> " << Callee.getName() << "\n");
@@ -897,6 +910,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
         continue;
       }
 
+      int CBCostMult =
+          getStringFnAttrAsInt(
+              *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
+              .getValueOr(1);
+
       // Setup the data structure used to plumb customization into the
       // `InlineFunction` routine.
       InlineFunctionInfo IFI(
@@ -935,9 +953,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
             if (tryPromoteCall(*ICB))
               NewCallee = ICB->getCalledFunction();
           }
-          if (NewCallee)
-            if (!NewCallee->isDeclaration())
+          if (NewCallee) {
+            if (!NewCallee->isDeclaration()) {
               Calls->push({ICB, NewHistoryID});
+              // Continually inlining through an SCC can result in huge compile
+              // times and bloated code since we arbitrarily stop at some point
+              // when the inliner decides it's not profitable to inline anymore.
+              // We attempt to mitigate this by making these calls exponentially
+              // more expensive.
+              // This doesn't apply to calls in the same SCC since if we do
+              // inline through the SCC the function will end up being
+              // self-recursive which the inliner bails out on, and inlining
+              // within an SCC is necessary for performance.
+              if (CalleeSCC != C &&
+                  CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
+                Attribute NewCBCostMult = Attribute::get(
+                    M.getContext(),
+                    InlineConstants::FunctionInlineCostMultiplierAttributeName,
+                    itostr(CBCostMult * IntraSCCCostMultiplier));
+                ICB->addFnAttr(NewCBCostMult);
+              }
+            }
+          }
         }
       }
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 5113c0c67acc..7205ae178d21 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -3712,9 +3712,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
     //                  __kmpc_get_hardware_num_threads_in_block();
     //       WarpSize = __kmpc_get_warp_size();
     //       BlockSize = BlockHwSize - WarpSize;
-    //       if (InitCB >= BlockSize) return;
-    // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
+    // IsWorkerCheckBB: bool IsWorker = InitCB != -1;
     //       if (IsWorker) {
+    //         if (InitCB >= BlockSize) return;
     // SMBeginBB:       __kmpc_barrier_simple_generic(...);
     //                  void *WorkFn;
     //                  bool Active = __kmpc_kernel_parallel(&WorkFn);
@@ -3771,6 +3771,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
     ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
     InitBB->getTerminator()->eraseFromParent();
 
+    Instruction *IsWorker =
+        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
+                         ConstantInt::get(KernelInitCB->getType(), -1),
+                         "thread.is_worker", InitBB);
+    IsWorker->setDebugLoc(DLoc);
+    BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
+
     Module &M = *Kernel->getParent();
     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
     FunctionCallee BlockHwSizeFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
     FunctionCallee WarpSizeFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_get_warp_size);
     CallInst *BlockHwSize =
-        CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+        CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
     OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
     BlockHwSize->setDebugLoc(DLoc);
-    CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+    CallInst *WarpSize =
+        CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
     OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
     WarpSize->setDebugLoc(DLoc);
-    Instruction *BlockSize =
-        BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+    Instruction *BlockSize = BinaryOperator::CreateSub(
+        BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
     BlockSize->setDebugLoc(DLoc);
-    Instruction *IsMainOrWorker =
-        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
-                         BlockSize, "thread.is_main_or_worker", InitBB);
+    Instruction *IsMainOrWorker = ICmpInst::Create(
+        ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
+        "thread.is_main_or_worker", IsWorkerCheckBB);
     IsMainOrWorker->setDebugLoc(DLoc);
-    BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
-                       InitBB);
-
-    Instruction *IsWorker =
-        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
-                         ConstantInt::get(KernelInitCB->getType(), -1),
-                         "thread.is_worker", IsWorkerCheckBB);
-    IsWorker->setDebugLoc(DLoc);
-    BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
-                       IsWorkerCheckBB);
+    BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
+                       IsMainOrWorker, IsWorkerCheckBB);
 
     // Create local storage for the work function pointer.
     const DataLayout &DL = M.getDataLayout();
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index 74f68531b89a..6e5aeb9c41f6 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -365,7 +365,9 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
     MPM.add(createFunctionInliningPass(IP));
     MPM.add(createSROAPass());
     MPM.add(createEarlyCSEPass());             // Catch trivial redundancies
-    MPM.add(createCFGSimplificationPass());    // Merge & remove BBs
+    MPM.add(createCFGSimplificationPass(
+        SimplifyCFGOptions().convertSwitchRangeToICmp(
+            true)));                           // Merge & remove BBs
     MPM.add(createInstructionCombiningPass()); // Combine silly seq's
     addExtensionsToPM(EP_Peephole, MPM);
   }
@@ -404,7 +406,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
       MPM.add(createGVNHoistPass());
     if (EnableGVNSink) {
       MPM.add(createGVNSinkPass());
-      MPM.add(createCFGSimplificationPass());
+      MPM.add(createCFGSimplificationPass(
+          SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
     }
   }
 
@@ -418,7 +421,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
     MPM.add(createJumpThreadingPass());         // Thread jumps.
     MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
   }
-  MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
+  MPM.add(
+      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+          true)));                            // Merge & remove BBs
   // Combine silly seq's
   if (OptLevel > 2)
     MPM.add(createAggressiveInstCombinerPass());
@@ -434,7 +439,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   // TODO: Investigate the cost/benefit of tail call elimination on debugging.
   if (OptLevel > 1)
     MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
-  MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
+  MPM.add(
+      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+          true)));                            // Merge & remove BBs
   MPM.add(createReassociatePass());           // Reassociate expressions
 
   // The matrix extension can introduce large vector operations early, which can
@@ -451,13 +458,18 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
     MPM.add(createLoopSimplifyCFGPass());
   }
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/false));
   // Rotate Loop - disable header duplication at -Oz
   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
   // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/true));
   if (EnableSimpleLoopUnswitch)
     MPM.add(createSimpleLoopUnswitchLegacyPass());
   else
@@ -465,7 +477,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   // FIXME: We break the loop pass pipeline here in order to do full
   // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
   // need for this.
-  MPM.add(createCFGSimplificationPass());
+  MPM.add(createCFGSimplificationPass(
+      SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   MPM.add(createInstructionCombiningPass());
   // We resume loop passes creating a second loop pipeline here.
   if (EnableLoopFlatten) {
@@ -521,7 +534,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   // TODO: Investigate if this is too expensive at O1.
   if (OptLevel > 1) {
     MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                           /*AllowSpeculation=*/true));
   }
 
   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -580,9 +594,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
     PM.add(createEarlyCSEPass());
     PM.add(createCorrelatedValuePropagationPass());
     PM.add(createInstructionCombiningPass());
-    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                          /*AllowSpeculation=*/true));
     PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
-    PM.add(createCFGSimplificationPass());
+    PM.add(createCFGSimplificationPass(
+        SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
     PM.add(createInstructionCombiningPass());
   }
 
@@ -597,6 +613,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
   // before SLP vectorization.
   PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
                                          .forwardSwitchCondToPhi(true)
+                                         .convertSwitchRangeToICmp(true)
                                          .convertSwitchToLookupTable(true)
                                          .needCanonicalLoops(false)
                                          .hoistCommonInsts(true)
@@ -641,7 +658,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
     // unrolled loop is an inner loop, then the prologue will be inside the
     // outer loop. LICM pass can help to promote the runtime check out if the
     // checked value is loop invariant.
-    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                          /*AllowSpeculation=*/true));
   }
 
   PM.add(createWarnMissedTransformationsPass());
@@ -772,7 +790,9 @@ void PassManagerBuilder::populateModulePassManager(
     MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
     addExtensionsToPM(EP_Peephole, MPM);
-    MPM.add(createCFGSimplificationPass());    // Clean up after IPCP & DAE
+    MPM.add(
+        createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+            true)));                           // Clean up after IPCP & DAE
 
     // For SamplePGO in ThinLTO compile phase, we do not want to do indirect
     // call promotion as it will change the CFG too much to make the 2nd
@@ -886,7 +906,8 @@ void PassManagerBuilder::populateModulePassManager(
   // later might get benefit of no-alias assumption in clone loop.
   if (UseLoopVersioningLICM) {
     MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                           /*AllowSpeculation=*/true));
   }
 
   // We add a fresh GlobalsModRef run at this point. This is particularly
@@ -972,7 +993,8 @@ void PassManagerBuilder::populateModulePassManager(
 
   // LoopSink (and other loop passes since the last simplifyCFG) might have
   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
-  MPM.add(createCFGSimplificationPass());
+  MPM.add(createCFGSimplificationPass(
+      SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
 
   addExtensionsToPM(EP_OptimizerLast, MPM);
 
@@ -1120,7 +1142,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
 
   // Run a few AA driven optimizations here and now, to cleanup the code.
   PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
-  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   PM.add(NewGVN ? createNewGVNPass()
                 : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
   PM.add(createMemCpyOptPass()); // Remove dead memcpys.
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 7fb1a25bdf13..6372ce19f8ee 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -149,13 +149,11 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                  BlockFrequencyInfo *BFI, const Loop *CurLoop,
                  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
                  OptimizationRemarkEmitter *ORE);
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
-                                           const DominatorTree *DT,
-                                           const TargetLibraryInfo *TLI,
-                                           const Loop *CurLoop,
-                                           const LoopSafetyInfo *SafetyInfo,
-                                           OptimizationRemarkEmitter *ORE,
-                                           const Instruction *CtxI = nullptr);
+static bool isSafeToExecuteUnconditionally(
+    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+    OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+    bool AllowSpeculation);
 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                      AliasSetTracker *CurAST, Loop *CurLoop,
                                      AAResults *AA);
@@ -188,21 +186,26 @@ struct LoopInvariantCodeMotion {
                  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
 
   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
-                          unsigned LicmMssaNoAccForPromotionCap)
+                          unsigned LicmMssaNoAccForPromotionCap,
+                          bool LicmAllowSpeculation)
       : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
 
 private:
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 };
 
 struct LegacyLICMPass : public LoopPass {
   static char ID; // Pass identification, replacement for typeid
   LegacyLICMPass(
       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
-      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
-      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
+      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
+      bool LicmAllowSpeculation = true)
+      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                           LicmAllowSpeculation) {
    initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
   }
 
@@ -265,7 +268,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
   // but ORE cannot be preserved (see comment before the pass definition).
   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
 
-  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                               LicmAllowSpeculation);
   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
                       &AR.SE, AR.MSSA, &ORE))
     return PreservedAnalyses::all();
@@ -290,7 +294,8 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
   // but ORE cannot be preserved (see comment before the pass definition).
   OptimizationRemarkEmitter ORE(LN.getParent());
 
-  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                               LicmAllowSpeculation);
 
   Loop &OutermostLoop = LN.getOutermostLoop();
   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
@@ -321,8 +326,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
 
 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
-                           unsigned LicmMssaNoAccForPromotionCap) {
-  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+                           unsigned LicmMssaNoAccForPromotionCap,
+                           bool LicmAllowSpeculation) {
+  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                            LicmAllowSpeculation);
 }
 
 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
@@ -418,7 +425,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
     Flags.setIsSink(false);
     if (Preheader)
       Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
-                             &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
+                             &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
+                             LicmAllowSpeculation);
 
   // Now that all loop invariants have been removed from the loop, promote any
   // memory references to scalars that we can.
@@ -460,8 +468,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
       for (const SmallSetVector<Value *, 8> &PointerMustAliases :
            collectPromotionCandidates(MSSA, AA, L)) {
         LocalPromoted |= promoteLoopAccessesToScalars(
-            PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
-            LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
+            PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+            DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
       }
       Promoted |= LocalPromoted;
     } while (LocalPromoted);
@@ -825,7 +833,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                        MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
                        ICFLoopSafetyInfo *SafetyInfo,
                        SinkAndHoistLICMFlags &Flags,
-                       OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
+                       OptimizationRemarkEmitter *ORE, bool LoopNestMode,
+                       bool AllowSpeculation) {
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
          CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
@@ -877,7 +886,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
                                        true, &Flags, ORE) &&
           isSafeToExecuteUnconditionally(
               I, DT, TLI, CurLoop, SafetyInfo, ORE,
-              CurLoop->getLoopPreheader()->getTerminator())) {
+              CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
         hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
               MSSAU, SE, ORE);
         HoistedInstructions.push_back(&I);
@@ -1774,14 +1783,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
 /// Only sink or hoist an instruction if it is not a trapping instruction,
 /// or if the instruction is known not to trap when moved to the preheader.
 /// or if it is a trapping instruction and is guaranteed to execute.
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
-                                           const DominatorTree *DT,
-                                           const TargetLibraryInfo *TLI,
-                                           const Loop *CurLoop,
-                                           const LoopSafetyInfo *SafetyInfo,
-                                           OptimizationRemarkEmitter *ORE,
-                                           const Instruction *CtxI) {
-  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
+static bool isSafeToExecuteUnconditionally(
+    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+    OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+    bool AllowSpeculation) {
+  if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
     return true;
 
   bool GuaranteedToExecute =
@@ -1949,7 +1956,7 @@ bool llvm::promoteLoopAccessesToScalars(
     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
     Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
-    OptimizationRemarkEmitter *ORE) {
+    OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
   // Verify inputs.
   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
          SafetyInfo != nullptr &&
@@ -2054,9 +2061,9 @@ bool llvm::promoteLoopAccessesToScalars(
           // to execute does as well.  Thus we can increase our guaranteed
          // alignment as well.
           if (!DereferenceableInPH || (InstAlignment > Alignment))
-            if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
-                                               SafetyInfo, ORE,
-                                               Preheader->getTerminator())) {
+            if (isSafeToExecuteUnconditionally(
+                    *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
+                    Preheader->getTerminator(), AllowSpeculation)) {
               DereferenceableInPH = true;
               Alignment = std::max(Alignment, InstAlignment);
             }
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ee17da1875e5..b8972751066d 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -59,6 +59,11 @@ static cl::opt<bool> UserKeepLoops(
     "keep-loops", cl::Hidden, cl::init(true),
     cl::desc("Preserve canonical loop structure (default = true)"));
 
+static cl::opt<bool> UserSwitchRangeToICmp(
+    "switch-range-to-icmp", cl::Hidden, cl::init(false),
+    cl::desc(
+        "Convert switches into an integer range comparison (default = false)"));
+
 static cl::opt<bool> UserSwitchToLookup(
     "switch-to-lookup", cl::Hidden, cl::init(false),
     cl::desc("Convert switches to lookup tables (default = false)"));
@@ -311,6 +316,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
     Options.BonusInstThreshold = UserBonusInstThreshold;
   if (UserForwardSwitchCond.getNumOccurrences())
     Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;
+  if (UserSwitchRangeToICmp.getNumOccurrences())
+    Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp;
   if (UserSwitchToLookup.getNumOccurrences())
     Options.ConvertSwitchToLookupTable = UserSwitchToLookup;
   if (UserKeepLoops.getNumOccurrences())
@@ -337,6 +344,8 @@ void SimplifyCFGPass::printPipeline(
   OS << "<";
   OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
   OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
+  OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-")
+     << "switch-range-to-icmp;";
   OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
     << "switch-to-lookup;";
   OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 335ac03ccb52..8c4e1b381b4d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6211,7 +6211,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
   }
 
   // Try to transform the switch into an icmp and a branch.
-  if (TurnSwitchRangeIntoICmp(SI, Builder))
+  // The conversion from switch to comparison may lose information on
+  // impossible switch values, so disable it early in the pipeline.
+  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
     return requestResimplify();
 
   // Remove unreachable cases.
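
For context on that final hunk: TurnSwitchRangeIntoICmp rewrites a switch whose reachable cases form one contiguous range into a single subtract-and-compare, and the new ConvertSwitchRangeToICmp option gates it because the rewrite discards which individual case values were impossible. Roughly, at the source level (an illustrative C++ example, not code from this patch):

  // Before: three contiguous cases selecting the same destination.
  int before(int x) {
    switch (x) {
    case 2:
    case 3:
    case 4:
      return 1;
    default:
      return 0;
    }
  }

  // After: one unsigned range check covering 2 <= x <= 4; the unsigned
  // subtraction wraps values below 2 (and negatives) past the bound.
  int after(int x) {
    return static_cast<unsigned>(x) - 2u <= 2u ? 1 : 0;
  }
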