Merge llvm-project release/14.x llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a.

PR:		261742
MFC after:	2 weeks
commit fb03ea46eb
@@ -181,10 +181,6 @@ public:
     /// global-scope inline variables incorrectly.
     Ver12,
 
-    /// Attempt to be ABI-compatible with code generated by Clang 13.0.x.
-    /// This causes clang to not pack non-POD members of packed structs.
-    Ver13,
-
     /// Conform to the underlying platform's C and C++ ABIs as closely
     /// as we can.
     Latest

@@ -1887,12 +1887,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
     UnfilledBitsInLastUnit = 0;
     LastBitfieldStorageUnitSize = 0;
 
-  llvm::Triple Target = Context.getTargetInfo().getTriple();
-  bool FieldPacked = (Packed && (!FieldClass || FieldClass->isPOD() ||
-                                 Context.getLangOpts().getClangABICompat() <=
-                                     LangOptions::ClangABI::Ver13 ||
-                                 Target.isPS4() || Target.isOSDarwin())) ||
-                     D->hasAttr<PackedAttr>();
+  bool FieldPacked = Packed || D->hasAttr<PackedAttr>();
 
   AlignRequirementKind AlignRequirement = AlignRequirementKind::None;
   CharUnits FieldSize;
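For context, the two hunks above back out the Clang 13 ABI-compatibility
machinery around packed non-POD members for this release. The layout question
involved is whether a non-POD member of a packed struct is itself packed to
byte alignment. A minimal illustration (the struct and output below are
hypothetical, not part of the merge):

```cpp
#include <cstdio>

struct NonPod {
  NonPod() {}   // user-provided constructor makes this non-POD
  int x;
};

struct __attribute__((packed)) P {
  char c;
  NonPod np;    // placed at offset 1 under the restored (packing) behavior
};

int main() { std::printf("sizeof(P) = %zu\n", sizeof(P)); } // 5 when packed
```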
@@ -76,9 +76,11 @@ bool PPCLinuxToolChain::SupportIEEEFloat128(
   if (Args.hasArg(options::OPT_nostdlib, options::OPT_nostdlibxx))
     return true;
 
+  CXXStdlibType StdLib = ToolChain::GetCXXStdlibType(Args);
   bool HasUnsupportedCXXLib =
-      ToolChain::GetCXXStdlibType(Args) == CST_Libcxx &&
-      GCCInstallation.getVersion().isOlderThan(12, 1, 0);
+      StdLib == CST_Libcxx ||
+      (StdLib == CST_Libstdcxx &&
+       GCCInstallation.getVersion().isOlderThan(12, 1, 0));
 
   return GlibcSupportsFloat128(Linux::getDynamicLinker(Args)) &&
          !(D.CCCIsCXX() && HasUnsupportedCXXLib);
@@ -3560,8 +3560,6 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
     GenerateArg(Args, OPT_fclang_abi_compat_EQ, "11.0", SA);
   else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver12)
     GenerateArg(Args, OPT_fclang_abi_compat_EQ, "12.0", SA);
-  else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver13)
-    GenerateArg(Args, OPT_fclang_abi_compat_EQ, "13.0", SA);
 
   if (Opts.getSignReturnAddressScope() ==
       LangOptions::SignReturnAddressScopeKind::All)

@@ -4064,8 +4062,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
         Opts.setClangABICompat(LangOptions::ClangABI::Ver11);
       else if (Major <= 12)
         Opts.setClangABICompat(LangOptions::ClangABI::Ver12);
-      else if (Major <= 13)
-        Opts.setClangABICompat(LangOptions::ClangABI::Ver13);
     } else if (Ver != "latest") {
       Diags.Report(diag::err_drv_invalid_value)
           << A->getAsString(Args) << A->getValue();
@@ -6012,7 +6012,9 @@ NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D,
       (ParentDependsOnArgs && (ParentDC->isFunctionOrMethod() ||
                                isa<OMPDeclareReductionDecl>(ParentDC) ||
                                isa<OMPDeclareMapperDecl>(ParentDC))) ||
-      (isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda())) {
+      (isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda() &&
+       cast<CXXRecordDecl>(D)->getTemplateDepth() >
+           TemplateArgs.getNumRetainedOuterLevels())) {
     // D is a local of some kind. Look into the map of local
     // declarations to their instantiations.
     if (CurrentInstantiationScope) {
@@ -170,7 +170,25 @@ struct __is_std_span : false_type {};
 template <class _Tp, size_t _Sz>
 struct __is_std_span<span<_Tp, _Sz>> : true_type {};
 
-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+// This is a temporary workaround until we ship <ranges> -- we've unfortunately been
+// shipping <span> before its API was finalized, and we used to provide a constructor
+// from container types that had the requirements below. To avoid breaking code that
+// has started relying on the range-based constructor until we ship all of <ranges>,
+// we emulate the constructor requirements like this.
+template <class _Range, class _ElementType, class = void>
+struct __span_compatible_range : false_type { };
+
+template <class _Range, class _ElementType>
+struct __span_compatible_range<_Range, _ElementType, void_t<
+    enable_if_t<!__is_std_span<remove_cvref_t<_Range>>::value>,
+    enable_if_t<!__is_std_array<remove_cvref_t<_Range>>::value>,
+    enable_if_t<!is_array_v<remove_cvref_t<_Range>>>,
+    decltype(data(declval<_Range>())),
+    decltype(size(declval<_Range>())),
+    enable_if_t<is_convertible_v<remove_pointer_t<decltype(data(declval<_Range&>()))>(*)[], _ElementType(*)[]>>
+>> : true_type { };
+#else
 template <class _Range, class _ElementType>
 concept __span_compatible_range =
   ranges::contiguous_range<_Range> &&

@@ -248,7 +266,22 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     constexpr span(const array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {}
 
-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+    template <class _Container, class = enable_if_t<
+        __span_compatible_range<_Container, element_type>::value
+    >>
+    _LIBCPP_INLINE_VISIBILITY
+    constexpr explicit span(_Container& __c) : __data{std::data(__c)} {
+        _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)");
+    }
+    template <class _Container, class = enable_if_t<
+        __span_compatible_range<const _Container, element_type>::value
+    >>
+    _LIBCPP_INLINE_VISIBILITY
+    constexpr explicit span(const _Container& __c) : __data{std::data(__c)} {
+        _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)");
+    }
+#else
     template <__span_compatible_range<element_type> _Range>
     _LIBCPP_INLINE_VISIBILITY
     constexpr explicit span(_Range&& __r) : __data{ranges::data(__r)} {

@@ -434,7 +467,18 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     constexpr span(const array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {}
 
-#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES)
+    template <class _Container, class = enable_if_t<
+        __span_compatible_range<_Container, element_type>::value
+    >>
+    _LIBCPP_INLINE_VISIBILITY
+    constexpr span(_Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {}
+    template <class _Container, class = enable_if_t<
+        __span_compatible_range<const _Container, element_type>::value
+    >>
+    _LIBCPP_INLINE_VISIBILITY
+    constexpr span(const _Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {}
+#else
     template <__span_compatible_range<element_type> _Range>
     _LIBCPP_INLINE_VISIBILITY
     constexpr span(_Range&& __r) : __data(ranges::data(__r)), __size{ranges::size(__r)} {}
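The three <span> hunks above add an enable_if-based emulation of the
range/container constructor for configurations where concepts or a complete
<ranges> are unavailable. A sketch of the user code this keeps compiling
(assumes a C++20 standard library; not taken from the merge):

```cpp
#include <span>
#include <vector>

int sum(std::span<const int> s) {
  int total = 0;
  for (int v : s)
    total += v;
  return total;
}

int main() {
  std::vector<int> v{1, 2, 3};
  // Container constructor, selected via __span_compatible_range when the
  // concept-based overload set is not available.
  return sum(std::span<const int>{v});
}
```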
@@ -926,8 +926,14 @@ void Writer::createSections() {
     // Move DISCARDABLE (or non-memory-mapped) sections to the end of file
     // because the loader cannot handle holes. Stripping can remove other
     // discardable ones than .reloc, which is first of them (created early).
-    if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
+    if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) {
+      // Move discardable sections named .debug_ to the end, after other
+      // discardable sections. Stripping only removes the sections named
+      // .debug_* - thus try to avoid leaving holes after stripping.
+      if (s->name.startswith(".debug_"))
+        return 3;
       return 2;
+    }
     // .rsrc should come at the end of the non-discardable sections because its
     // size may change by the Win32 UpdateResources() function, causing
     // subsequent sections to move (see https://crbug.com/827082).
@@ -52,6 +52,9 @@ const unsigned TotalAllocaSizeRecursiveCaller = 1024;
 /// Do not inline dynamic allocas that have been constant propagated to be
 /// static allocas above this amount in bytes.
 const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
+
+const char FunctionInlineCostMultiplierAttributeName[] =
+    "function-inline-cost-multiplier";
 } // namespace InlineConstants
 
 // The cost-benefit pair computed by cost-benefit analysis.

@@ -217,6 +220,8 @@ struct InlineParams {
   Optional<bool> AllowRecursiveCall = false;
 };
 
+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);
+
 /// Generate the parameters to tune the inline cost analysis based only on the
 /// commandline options.
 InlineParams getInlineParams();
@@ -133,7 +133,8 @@ Pass *createIndVarSimplifyPass();
 //
 Pass *createLICMPass();
 Pass *createLICMPass(unsigned LicmMssaOptCap,
-                     unsigned LicmMssaNoAccForPromotionCap);
+                     unsigned LicmMssaNoAccForPromotionCap,
+                     bool AllowSpeculation);
 
 //===----------------------------------------------------------------------===//
 //
@@ -46,14 +46,18 @@ extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;
 class LICMPass : public PassInfoMixin<LICMPass> {
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 
 public:
   LICMPass()
       : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+           bool LicmAllowSpeculation)
       : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };

@@ -62,14 +66,18 @@ public:
 class LNICMPass : public PassInfoMixin<LNICMPass> {
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 
 public:
   LNICMPass()
       : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+            bool LicmAllowSpeculation)
      : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
   PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
@@ -171,10 +171,13 @@ bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
 /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
 /// instructions of the loop and loop safety information as arguments.
 /// Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                  BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
                  MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
-                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
+                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
+                 bool AllowSpeculation);
 
 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is infact dead.

@@ -204,12 +207,14 @@ void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
 /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
 /// of the loop and loop safety information as arguments.
 /// Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool promoteLoopAccessesToScalars(
     const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
     SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
     PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
     Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-    OptimizationRemarkEmitter *);
+    OptimizationRemarkEmitter *, bool AllowSpeculation);
 
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.
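The new AllowSpeculation flag threaded through hoistRegion and
promoteLoopAccessesToScalars gates hoisting of loop-invariant instructions
that are safe to speculate but not guaranteed to execute. A hypothetical
source-level example (not from the merge) of the kind of hoist it controls:

```cpp
// fdiv is safe to speculatively execute: with AllowSpeculation=true LICM may
// hoist a/b into the preheader even though it only runs when cond holds;
// with AllowSpeculation=false it stays inside the branch.
double scale(const double *v, int n, double a, double b, bool cond) {
  double s = 0.0;
  for (int i = 0; i < n; ++i)
    if (cond)
      s += v[i] * (a / b);  // loop-invariant, conditionally executed
  return s;
}
```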
@@ -23,6 +23,7 @@ class AssumptionCache;
 struct SimplifyCFGOptions {
   int BonusInstThreshold = 1;
   bool ForwardSwitchCondToPhi = false;
+  bool ConvertSwitchRangeToICmp = false;
   bool ConvertSwitchToLookupTable = false;
   bool NeedCanonicalLoop = true;
   bool HoistCommonInsts = false;

@@ -41,6 +42,10 @@ struct SimplifyCFGOptions {
     ForwardSwitchCondToPhi = B;
     return *this;
   }
+  SimplifyCFGOptions &convertSwitchRangeToICmp(bool B) {
+    ConvertSwitchRangeToICmp = B;
+    return *this;
+  }
   SimplifyCFGOptions &convertSwitchToLookupTable(bool B) {
     ConvertSwitchToLookupTable = B;
     return *this;
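The new ConvertSwitchRangeToICmp toggle follows the existing fluent-builder
pattern of SimplifyCFGOptions, so pipeline code can chain it with the other
setters exactly as the pass-pipeline hunks below do. A sketch (LLVM headers
assumed; the helper name is illustrative):

```cpp
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
using namespace llvm;

void addLateCleanup(FunctionPassManager &FPM) {
  // Each setter returns *this, so the options chain.
  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                  .convertSwitchRangeToICmp(true)
                                  .hoistCommonInsts(true)
                                  .sinkCommonInsts(true)));
}
```

The same toggle is also exposed to textual pipelines as
switch-range-to-icmp / no-switch-range-to-icmp via the PassRegistry.def hunk
further down.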
@@ -133,8 +133,6 @@ static cl::opt<bool> DisableGEPConstOperand(
     cl::desc("Disables evaluation of GetElementPtr with constant operands"));
 
-namespace {
-class InlineCostCallAnalyzer;
-
 /// This function behaves more like CallBase::hasFnAttr: when it looks for the
 /// requested attribute, it check both the call instruction and the called
 /// function (if it's available and operand bundles don't prohibit that).

@@ -151,7 +149,9 @@ Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
 
   return {};
 }
+} // namespace
 
+namespace llvm {
 Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
   Attribute Attr = getFnAttr(CB, AttrKind);
   int AttrValue;

@@ -159,6 +159,10 @@ Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
     return None;
   return AttrValue;
 }
+} // namespace llvm
+
+namespace {
+class InlineCostCallAnalyzer;
 
 // This struct is used to store information about inline cost of a
 // particular instruction

@@ -904,6 +908,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
             getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
       Cost = *AttrCost;
 
+    if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
+            CandidateCall,
+            InlineConstants::FunctionInlineCostMultiplierAttributeName))
+      Cost *= *AttrCostMult;
+
     if (Optional<int> AttrThreshold =
             getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
       Threshold = *AttrThreshold;
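getStringFnAttrAsInt is moved into the llvm namespace so the inliner (further
down in this merge) can read back the multiplier it plants on call sites. A
sketch of the round trip, using only calls that appear in this diff (the
helper function itself is hypothetical):

```cpp
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

void doubleInlineCost(CallBase &CB) {
  // The multiplier travels as a string call-site attribute...
  CB.addFnAttr(Attribute::get(
      CB.getContext(),
      InlineConstants::FunctionInlineCostMultiplierAttributeName, "2"));
  // ...and is parsed back as an integer when the call is analyzed.
  Optional<int> Mult = getStringFnAttrAsInt(
      CB, InlineConstants::FunctionInlineCostMultiplierAttributeName);
  (void)Mult; // here: Mult == 2
}
```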
@@ -18,12 +18,14 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"

@@ -429,6 +431,16 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   RegClassInfo.runOnMachineFunction(MF);
 
+  // MachineSink currently uses MachineLoopInfo, which only recognizes natural
+  // loops. As such, we could sink instructions into irreducible cycles, which
+  // would be non-profitable.
+  // WARNING: The current implementation of hasStoreBetween() is incorrect for
+  // sinking into irreducible cycles (PR53990), this bailout is currently
+  // necessary for correctness, not just profitability.
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI))
+    return false;
+
   bool EverMadeChange = false;
 
   while (true) {
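containsIrreducibleCFG makes machine sinking bail whenever the function has a
cycle that is not a natural loop. A hypothetical C++ function that produces
such a CFG (two distinct entries into one cycle), for illustration only:

```cpp
int f(int n, bool enterMid) {
  int x = 0;
  if (enterMid)
    goto mid;    // second entry into the cycle below
top:
  ++x;
mid:
  if (x < n)
    goto top;    // cycle {top, mid} has two entry points, so it is
  return x;      // irreducible and MachineLoopInfo does not model it
}
```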
@@ -140,36 +140,58 @@ raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {
 }
 #endif
 
-// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
+// Write Value as an (unsigned) LEB value at offset Offset in Stream, padded
 // to allow patching.
-template <int W>
-void writePatchableLEB(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableULEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
   uint8_t Buffer[W];
-  unsigned SizeLen = encodeULEB128(X, Buffer, W);
+  unsigned SizeLen = encodeULEB128(Value, Buffer, W);
   assert(SizeLen == W);
   Stream.pwrite((char *)Buffer, SizeLen, Offset);
 }
 
-// Write X as an signed LEB value at offset Offset in Stream, padded
+// Write Value as an signed LEB value at offset Offset in Stream, padded
 // to allow patching.
-template <int W>
-void writePatchableSLEB(raw_pwrite_stream &Stream, int64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableSLEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
   uint8_t Buffer[W];
-  unsigned SizeLen = encodeSLEB128(X, Buffer, W);
+  unsigned SizeLen = encodeSLEB128(Value, Buffer, W);
   assert(SizeLen == W);
   Stream.pwrite((char *)Buffer, SizeLen, Offset);
 }
 
-// Write X as a plain integer value at offset Offset in Stream.
-static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+static void writePatchableU32(raw_pwrite_stream &Stream, uint32_t Value,
+                              uint64_t Offset) {
+  writePatchableULEB<uint32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableS32(raw_pwrite_stream &Stream, int32_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<int32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableU64(raw_pwrite_stream &Stream, uint64_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<uint64_t, 10>(Stream, Value, Offset);
+}
+
+static void writePatchableS64(raw_pwrite_stream &Stream, int64_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<int64_t, 10>(Stream, Value, Offset);
+}
+
+// Write Value as a plain integer value at offset Offset in Stream.
+static void patchI32(raw_pwrite_stream &Stream, uint32_t Value,
+                     uint64_t Offset) {
   uint8_t Buffer[4];
-  support::endian::write32le(Buffer, X);
+  support::endian::write32le(Buffer, Value);
   Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
 }
 
-static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+static void patchI64(raw_pwrite_stream &Stream, uint64_t Value,
+                     uint64_t Offset) {
   uint8_t Buffer[8];
-  support::endian::write64le(Buffer, X);
+  support::endian::write64le(Buffer, Value);
   Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
 }
 

@@ -423,8 +445,8 @@ void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
 
   // Write the final section size to the payload_len field, which follows
   // the section id byte.
-  writePatchableLEB<5>(static_cast<raw_pwrite_stream &>(W->OS), Size,
-                       Section.SizeOffset);
+  writePatchableU32(static_cast<raw_pwrite_stream &>(W->OS), Size,
+                    Section.SizeOffset);
 }
 
 // Emit the Wasm header.

@@ -755,7 +777,7 @@ void WasmObjectWriter::applyRelocations(
                       RelEntry.Offset;
 
     LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n");
-    auto Value = getProvisionalValue(RelEntry, Layout);
+    uint64_t Value = getProvisionalValue(RelEntry, Layout);
 
     switch (RelEntry.Type) {
     case wasm::R_WASM_FUNCTION_INDEX_LEB:

@@ -764,10 +786,10 @@ void WasmObjectWriter::applyRelocations(
     case wasm::R_WASM_MEMORY_ADDR_LEB:
     case wasm::R_WASM_TAG_INDEX_LEB:
     case wasm::R_WASM_TABLE_NUMBER_LEB:
-      writePatchableLEB<5>(Stream, Value, Offset);
+      writePatchableU32(Stream, Value, Offset);
       break;
     case wasm::R_WASM_MEMORY_ADDR_LEB64:
-      writePatchableLEB<10>(Stream, Value, Offset);
+      writePatchableU64(Stream, Value, Offset);
      break;
     case wasm::R_WASM_TABLE_INDEX_I32:
     case wasm::R_WASM_MEMORY_ADDR_I32:

@@ -787,14 +809,14 @@ void WasmObjectWriter::applyRelocations(
     case wasm::R_WASM_MEMORY_ADDR_SLEB:
     case wasm::R_WASM_MEMORY_ADDR_REL_SLEB:
     case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB:
-      writePatchableSLEB<5>(Stream, Value, Offset);
+      writePatchableS32(Stream, Value, Offset);
       break;
     case wasm::R_WASM_TABLE_INDEX_SLEB64:
     case wasm::R_WASM_TABLE_INDEX_REL_SLEB64:
     case wasm::R_WASM_MEMORY_ADDR_SLEB64:
    case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64:
     case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64:
-      writePatchableSLEB<10>(Stream, Value, Offset);
+      writePatchableS64(Stream, Value, Offset);
       break;
     default:
       llvm_unreachable("invalid relocation type");
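All of the writePatchable* helpers rely on fixed-width (padded) LEB128: the
field always occupies exactly W bytes, so a section size or relocation target
can be patched in place later without moving the bytes that follow. A
standalone sketch of the padding idea (not LLVM's encodeULEB128):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

template <int W> void encodePaddedULEB128(uint64_t Value, uint8_t Out[W]) {
  for (int i = 0; i < W; ++i) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (i != W - 1)
      Byte |= 0x80;  // continuation bit keeps the field exactly W bytes wide
    Out[i] = Byte;
  }
  assert(Value == 0 && "value does not fit in W LEB bytes");
}

int main() {
  uint8_t Buf[5];
  encodePaddedULEB128<5>(624485, Buf); // normally 0xe5 0x8e 0x26, padded to 5
  for (uint8_t B : Buf)
    std::printf("%02x ", B);           // prints: e5 8e a6 80 00
}
```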
@@ -679,6 +679,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
     bool Enable = !ParamName.consume_front("no-");
     if (ParamName == "forward-switch-cond") {
       Result.forwardSwitchCondToPhi(Enable);
+    } else if (ParamName == "switch-range-to-icmp") {
+      Result.convertSwitchRangeToICmp(Enable);
     } else if (ParamName == "switch-to-lookup") {
       Result.convertSwitchToLookupTable(Enable);
     } else if (ParamName == "keep-loops") {
@@ -259,14 +259,16 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
 
   // Hoisting of scalars and load expressions.
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
 
   FPM.addPass(LibCallsShrinkWrapPass());
 
   invokePeepholeEPCallbacks(FPM, Level);
 
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
 
   // Form canonically associated expression trees, and simplify the trees using
   // basic mathematical properties. For example, this will form (nearly)

@@ -291,14 +293,19 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/false));
 
   LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
                               isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   LPM1.addPass(SimpleLoopUnswitchPass());
   if (EnableLoopFlatten)
     LPM1.addPass(LoopFlattenPass());

@@ -335,7 +342,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                               /*UseMemorySSA=*/true,
                                               /*UseBlockFrequencyInfo=*/true));
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
   // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
   // *All* loop passes must preserve it, in order to be able to use it.

@@ -373,7 +381,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   // the simplifications and basic cleanup after all the simplifications.
   // TODO: Investigate if this is too expensive.
   FPM.addPass(ADCEPass());
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 

@@ -408,7 +417,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   // Global value numbering based sinking.
   if (EnableGVNSink) {
     FPM.addPass(GVNSinkPass());
-    FPM.addPass(SimplifyCFGPass());
+    FPM.addPass(
+        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   }
 
   if (EnableConstraintElimination)

@@ -421,7 +431,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(JumpThreadingPass());
   FPM.addPass(CorrelatedValuePropagationPass());
 
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
   if (Level == OptimizationLevel::O3)
     FPM.addPass(AggressiveInstCombinePass());

@@ -438,7 +449,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
     FPM.addPass(PGOMemOPSizeOpt());
 
   FPM.addPass(TailCallElimPass());
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
 
   // Form canonically associated expression trees, and simplify the trees using
   // basic mathematical properties. For example, this will form (nearly)

@@ -463,15 +475,20 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/false));
 
   // Disable header duplication in loop rotation at -Oz.
   LPM1.addPass(
       LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   LPM1.addPass(
       SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
                              EnableO3NonTrivialUnswitching));

@@ -510,7 +527,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
                                               /*UseMemorySSA=*/true,
                                               /*UseBlockFrequencyInfo=*/true));
-  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   FPM.addPass(InstCombinePass());
   // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
   // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.

@@ -567,7 +585,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
 
   FPM.addPass(DSEPass());
   FPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+               /*AllowSpeculation=*/true),
       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
 
   FPM.addPass(CoroElidePass());

@@ -575,8 +594,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   for (auto &C : ScalarOptimizerLateEPCallbacks)
     C(FPM, Level);
 
-  FPM.addPass(SimplifyCFGPass(
-      SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
+  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+                                  .convertSwitchRangeToICmp(true)
+                                  .hoistCommonInsts(true)
+                                  .sinkCommonInsts(true)));
   FPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 

@@ -614,7 +635,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
     FunctionPassManager FPM;
     FPM.addPass(SROAPass());
     FPM.addPass(EarlyCSEPass());    // Catch trivial redundancies.
-    FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+        true)));                    // Merge & remove basic blocks.
     FPM.addPass(InstCombinePass()); // Combine silly sequences.
     invokePeepholeEPCallbacks(FPM, Level);
 

@@ -928,7 +950,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   GlobalCleanupPM.addPass(InstCombinePass());
   invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
 
-  GlobalCleanupPM.addPass(SimplifyCFGPass());
+  GlobalCleanupPM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
                                                 PTO.EagerlyInvalidateAnalyses));
 

@@ -1007,7 +1030,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
     ExtraPasses.addPass(CorrelatedValuePropagationPass());
     ExtraPasses.addPass(InstCombinePass());
     LoopPassManager LPM;
-    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/true));
     LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
                                            OptimizationLevel::O3));
     ExtraPasses.addPass(

@@ -1015,7 +1039,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
     ExtraPasses.addPass(
         createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
                                         /*UseBlockFrequencyInfo=*/true));
-    ExtraPasses.addPass(SimplifyCFGPass());
+    ExtraPasses.addPass(
+        SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
     ExtraPasses.addPass(InstCombinePass());
     FPM.addPass(std::move(ExtraPasses));
   }

@@ -1031,6 +1056,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   // before SLP vectorization.
   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
                                   .forwardSwitchCondToPhi(true)
+                                  .convertSwitchRangeToICmp(true)
                                   .convertSwitchToLookupTable(true)
                                   .needCanonicalLoops(false)
                                   .hoistCommonInsts(true)

@@ -1073,7 +1099,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
     FPM.addPass(
         RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
     FPM.addPass(createFunctionToLoopPassAdaptor(
-        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                 /*AllowSpeculation=*/true),
         /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
   }
 

@@ -1202,7 +1229,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
 
   // LoopSink (and other loop passes since the last simplifyCFG) might have
   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
-  OptimizePM.addPass(SimplifyCFGPass());
+  OptimizePM.addPass(
+      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
 
   OptimizePM.addPass(CoroCleanupPass());
 

@@ -1612,7 +1640,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
 
   FunctionPassManager MainFPM;
   MainFPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+               /*AllowSpeculation=*/true),
       /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
 
   if (RunNewGVN)

@@ -1676,8 +1705,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
 
   // Add late LTO optimization passes.
   // Delete basic blocks, which optimization passes may have killed.
-  MPM.addPass(createModuleToFunctionPassAdaptor(
-      SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
+  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(
+      SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
+          true))));
 
   // Drop bodies of available eternally objects to improve GlobalDCE.
   MPM.addPass(EliminateAvailableExternallyPass());
@@ -423,6 +423,7 @@ FUNCTION_PASS_WITH_PARAMS("simplifycfg",
                           },
                           parseSimplifyCFGOptions,
                           "no-forward-switch-cond;forward-switch-cond;"
+                          "no-switch-range-to-icmp;switch-range-to-icmp;"
                           "no-switch-to-lookup;switch-to-lookup;"
                           "no-keep-loops;keep-loops;"
                           "no-hoist-common-insts;hoist-common-insts;"
@@ -531,6 +531,7 @@ void AArch64PassConfig::addIRPasses() {
   if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
     addPass(createCFGSimplificationPass(SimplifyCFGOptions()
                                             .forwardSwitchCondToPhi(true)
+                                            .convertSwitchRangeToICmp(true)
                                             .convertSwitchToLookupTable(true)
                                             .needCanonicalLoops(false)
                                             .hoistCommonInsts(true)
@@ -118,9 +118,10 @@ HexagonTargetLowering::initializeHVXLowering() {
       setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal);
       // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
       // generated.
-      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
-      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
-      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
+      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
+      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
+      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
+      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
 
       // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
       // independent) handling of it would convert it to a load, which is

@@ -780,7 +781,6 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
   SDValue N = HalfV0;
   SDValue M = HalfV1;
   for (unsigned i = 0; i != NumWords/2; ++i) {
-
     // Rotate by element count since last insertion.
     if (Words[i] != Words[n] || VecHist[n] <= 1) {
       Sn = DAG.getConstant(Rn, dl, MVT::i32);
@@ -345,6 +345,7 @@ void HexagonPassConfig::addIRPasses() {
     if (EnableInitialCFGCleanup)
       addPass(createCFGSimplificationPass(SimplifyCFGOptions()
                                               .forwardSwitchCondToPhi(true)
+                                              .convertSwitchRangeToICmp(true)
                                               .convertSwitchToLookupTable(true)
                                               .needCanonicalLoops(false)
                                               .hoistCommonInsts(true)
@@ -4732,18 +4732,19 @@ MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI,
 Register
 MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
-  // Named registers is expected to be fairly rare. For now, just support $28
-  // since the linux kernel uses it.
+  // The Linux kernel uses $28 and sp.
   if (Subtarget.isGP64bit()) {
     Register Reg = StringSwitch<Register>(RegName)
-                       .Case("$28", Mips::GP_64)
-                       .Default(Register());
+                       .Case("$28", Mips::GP_64)
+                       .Case("sp", Mips::SP_64)
+                       .Default(Register());
     if (Reg)
       return Reg;
   } else {
     Register Reg = StringSwitch<Register>(RegName)
-                       .Case("$28", Mips::GP)
-                       .Default(Register());
+                       .Case("$28", Mips::GP)
+                       .Case("sp", Mips::SP)
+                       .Default(Register());
     if (Reg)
       return Reg;
   }
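Resolving the name sp here makes GNU-style global named register variables
usable for the stack pointer on MIPS, the construct the updated comment refers
to. An illustrative snippet in the style the Linux kernel uses (hypothetical,
not part of the merge):

```cpp
// Reading the stack pointer through a named register variable; clang lowers
// the read via getRegisterByName("sp", ...) on MIPS targets.
register unsigned long current_stack_pointer asm("sp");

unsigned long readSp(void) { return current_stack_pointer; }
```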
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"

@@ -92,6 +93,18 @@ static cl::opt<bool>
     DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                 cl::init(false), cl::Hidden);
 
+static cl::opt<int> IntraSCCCostMultiplier(
+    "intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
+    cl::desc(
+        "Cost multiplier to multiply onto inlined call sites where the "
+        "new call was previously an intra-SCC call (not relevant when the "
+        "original call was already intra-SCC). This can accumulate over "
+        "multiple inlinings (e.g. if a call site already had a cost "
+        "multiplier and one of its inlined calls was also subject to "
+        "this, the inlined call would have the original multiplier "
+        "multiplied by intra-scc-cost-multiplier). This is to prevent tons of "
+        "inlining through a child SCC which can cause terrible compile times"));
+
 /// A flag for test, so we can print the content of the advisor when running it
 /// as part of the default (e.g. -O3) pipeline.
 static cl::opt<bool> KeepAdvisorForPrinting("keep-inline-advisor-for-printing",

@@ -876,8 +889,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
       // trigger infinite inlining, much like is prevented within the inliner
       // itself by the InlineHistory above, but spread across CGSCC iterations
       // and thus hidden from the full inline history.
-      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
-          UR.InlinedInternalEdges.count({&N, C})) {
+      LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+      if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
         LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                              "previously split out of this SCC by inlining: "
                           << F.getName() << " -> " << Callee.getName() << "\n");

@@ -897,6 +910,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
         continue;
       }
 
+      int CBCostMult =
+          getStringFnAttrAsInt(
+              *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
+              .getValueOr(1);
+
       // Setup the data structure used to plumb customization into the
       // `InlineFunction` routine.
       InlineFunctionInfo IFI(

@@ -935,9 +953,28 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
           if (tryPromoteCall(*ICB))
             NewCallee = ICB->getCalledFunction();
         }
-        if (NewCallee)
-          if (!NewCallee->isDeclaration())
+        if (NewCallee) {
+          if (!NewCallee->isDeclaration()) {
             Calls->push({ICB, NewHistoryID});
+            // Continually inlining through an SCC can result in huge compile
+            // times and bloated code since we arbitrarily stop at some point
+            // when the inliner decides it's not profitable to inline anymore.
+            // We attempt to mitigate this by making these calls exponentially
+            // more expensive.
+            // This doesn't apply to calls in the same SCC since if we do
+            // inline through the SCC the function will end up being
+            // self-recursive which the inliner bails out on, and inlining
+            // within an SCC is necessary for performance.
+            if (CalleeSCC != C &&
+                CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) {
+              Attribute NewCBCostMult = Attribute::get(
+                  M.getContext(),
+                  InlineConstants::FunctionInlineCostMultiplierAttributeName,
+                  itostr(CBCostMult * IntraSCCCostMultiplier));
+              ICB->addFnAttr(NewCBCostMult);
+            }
+          }
+        }
       }
     }
 
@@ -3712,9 +3712,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
   //                  __kmpc_get_hardware_num_threads_in_block();
   // WarpSize = __kmpc_get_warp_size();
   // BlockSize = BlockHwSize - WarpSize;
-  // if (InitCB >= BlockSize) return;
-  // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
+  // IsWorkerCheckBB: bool IsWorker = InitCB != -1;
   // if (IsWorker) {
+  //   if (InitCB >= BlockSize) return;
   //   SMBeginBB: __kmpc_barrier_simple_generic(...);
   //   void *WorkFn;
   //   bool Active = __kmpc_kernel_parallel(&WorkFn);

@@ -3771,6 +3771,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
     ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
     InitBB->getTerminator()->eraseFromParent();
 
+    Instruction *IsWorker =
+        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
+                         ConstantInt::get(KernelInitCB->getType(), -1),
+                         "thread.is_worker", InitBB);
+    IsWorker->setDebugLoc(DLoc);
+    BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
+
     Module &M = *Kernel->getParent();
     auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
     FunctionCallee BlockHwSizeFn =

@@ -3780,29 +3787,22 @@ struct AAKernelInfoFunction : AAKernelInfo {
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_get_warp_size);
     CallInst *BlockHwSize =
-        CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+        CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
     OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
     BlockHwSize->setDebugLoc(DLoc);
-    CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+    CallInst *WarpSize =
+        CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
     OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
     WarpSize->setDebugLoc(DLoc);
-    Instruction *BlockSize =
-        BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+    Instruction *BlockSize = BinaryOperator::CreateSub(
+        BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
     BlockSize->setDebugLoc(DLoc);
-    Instruction *IsMainOrWorker =
-        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
-                         BlockSize, "thread.is_main_or_worker", InitBB);
+    Instruction *IsMainOrWorker = ICmpInst::Create(
+        ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
+        "thread.is_main_or_worker", IsWorkerCheckBB);
     IsMainOrWorker->setDebugLoc(DLoc);
-    BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
-                       InitBB);
-
-    Instruction *IsWorker =
-        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
-                         ConstantInt::get(KernelInitCB->getType(), -1),
-                         "thread.is_worker", IsWorkerCheckBB);
-    IsWorker->setDebugLoc(DLoc);
-    BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
-                       IsWorkerCheckBB);
+    BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
+                       IsMainOrWorker, IsWorkerCheckBB);
 
     // Create local storage for the work function pointer.
     const DataLayout &DL = M.getDataLayout();
@@ -365,7 +365,9 @@ void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM,
     MPM.add(createFunctionInliningPass(IP));
     MPM.add(createSROAPass());
     MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
-    MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+    MPM.add(createCFGSimplificationPass(
+        SimplifyCFGOptions().convertSwitchRangeToICmp(
+            true))); // Merge & remove BBs
     MPM.add(createInstructionCombiningPass()); // Combine silly seq's
     addExtensionsToPM(EP_Peephole, MPM);
   }

@@ -404,7 +406,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
     MPM.add(createGVNHoistPass());
   if (EnableGVNSink) {
     MPM.add(createGVNSinkPass());
-    MPM.add(createCFGSimplificationPass());
+    MPM.add(createCFGSimplificationPass(
+        SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   }
 }
 

@@ -418,7 +421,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
     MPM.add(createJumpThreadingPass()); // Thread jumps.
     MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
   }
-  MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+  MPM.add(
+      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+          true))); // Merge & remove BBs
   // Combine silly seq's
   if (OptLevel > 2)
     MPM.add(createAggressiveInstCombinerPass());

@@ -434,7 +439,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   // TODO: Investigate the cost/benefit of tail call elimination on debugging.
   if (OptLevel > 1)
     MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
-  MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+  MPM.add(
+      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+          true))); // Merge & remove BBs
   MPM.add(createReassociatePass()); // Reassociate expressions
 
   // The matrix extension can introduce large vector operations early, which can

@@ -451,13 +458,18 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
     MPM.add(createLoopSimplifyCFGPass());
   }
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/false));
   // Rotate Loop - disable header duplication at -Oz
   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
   // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/true));
   if (EnableSimpleLoopUnswitch)
     MPM.add(createSimpleLoopUnswitchLegacyPass());
   else

@@ -465,7 +477,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   // FIXME: We break the loop pass pipeline here in order to do full
   // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
   // need for this.
-  MPM.add(createCFGSimplificationPass());
+  MPM.add(createCFGSimplificationPass(
+      SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
   MPM.add(createInstructionCombiningPass());
   // We resume loop passes creating a second loop pipeline here.
   if (EnableLoopFlatten) {

@@ -521,7 +534,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
   // TODO: Investigate if this is too expensive at O1.
   if (OptLevel > 1) {
     MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                           /*AllowSpeculation=*/true));
   }
 
   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);

@@ -580,9 +594,11 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
     PM.add(createEarlyCSEPass());
     PM.add(createCorrelatedValuePropagationPass());
     PM.add(createInstructionCombiningPass());
-    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                          /*AllowSpeculation=*/true));
     PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
-    PM.add(createCFGSimplificationPass());
+    PM.add(createCFGSimplificationPass(
+        SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
     PM.add(createInstructionCombiningPass());
   }
 

@@ -597,6 +613,7 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
   // before SLP vectorization.
   PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
                                          .forwardSwitchCondToPhi(true)
+                                         .convertSwitchRangeToICmp(true)
                                          .convertSwitchToLookupTable(true)
                                          .needCanonicalLoops(false)
                                          .hoistCommonInsts(true)

@@ -641,7 +658,8 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
     // unrolled loop is a inner loop, then the prologue will be inside the
     // outer loop. LICM pass can help to promote the runtime check out if the
     // checked value is loop invariant.
-    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                          /*AllowSpeculation=*/true));
   }
 
   PM.add(createWarnMissedTransformationsPass());

@@ -772,7 +790,9 @@ void PassManagerBuilder::populateModulePassManager(
 
   MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
   addExtensionsToPM(EP_Peephole, MPM);
-  MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+  MPM.add(
+      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+          true))); // Clean up after IPCP & DAE
 
   // For SamplePGO in ThinLTO compile phase, we do not want to do indirect
   // call promotion as it will change the CFG too much to make the 2nd

@@ -886,7 +906,8 @@ void PassManagerBuilder::populateModulePassManager(
   // later might get benefit of no-alias assumption in clone loop.
   if (UseLoopVersioningLICM) {
     MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                           /*AllowSpeculation=*/true));
   }
 
   // We add a fresh GlobalsModRef run at this point. This is particularly

@@ -972,7 +993,8 @@ void PassManagerBuilder::populateModulePassManager(
 
   // LoopSink (and other loop passes since the last simplifyCFG) might have
   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
-  MPM.add(createCFGSimplificationPass());
+  MPM.add(createCFGSimplificationPass(
+      SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
 
   addExtensionsToPM(EP_OptimizerLast, MPM);
 

@@ -1120,7 +1142,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   // Run a few AA driven optimizations here and now, to cleanup the code.
   PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
 
-  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   PM.add(NewGVN ? createNewGVNPass()
                 : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
   PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@ -149,13 +149,11 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
|
||||
BlockFrequencyInfo *BFI, const Loop *CurLoop,
|
||||
ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
|
||||
OptimizationRemarkEmitter *ORE);
|
||||
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
|
||||
const DominatorTree *DT,
|
||||
const TargetLibraryInfo *TLI,
|
||||
const Loop *CurLoop,
|
||||
const LoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE,
|
||||
const Instruction *CtxI = nullptr);
|
||||
static bool isSafeToExecuteUnconditionally(
|
||||
Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
|
||||
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
|
||||
OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
|
||||
bool AllowSpeculation);
|
||||
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
|
||||
AliasSetTracker *CurAST, Loop *CurLoop,
|
||||
AAResults *AA);
|
||||
@ -188,21 +186,26 @@ struct LoopInvariantCodeMotion {
|
||||
OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
|
||||
|
||||
LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
|
||||
unsigned LicmMssaNoAccForPromotionCap)
|
||||
unsigned LicmMssaNoAccForPromotionCap,
|
||||
bool LicmAllowSpeculation)
|
||||
: LicmMssaOptCap(LicmMssaOptCap),
|
||||
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
|
||||
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
|
||||
LicmAllowSpeculation(LicmAllowSpeculation) {}
|
||||
|
||||
private:
|
||||
unsigned LicmMssaOptCap;
|
||||
unsigned LicmMssaNoAccForPromotionCap;
|
||||
bool LicmAllowSpeculation;
|
||||
};
|
||||
|
||||
struct LegacyLICMPass : public LoopPass {
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
LegacyLICMPass(
|
||||
unsigned LicmMssaOptCap = SetLicmMssaOptCap,
|
||||
unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
|
||||
: LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
|
||||
unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
|
||||
bool LicmAllowSpeculation = true)
|
||||
: LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
|
||||
LicmAllowSpeculation) {
|
||||
initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
@ -265,7 +268,8 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());

LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
LicmAllowSpeculation);
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
&AR.SE, AR.MSSA, &ORE))
return PreservedAnalyses::all();
@ -290,7 +294,8 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(LN.getParent());

LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
LicmAllowSpeculation);

Loop &OutermostLoop = LN.getOutermostLoop();
bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
@ -321,8 +326,10 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
unsigned LicmMssaNoAccForPromotionCap) {
return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
unsigned LicmMssaNoAccForPromotionCap,
bool LicmAllowSpeculation) {
return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
LicmAllowSpeculation);
}

llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
@ -418,7 +425,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
&MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
&MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
LicmAllowSpeculation);

// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@ -460,8 +468,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
for (const SmallSetVector<Value *, 8> &PointerMustAliases :
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
@ -825,7 +833,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
OptimizationRemarkEmitter *ORE, bool LoopNestMode,
bool AllowSpeculation) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
@ -877,7 +886,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
true, &Flags, ORE) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
CurLoop->getLoopPreheader()->getTerminator())) {
CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
@ -1774,14 +1783,12 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
/// Only sink or hoist an instruction if it is not a trapping instruction,
/// or if the instruction is known not to trap when moved to the preheader.
/// or if it is a trapping instruction and is guaranteed to execute.
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
const TargetLibraryInfo *TLI,
const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE,
const Instruction *CtxI) {
if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
static bool isSafeToExecuteUnconditionally(
Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
bool AllowSpeculation) {
if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
return true;

bool GuaranteedToExecute =
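The effect of the new parameter is easiest to see as a decision over the two existing conditions. A simplified, self-contained restatement of the gating (the helper and its parameter names are mine, not the function's own locals):

    // An instruction may be moved out of the loop either because
    // speculation is allowed and the instruction cannot trap, or because
    // the loop structure guarantees the instruction executes anyway.
    static bool mayHoist(bool AllowSpeculation, bool SafeToSpeculate,
                         bool GuaranteedToExecute) {
      if (AllowSpeculation && SafeToSpeculate)
        return true;
      return GuaranteedToExecute;
    }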
@ -1949,7 +1956,7 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {
OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@ -2054,9 +2061,9 @@ bool llvm::promoteLoopAccessesToScalars(
// to execute does as well. Thus we can increase our guaranteed
// alignment as well.
if (!DereferenceableInPH || (InstAlignment > Alignment))
if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
SafetyInfo, ORE,
Preheader->getTerminator())) {
if (isSafeToExecuteUnconditionally(
*Load, DT, TLI, CurLoop, SafetyInfo, ORE,
Preheader->getTerminator(), AllowSpeculation)) {
DereferenceableInPH = true;
Alignment = std::max(Alignment, InstAlignment);
}
|
@ -59,6 +59,11 @@ static cl::opt<bool> UserKeepLoops(
"keep-loops", cl::Hidden, cl::init(true),
cl::desc("Preserve canonical loop structure (default = true)"));

static cl::opt<bool> UserSwitchRangeToICmp(
"switch-range-to-icmp", cl::Hidden, cl::init(false),
cl::desc(
"Convert switches into an integer range comparison (default = false)"));

static cl::opt<bool> UserSwitchToLookup(
"switch-to-lookup", cl::Hidden, cl::init(false),
cl::desc("Convert switches to lookup tables (default = false)"));
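The new toggle follows the file's existing pattern, where a command-line flag only overrides the pass's options when it was explicitly given, as the next hunk shows. A minimal standalone sketch of that idiom (the option name and struct here are stand-ins, not the pass's own):

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> DemoToggle(
        "demo-toggle", llvm::cl::Hidden, llvm::cl::init(false),
        llvm::cl::desc("Stand-in for switch-range-to-icmp"));

    struct DemoOptions { bool ConvertSwitchRangeToICmp = true; };

    static void applyOverrides(DemoOptions &Opts) {
      // getNumOccurrences() distinguishes "left at default" from
      // "explicitly passed", so a pipeline-supplied default survives
      // unless the user actually set the flag.
      if (DemoToggle.getNumOccurrences())
        Opts.ConvertSwitchRangeToICmp = DemoToggle;
    }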
@ -311,6 +316,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.BonusInstThreshold = UserBonusInstThreshold;
if (UserForwardSwitchCond.getNumOccurrences())
Options.ForwardSwitchCondToPhi = UserForwardSwitchCond;
if (UserSwitchRangeToICmp.getNumOccurrences())
Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp;
if (UserSwitchToLookup.getNumOccurrences())
Options.ConvertSwitchToLookupTable = UserSwitchToLookup;
if (UserKeepLoops.getNumOccurrences())
@ -337,6 +344,8 @@ void SimplifyCFGPass::printPipeline(
OS << "<";
OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-")
<< "switch-range-to-icmp;";
OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
<< "switch-to-lookup;";
OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
|
@ -6211,7 +6211,9 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
}

// Try to transform the switch into an icmp and a branch.
if (TurnSwitchRangeIntoICmp(SI, Builder))
// The conversion from switch to comparison may lose information on
// impossible switch values, so disable it early in the pipeline.
if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
return requestResimplify();

// Remove unreachable cases.
|
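Why the transform is gated: a switch tells later passes exactly which values reach each successor, while the equivalent compare does not. An illustrative C++ analogue (mine, not from the patch):

    // With the switch, optimizers know x is 0 or 1 whenever the default is
    // unreachable; every other value is impossible.
    int pick(unsigned x) {
      switch (x) {
      case 0: return 10;
      case 1: return 20;
      default: __builtin_unreachable();
      }
    }

    // After a switch-to-compare rewrite the same function is, in effect,
    // the following; the "only 0 or 1" fact has been erased.
    int pickLowered(unsigned x) { return x == 0 ? 10 : 20; }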
@ -1,10 +1,10 @@
// $FreeBSD$

#define LLVM_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
#define LLVM_REVISION "llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a"
#define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"

#define CLANG_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
#define CLANG_REVISION "llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a"
#define CLANG_REPOSITORY "https://github.com/llvm/llvm-project.git"

#define LLDB_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
#define LLDB_REVISION "llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a"
#define LLDB_REPOSITORY "https://github.com/llvm/llvm-project.git"
|
@ -1,4 +1,4 @@
// Local identifier in __FreeBSD_version style
#define LLD_FREEBSD_VERSION 1400003

#define LLD_VERSION_STRING "14.0.0 (FreeBSD llvmorg-14.0.0-rc2-12-g09546e1b5103-" __XSTRING(LLD_FREEBSD_VERSION) ")"
#define LLD_VERSION_STRING "14.0.0 (FreeBSD llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a-" __XSTRING(LLD_FREEBSD_VERSION) ")"
|
@ -1,3 +1,3 @@
/* $FreeBSD$ */
#define LLVM_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
#define LLVM_REVISION "llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a"
#define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"