Vendor import of llvm-project branch release/11.x

llvmorg-11.0.0-rc2-0-g414f32a9e86.
svn path=/vendor/llvm-project/release-11.x/; revision=364713 svn path=/vendor/llvm-project/llvmorg-11.0.0-rc2-0-g414f32a9e86/; revision=364714; tag=vendor/llvm-project/llvmorg-11.0.0-rc2-0-g414f32a9e86
2020-08-24 17:20:50 +00:00 · 2020-08-24 17:20:50 +00:00 · bdc6feb28f · 2020-12-20 02:59:44 +00:00
commit bdc6feb28f
parent 10c469f2ae
35 changed files with 401 additions and 139 deletions
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@ -54,6 +54,10 @@ class TargetOptions {
  /// be a list of strings starting with by '+' or '-'.
  std::vector<std::string> Features;

+  /// The map of which features have been enabled disabled based on the command
+  /// line.
+  llvm::StringMap<bool> FeatureMap;
+
  /// Supported OpenCL extensions and optional core features.
  OpenCLOptions SupportedOpenCLOptions;

--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@ -1780,7 +1780,7 @@ def fstack_protector_all : Flag<["-"], "fstack-protector-all">, Group<f_Group>,
  HelpText<"Enable stack protectors for all functions">;
 def fstack_clash_protection : Flag<["-"], "fstack-clash-protection">, Group<f_Group>, Flags<[CC1Option]>,
  HelpText<"Enable stack clash protection">;
-def fnostack_clash_protection : Flag<["-"], "fnostack-clash-protection">, Group<f_Group>,
+def fno_stack_clash_protection : Flag<["-"], "fno-stack-clash-protection">, Group<f_Group>,
  HelpText<"Disable stack clash protection">;
 def fstack_protector_strong : Flag<["-"], "fstack-protector-strong">, Group<f_Group>,
  HelpText<"Enable stack protectors for some functions vulnerable to stack smashing. "
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@ -11147,8 +11147,7 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
    std::vector<std::string> Features(FeaturesTmp.begin(), FeaturesTmp.end());
    Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
  } else {
-    Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU,
-                           Target->getTargetOpts().Features);
+    FeatureMap = Target->getTargetOpts().FeatureMap;
  }
 }

--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@ -346,6 +346,8 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
      return new FreeBSDTargetInfo<PPC64TargetInfo>(Triple, Opts);
    case llvm::Triple::NetBSD:
      return new NetBSDTargetInfo<PPC64TargetInfo>(Triple, Opts);
+    case llvm::Triple::OpenBSD:
+      return new OpenBSDTargetInfo<PPC64TargetInfo>(Triple, Opts);
    case llvm::Triple::AIX:
      return new AIXPPC64TargetInfo(Triple, Opts);
    default:
@ -358,6 +360,8 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
      return new LinuxTargetInfo<PPC64TargetInfo>(Triple, Opts);
    case llvm::Triple::NetBSD:
      return new NetBSDTargetInfo<PPC64TargetInfo>(Triple, Opts);
+    case llvm::Triple::OpenBSD:
+      return new OpenBSDTargetInfo<PPC64TargetInfo>(Triple, Opts);
    default:
      return new PPC64TargetInfo(Triple, Opts);
    }
@ -387,6 +391,8 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
    switch (os) {
    case llvm::Triple::FreeBSD:
      return new FreeBSDTargetInfo<RISCV64TargetInfo>(Triple, Opts);
+    case llvm::Triple::OpenBSD:
+      return new OpenBSDTargetInfo<RISCV64TargetInfo>(Triple, Opts);
    case llvm::Triple::Fuchsia:
      return new FuchsiaTargetInfo<RISCV64TargetInfo>(Triple, Opts);
    case llvm::Triple::Linux:
@ -662,14 +668,13 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags,

  // Compute the default target features, we need the target to handle this
  // because features may have dependencies on one another.
-  llvm::StringMap<bool> Features;
-  if (!Target->initFeatureMap(Features, Diags, Opts->CPU,
+  if (!Target->initFeatureMap(Opts->FeatureMap, Diags, Opts->CPU,
                              Opts->FeaturesAsWritten))
    return nullptr;

  // Add the features to the compile options.
  Opts->Features.clear();
-  for (const auto &F : Features)
+  for (const auto &F : Opts->FeatureMap)
    Opts->Features.push_back((F.getValue() ? "+" : "-") + F.getKey().str());
  // Sort here, so we handle the features in a predictable order. (This matters
  // when we're dealing with features that overlap.)
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@ -465,6 +465,9 @@ class LLVM_LIBRARY_VISIBILITY OpenBSDTargetInfo : public OSTargetInfo<Target> {
 public:
  OpenBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
      : OSTargetInfo<Target>(Triple, Opts) {
+    this->WCharType = this->WIntType = this->SignedInt;
+    this->IntMaxType = TargetInfo::SignedLongLong;
+    this->Int64Type = TargetInfo::SignedLongLong;
    switch (Triple.getArch()) {
    case llvm::Triple::x86:
    case llvm::Triple::x86_64:
@ -476,6 +479,8 @@ class LLVM_LIBRARY_VISIBILITY OpenBSDTargetInfo : public OSTargetInfo<Target> {
    case llvm::Triple::mips64:
    case llvm::Triple::mips64el:
    case llvm::Triple::ppc:
+    case llvm::Triple::ppc64:
+    case llvm::Triple::ppc64le:
    case llvm::Triple::sparcv9:
      this->MCountName = "_mcount";
      break;
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@ -414,8 +414,8 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
      ABI = "elfv1";
    }

-    if (Triple.isOSFreeBSD() || Triple.getOS() == llvm::Triple::AIX ||
-        Triple.isMusl()) {
+    if (Triple.isOSFreeBSD() || Triple.isOSOpenBSD() ||
+        Triple.getOS() == llvm::Triple::AIX || Triple.isMusl()) {
      LongDoubleWidth = LongDoubleAlign = 64;
      LongDoubleFormat = &llvm::APFloat::IEEEdouble();
    }
--- a/clang/lib/Basic/Targets/Sparc.cpp
+++ b/clang/lib/Basic/Targets/Sparc.cpp
@ -240,6 +240,11 @@ void SparcV9TargetInfo::getTargetDefines(const LangOptions &Opts,
    Builder.defineMacro("__sparc_v9__");
    Builder.defineMacro("__sparcv9__");
  }
+
+  Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
+  Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
+  Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
+  Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
 }

 void SparcV9TargetInfo::fillValidCPUList(
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@ -96,19 +96,43 @@ void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts,
 }

 void WebAssemblyTargetInfo::setSIMDLevel(llvm::StringMap<bool> &Features,
-                                         SIMDEnum Level) {
+                                         SIMDEnum Level, bool Enabled) {
+  if (Enabled) {
+    switch (Level) {
+    case UnimplementedSIMD128:
+      Features["unimplemented-simd128"] = true;
+      LLVM_FALLTHROUGH;
+    case SIMD128:
+      Features["simd128"] = true;
+      LLVM_FALLTHROUGH;
+    case NoSIMD:
+      break;
+    }
+    return;
+  }
+
  switch (Level) {
-  case UnimplementedSIMD128:
-    Features["unimplemented-simd128"] = true;
-    LLVM_FALLTHROUGH;
-  case SIMD128:
-    Features["simd128"] = true;
-    LLVM_FALLTHROUGH;
  case NoSIMD:
+  case SIMD128:
+    Features["simd128"] = false;
+    LLVM_FALLTHROUGH;
+  case UnimplementedSIMD128:
+    Features["unimplemented-simd128"] = false;
    break;
  }
 }

+void WebAssemblyTargetInfo::setFeatureEnabled(llvm::StringMap<bool> &Features,
+                                              StringRef Name,
+                                              bool Enabled) const {
+  if (Name == "simd128")
+    setSIMDLevel(Features, SIMD128, Enabled);
+  else if (Name == "unimplemented-simd128")
+    setSIMDLevel(Features, UnimplementedSIMD128, Enabled);
+  else
+    Features[Name] = Enabled;
+}
+
 bool WebAssemblyTargetInfo::initFeatureMap(
    llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
    const std::vector<std::string> &FeaturesVec) const {
@ -119,30 +143,8 @@ bool WebAssemblyTargetInfo::initFeatureMap(
    Features["atomics"] = true;
    Features["mutable-globals"] = true;
    Features["tail-call"] = true;
-    setSIMDLevel(Features, SIMD128);
+    setSIMDLevel(Features, SIMD128, true);
  }
-  // Other targets do not consider user-configured features here, but while we
-  // are actively developing new features it is useful to let user-configured
-  // features control availability of builtins
-  setSIMDLevel(Features, SIMDLevel);
-  if (HasNontrappingFPToInt)
-    Features["nontrapping-fptoint"] = true;
-  if (HasSignExt)
-    Features["sign-ext"] = true;
-  if (HasExceptionHandling)
-    Features["exception-handling"] = true;
-  if (HasBulkMemory)
-    Features["bulk-memory"] = true;
-  if (HasAtomics)
-    Features["atomics"] = true;
-  if (HasMutableGlobals)
-    Features["mutable-globals"] = true;
-  if (HasMultivalue)
-    Features["multivalue"] = true;
-  if (HasTailCall)
-    Features["tail-call"] = true;
-  if (HasReferenceTypes)
-    Features["reference-types"] = true;

  return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
 }
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@ -69,7 +69,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo {
                        MacroBuilder &Builder) const override;

 private:
-  static void setSIMDLevel(llvm::StringMap<bool> &Features, SIMDEnum Level);
+  static void setSIMDLevel(llvm::StringMap<bool> &Features, SIMDEnum Level,
+                           bool Enabled);

  bool
  initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
@ -77,6 +78,9 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo {
                 const std::vector<std::string> &FeaturesVec) const override;
  bool hasFeature(StringRef Feature) const final;

+  void setFeatureEnabled(llvm::StringMap<bool> &Features, StringRef Name,
+                         bool Enabled) const final;
+
  bool handleTargetFeatures(std::vector<std::string> &Features,
                            DiagnosticsEngine &Diags) final;

--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@ -4956,11 +4956,7 @@ bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
 static CudaArch getCudaArch(CodeGenModule &CGM) {
  if (!CGM.getTarget().hasFeature("ptx"))
    return CudaArch::UNKNOWN;
-  llvm::StringMap<bool> Features;
-  CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
-                                 CGM.getTarget().getTargetOpts().CPU,
-                                 CGM.getTarget().getTargetOpts().Features);
-  for (const auto &Feature : Features) {
+  for (const auto &Feature : CGM.getTarget().getTargetOpts().FeatureMap) {
    if (Feature.getValue()) {
      CudaArch Arch = StringToCudaArch(Feature.getKey());
      if (Arch != CudaArch::UNKNOWN)
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@ -370,9 +370,11 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
    V8_6Pos = Features.insert(std::next(V8_6Pos), {"+i8mm", "+bf16"});

  if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
-                               options::OPT_munaligned_access))
+                               options::OPT_munaligned_access)) {
    if (A->getOption().matches(options::OPT_mno_unaligned_access))
      Features.push_back("+strict-align");
+  } else if (Triple.isOSOpenBSD())
+    Features.push_back("+strict-align");

  if (Args.hasArg(options::OPT_ffixed_x1))
    Features.push_back("+reserve-x1");
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@ -93,13 +93,13 @@ const char *x86::getX86TargetCPU(const ArgList &Args,
    return "x86-64";

  switch (Triple.getOS()) {
-  case llvm::Triple::FreeBSD:
-    return "i686";
  case llvm::Triple::NetBSD:
-  case llvm::Triple::OpenBSD:
    return "i486";
  case llvm::Triple::Haiku:
+  case llvm::Triple::OpenBSD:
    return "i586";
+  case llvm::Triple::FreeBSD:
+    return "i686";
  default:
    // Fallback to p4.
    return "pentium4";
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@ -1879,8 +1879,8 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
        ABIName = "elfv1-qpx";
        break;
      }
-
-      if (T.isMusl() || (T.isOSFreeBSD() && T.getOSMajorVersion() >= 13))
+      if ((T.isOSFreeBSD() && T.getOSMajorVersion() >= 13) ||
+          T.isOSOpenBSD() || T.isMusl())
        ABIName = "elfv2";
      else
        ABIName = "elfv1";
@ -2971,7 +2971,7 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
    return;

  if (Args.hasFlag(options::OPT_fstack_clash_protection,
-                   options::OPT_fnostack_clash_protection, false))
+                   options::OPT_fno_stack_clash_protection, false))
    CmdArgs.push_back("-fstack-clash-protection");
 }

--- a/libunwind/src/AddressSpace.hpp
+++ b/libunwind/src/AddressSpace.hpp
@ -452,10 +452,12 @@ struct _LIBUNWIND_HIDDEN dl_iterate_cb_data {
    #error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform."
  #endif

+#if defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE)
 #include "FrameHeaderCache.hpp"

 // There should be just one of these per process.
 static FrameHeaderCache ProcessFrameHeaderCache;
+#endif

 static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
                               dl_iterate_cb_data *cbdata) {
@ -476,8 +478,10 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
  auto cbdata = static_cast<dl_iterate_cb_data *>(data);
  if (pinfo->dlpi_phnum == 0 || cbdata->targetAddr < pinfo->dlpi_addr)
    return 0;
+#if defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE)
  if (ProcessFrameHeaderCache.find(pinfo, pinfo_size, data))
    return 1;
+#endif

  Elf_Addr image_base = calculateImageBase(pinfo);
  bool found_obj = false;
@ -505,7 +509,9 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
      found_obj = checkAddrInSegment(phdr, image_base, cbdata);
    }
    if (found_obj && found_hdr) {
+#if defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE)
      ProcessFrameHeaderCache.add(cbdata->sects);
+#endif
      return 1;
    }
  }
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@ -28,6 +28,10 @@ ELF Improvements
  chrome://tracing. The file can be specified with ``--time-trace-file``.
  Trace granularity can be specified with ``--time-trace-granularity``.
  (`D71060 <https://reviews.llvm.org/D71060>`_)
+* For ARM architectures the default max page size was increased to 64k.
+  This increases compatibility with systems where a non standard page
+  size was configured. This also is inline with GNU ld defaults.
+  (`D77330 <https://reviews.llvm.org/D77330>`_)
 * ...

 Breaking changes
@ -40,12 +44,17 @@ Breaking changes
 COFF Improvements
 -----------------

-* ...
+* Fixed exporting symbols whose names contain a period (``.``), which was
+  a regression in lld 7.

 MinGW Improvements
 ------------------

-* ...
+* Implemented new options for disabling auto import and runtime pseudo
+  relocations (``--disable-auto-import`` and
+  ``--disable-runtime-pseudo-reloc``), the ``--no-seh`` flag and options
+  for selecting file and section alignment (``--file-alignment`` and
+  ``--section-alignment``).

 MachO Improvements
 ------------------
--- a/lld/docs/conf.py
+++ b/lld/docs/conf.py
@ -134,7 +134,7 @@
 #html_use_smartypants = True

 # Custom sidebar templates, maps document names to template names.
-html_sidebars = {'index': 'indexsidebar.html'}
+html_sidebars = {'index': ['indexsidebar.html']}

 # Additional templates that should be rendered to pages, maps page names to
 # template names.
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@ -467,10 +467,12 @@ class MCCFIInstruction {
    unsigned Register2;
  };
  std::vector<char> Values;
+  std::string Comment;

-  MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V)
+  MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V,
+                   StringRef Comment = "")
      : Operation(Op), Label(L), Register(R), Offset(O),
-        Values(V.begin(), V.end()) {
+        Values(V.begin(), V.end()), Comment(Comment) {
    assert(Op != OpRegister);
  }

@ -570,8 +572,9 @@ class MCCFIInstruction {

  /// .cfi_escape Allows the user to add arbitrary bytes to the unwind
  /// info.
-  static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals) {
-    return MCCFIInstruction(OpEscape, L, 0, 0, Vals);
+  static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals,
+                                       StringRef Comment = "") {
+    return MCCFIInstruction(OpEscape, L, 0, 0, Vals, Comment);
  }

  /// A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE
@ -606,6 +609,10 @@ class MCCFIInstruction {
    assert(Operation == OpEscape);
    return StringRef(&Values[0], Values.size());
  }
+
+  StringRef getComment() const {
+    return Comment;
+  }
 };

 struct MCDwarfFrameInfo {
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@ -241,6 +241,7 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
    OutStreamer->emitCFIGnuArgsSize(Inst.getOffset());
    break;
  case MCCFIInstruction::OpEscape:
+    OutStreamer->AddComment(Inst.getComment());
    OutStreamer->emitCFIEscape(Inst.getValues());
    break;
  case MCCFIInstruction::OpRestore:
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@ -37,6 +37,10 @@ class FeatureBitset {
      set(I);
  }

+  bool any() const {
+    return llvm::any_of(Bits, [](uint64_t V) { return V != 0; });
+  }
+
  constexpr FeatureBitset &set(unsigned I) {
    // GCC <6.2 crashes if this is written in a single statement.
    uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32));
@ -89,6 +93,13 @@ class FeatureBitset {
      Result.Bits[I] = ~Bits[I];
    return Result;
  }
+
+  constexpr bool operator!=(const FeatureBitset &RHS) const {
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+      if (Bits[I] != RHS.Bits[I])
+        return true;
+    return false;
+  }
 };

 struct ProcInfo {
@ -552,11 +563,17 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU,
 // For each feature that is (transitively) implied by this feature, set it.
 static void getImpliedEnabledFeatures(FeatureBitset &Bits,
                                      const FeatureBitset &Implies) {
+  // Fast path: Implies is often empty.
+  if (!Implies.any())
+    return;
+  FeatureBitset Prev;
  Bits |= Implies;
-  for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
-    if (Implies[i])
-      getImpliedEnabledFeatures(Bits, FeatureInfos[i].ImpliedFeatures);
-  }
+  do {
+    Prev = Bits;
+    for (unsigned i = CPU_FEATURE_MAX; i;)
+      if (Bits[--i])
+        Bits |= FeatureInfos[i].ImpliedFeatures;
+  } while (Prev != Bits);
 }

 /// Create bit vector of features that are implied disabled if the feature
@ -564,12 +581,14 @@ static void getImpliedEnabledFeatures(FeatureBitset &Bits,
 static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) {
  // Check all features looking for any dependent on this feature. If we find
  // one, mark it and recursively find any feature that depend on it.
-  for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
-    if (FeatureInfos[i].ImpliedFeatures[Value]) {
-      Bits.set(i);
-      getImpliedDisabledFeatures(Bits, i);
-    }
-  }
+  FeatureBitset Prev;
+  Bits.set(Value);
+  do {
+    Prev = Bits;
+    for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
+      if ((FeatureInfos[i].ImpliedFeatures & Bits).any())
+        Bits.set(i);
+  } while (Prev != Bits);
 }

 void llvm::X86::getImpliedFeatures(
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@ -148,6 +148,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@ -399,12 +400,102 @@ static bool ShouldSignReturnAddress(MachineFunction &MF) {
  return false;
 }

+// Convenience function to create a DWARF expression for
+//   Expr + NumBytes + NumVGScaledBytes * AArch64::VG
+static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
+                                     int NumBytes, int NumVGScaledBytes, unsigned VG,
+                                     llvm::raw_string_ostream &Comment) {
+  uint8_t buffer[16];
+
+  if (NumBytes) {
+    Expr.push_back(dwarf::DW_OP_consts);
+    Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
+    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+    Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+  }
+
+  if (NumVGScaledBytes) {
+    Expr.push_back((uint8_t)dwarf::DW_OP_consts);
+    Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
+
+    Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
+    Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
+    Expr.push_back(0);
+
+    Expr.push_back((uint8_t)dwarf::DW_OP_mul);
+    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+    Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
+            << std::abs(NumVGScaledBytes) << " * VG";
+  }
+}
+
+// Creates an MCCFIInstruction:
+//    { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
+MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
+    const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
+  int64_t NumBytes, NumVGScaledBytes;
+  OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+
+  std::string CommentBuffer = "sp";
+  llvm::raw_string_ostream Comment(CommentBuffer);
+
+  // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG)
+  SmallString<64> Expr;
+  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31));
+  Expr.push_back(0);
+  appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
+                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+  // Wrap this into DW_CFA_def_cfa.
+  SmallString<64> DefCfaExpr;
+  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+  uint8_t buffer[16];
+  DefCfaExpr.append(buffer,
+                    buffer + encodeULEB128(Expr.size(), buffer));
+  DefCfaExpr.append(Expr.str());
+  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
+                                        Comment.str());
+}
+
+MCCFIInstruction AArch64FrameLowering::createCfaOffset(
+    const TargetRegisterInfo &TRI, unsigned Reg,
+    const StackOffset &OffsetFromDefCFA) const {
+  int64_t NumBytes, NumVGScaledBytes;
+  OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+
+  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+
+  // Non-scalable offsets can use DW_CFA_offset directly.
+  if (!NumVGScaledBytes)
+    return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
+
+  std::string CommentBuffer;
+  llvm::raw_string_ostream Comment(CommentBuffer);
+  Comment << printReg(Reg, &TRI) << "  @ cfa";
+
+  // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
+  SmallString<64> OffsetExpr;
+  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
+                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+  // Wrap this into DW_CFA_expression
+  SmallString<64> CfaExpr;
+  CfaExpr.push_back(dwarf::DW_CFA_expression);
+  uint8_t buffer[16];
+  CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+  CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
+  CfaExpr.append(OffsetExpr.str());
+
+  return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
+}
+
 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
-  const MCRegisterInfo *MRI = STI.getRegisterInfo();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

@ -415,11 +506,26 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
-    int64_t Offset =
-        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
-    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+
+    // Not all unwinders may know about SVE registers, so assume the lowest
+    // common demoninator.
+    unsigned NewReg;
+    if (static_cast<const AArch64RegisterInfo *>(TRI)->regNeedsCFI(Reg, NewReg))
+      Reg = NewReg;
+    else
+      continue;
+
+    StackOffset Offset;
+    if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) {
+      AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+      Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) -
+               StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8);
+    } else {
+      Offset = {MFI.getObjectOffset(Info.getFrameIdx()) -
+                    getOffsetOfLocalArea(),
+                MVT::i8};
+    }
+    unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
@ -1383,9 +1489,18 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
-      // Encode the stack size of the leaf function.
-      unsigned CFIIndex = MF.addFrameInst(
-          MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+      unsigned CFIIndex;
+      if (SVEStackSize) {
+        const TargetSubtargetInfo &STI = MF.getSubtarget();
+        const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+        StackOffset TotalSize =
+            SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8);
+        CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
+      } else {
+        // Encode the stack size of the leaf function.
+        CFIIndex = MF.addFrameInst(
+            MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+      }
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
@ -2006,6 +2121,7 @@ static void computeCalleeSaveRegisterPairs(
  // available unwind codes.  This flag assures that the alignment fixup is done
  // only once, as intened.
  bool FixupDone = false;
+
  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@ -18,6 +18,8 @@

 namespace llvm {

+class MCCFIInstruction;
+
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
  explicit AArch64FrameLowering()
@ -119,6 +121,11 @@ class AArch64FrameLowering : public TargetFrameLowering {
  int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
                                      int &MinCSFrameIndex,
                                      int &MaxCSFrameIndex) const;
+  MCCFIInstruction
+  createDefCFAExpressionFromSP(const TargetRegisterInfo &TRI,
+                               const StackOffset &OffsetFromSP) const;
+  MCCFIInstruction createCfaOffset(const TargetRegisterInfo &MRI, unsigned DwarfReg,
+                                   const StackOffset &OffsetFromDefCFA) const;
  bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                unsigned StackBumpBytes) const;
 };
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -4107,6 +4107,7 @@ static bool canGuaranteeTCO(CallingConv::ID CC) {
 static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::C:
+  case CallingConv::AArch64_SVE_VectorCall:
  case CallingConv::PreserveMost:
  case CallingConv::Swift:
    return true;
@ -4126,6 +4127,15 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();
  CallingConv::ID CallerCC = CallerF.getCallingConv();
+
+  // If this function uses the C calling convention but has an SVE signature,
+  // then it preserves more registers and should assume the SVE_VectorCall CC.
+  // The check for matching callee-saved regs will determine whether it is
+  // eligible for TCO.
+  if (CallerCC == CallingConv::C &&
+      AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
+    CallerCC = CallingConv::AArch64_SVE_VectorCall;
+
  bool CCMatch = CallerCC == CalleeCC;

  // When using the Windows calling convention on a non-windows OS, we want
@ -4313,6 +4323,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
  bool IsSibCall = false;

+  // Check callee args/returns for SVE registers and set calling convention
+  // accordingly.
+  if (CallConv == CallingConv::C) {
+    bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
+      return Out.VT.isScalableVector();
+    });
+    bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
+      return In.VT.isScalableVector();
+    });
+
+    if (CalleeInSVE || CalleeOutSVE)
+      CallConv = CallingConv::AArch64_SVE_VectorCall;
+  }
+
  if (IsTailCall) {
    // Check if it's really possible to do a tail call.
    IsTailCall = isEligibleForTailCallOptimization(
@ -4666,20 +4690,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
    Ops.push_back(DAG.getRegister(RegToPass.first,
                                  RegToPass.second.getValueType()));

-  // Check callee args/returns for SVE registers and set calling convention
-  // accordingly.
-  if (CallConv == CallingConv::C) {
-    bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
-      return Out.VT.isScalableVector();
-    });
-    bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
-      return In.VT.isScalableVector();
-    });
-
-    if (CalleeInSVE || CalleeOutSVE)
-      CallConv = CallingConv::AArch64_SVE_VectorCall;
-  }
-
  // Add a register mask operand representing the call-preserved registers.
  const uint32_t *Mask;
  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@ -40,7 +40,30 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
  AArch64_MC::initLLVMToCVRegMapping(this);
 }

-static bool hasSVEArgsOrReturn(const MachineFunction *MF) {
+/// Return whether the register needs a CFI entry. Not all unwinders may know
+/// about SVE registers, so we assume the lowest common denominator, i.e. the
+/// callee-saves required by the base ABI. For the SVE registers z8-z15 only the
+/// lower 64-bits (d8-d15) need to be saved. The lower 64-bits subreg is
+/// returned in \p RegToUseForCFI.
+bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg,
+                                      unsigned &RegToUseForCFI) const {
+  if (AArch64::PPRRegClass.contains(Reg))
+    return false;
+
+  if (AArch64::ZPRRegClass.contains(Reg)) {
+    RegToUseForCFI = getSubReg(Reg, AArch64::dsub);
+    for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) {
+      if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI)
+        return true;
+    }
+    return false;
+  }
+
+  RegToUseForCFI = Reg;
+  return true;
+}
+
+bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) {
  const Function &F = MF->getFunction();
  return isa<ScalableVectorType>(F.getReturnType()) ||
         any_of(F.args(), [](const Argument &Arg) {
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@ -42,6 +42,8 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
  void UpdateCustomCallPreservedMask(MachineFunction &MF,
                                     const uint32_t **Mask) const;

+  static bool hasSVEArgsOrReturn(const MachineFunction *MF);
+
  /// Code Generation virtual methods...
  const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
  const MCPhysReg *getDarwinCalleeSavedRegs(const MachineFunction *MF) const;
@ -122,6 +124,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
                               MachineFunction &MF) const override;

  unsigned getLocalAddressRegister(const MachineFunction &MF) const;
+  bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const;
 };

 } // end namespace llvm
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@ -133,6 +133,9 @@ def NZCV  : AArch64Reg<0, "nzcv">;
 // First fault status register
 def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>;

+// Purely virtual Vector Granule (VG) Dwarf register
+def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>;
+
 // GPR register classes with the intersections of GPR32/GPR32sp and
 // GPR64/GPR64sp for use by the coalescer.
 def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@ -1765,7 +1765,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
  defm : unpred_store<         store, nxv4f16, ST1H_S_IMM, PTRUE_S>;
  defm : unpred_store<         store, nxv2f16, ST1H_D_IMM, PTRUE_D>;
  defm : unpred_store<         store, nxv4f32,   ST1W_IMM, PTRUE_S>;
-  defm : unpred_store<         store, nxv4f32, ST1W_D_IMM, PTRUE_D>;
+  defm : unpred_store<         store, nxv2f32, ST1W_D_IMM, PTRUE_D>;
  defm : unpred_store<         store, nxv2f64,   ST1D_IMM, PTRUE_D>;

  multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegImmInst,
--- a/llvm/lib/Target/AArch64/AArch64StackOffset.h
+++ b/llvm/lib/Target/AArch64/AArch64StackOffset.h
@ -123,6 +123,18 @@ class StackOffset {
    }
  }

+  void getForDwarfOffset(int64_t &ByteSized, int64_t &VGSized) const {
+    assert(isValid() && "Invalid frame offset");
+
+    // VGSized offsets are divided by '2', because the VG register is the
+    // the number of 64bit granules as opposed to 128bit vector chunks,
+    // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
+    // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+    // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
+    ByteSized = Bytes;
+    VGSized = ScalableBytes / 2;
+  }
+
  /// Returns whether the offset is known zero.
  explicit operator bool() const { return Bytes || ScalableBytes; }

--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@ -160,7 +160,7 @@ bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
    I->eraseFromParent();
    if (Op1->use_empty())
      Op1->eraseFromParent();
-    if (Op2->use_empty())
+    if (Op1 != Op2 && Op2->use_empty())
      Op2->eraseFromParent();

    return true;
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@ -2653,22 +2653,35 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
  return LoadSpillOpcodesArray[getSpillTarget()];
 }

-void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
                                     unsigned RegNo) const {
  // Conservatively clear kill flag for the register if the instructions are in
  // different basic blocks and in SSA form, because the kill flag may no longer
  // be right. There is no need to bother with dead flags since defs with no
  // uses will be handled by DCE.
-  MachineRegisterInfo &MRI = StartMI.getParent()->getParent()->getRegInfo();
-  if (MRI.isSSA() && (StartMI.getParent() != EndMI.getParent())) {
+  MachineRegisterInfo &MRI = StartMI->getParent()->getParent()->getRegInfo();
+  if (MRI.isSSA() && (StartMI->getParent() != EndMI->getParent())) {
    MRI.clearKillFlags(RegNo);
    return;
  }

  // Instructions between [StartMI, EndMI] should be in same basic block.
-  assert((StartMI.getParent() == EndMI.getParent()) &&
+  assert((StartMI->getParent() == EndMI->getParent()) &&
         "Instructions are not in same basic block");

+  // If before RA, StartMI may be def through COPY, we need to adjust it to the
+  // real def. See function getForwardingDefMI.
+  if (MRI.isSSA()) {
+    bool Reads, Writes;
+    std::tie(Reads, Writes) = StartMI->readsWritesVirtualRegister(RegNo);
+    if (!Reads && !Writes) {
+      assert(Register::isVirtualRegister(RegNo) &&
+             "Must be a virtual register");
+      // Get real def and ignore copies.
+      StartMI = MRI.getVRegDef(RegNo);
+    }
+  }
+
  bool IsKillSet = false;

  auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
@ -2681,21 +2694,21 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
  // Set killed flag for EndMI.
  // No need to do anything if EndMI defines RegNo.
  int UseIndex =
-      EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
+      EndMI->findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
  if (UseIndex != -1) {
-    EndMI.getOperand(UseIndex).setIsKill(true);
+    EndMI->getOperand(UseIndex).setIsKill(true);
    IsKillSet = true;
    // Clear killed flag for other EndMI operands related to RegNo. In some
    // upexpected cases, killed may be set multiple times for same register
    // operand in same MI.
-    for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
+    for (int i = 0, e = EndMI->getNumOperands(); i != e; ++i)
      if (i != UseIndex)
-        clearOperandKillInfo(EndMI, i);
+        clearOperandKillInfo(*EndMI, i);
  }

  // Walking the inst in reverse order (EndMI -> StartMI].
-  MachineBasicBlock::reverse_iterator It = EndMI;
-  MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
+  MachineBasicBlock::reverse_iterator It = *EndMI;
+  MachineBasicBlock::reverse_iterator E = EndMI->getParent()->rend();
  // EndMI has been handled above, skip it here.
  It++;
  MachineOperand *MO = nullptr;
@ -2721,13 +2734,13 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
      } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
                                                  &getRegisterInfo()))) {
        // No use found, set dead for its def.
-        assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
+        assert(&*It == StartMI && "No new def between StartMI and EndMI.");
        MO->setIsDead(true);
        break;
      }
    }

-    if ((&*It) == &StartMI)
+    if ((&*It) == StartMI)
      break;
  }
  // Ensure RegMo liveness is killed after EndMI.
@ -3858,7 +3871,7 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
    // ForwardingOperandReg = LI imm1
    // y = op2 imm2, ForwardingOperandReg(killed)
    if (IsForwardingOperandKilled)
-      fixupIsDeadOrKill(DefMI, MI, ForwardingOperandReg);
+      fixupIsDeadOrKill(&DefMI, &MI, ForwardingOperandReg);

    LLVM_DEBUG(dbgs() << "With:\n");
    LLVM_DEBUG(MI.dump());
@ -3950,9 +3963,9 @@ bool PPCInstrInfo::transformToNewImmFormFedByAdd(

    // Update kill flag
    if (RegMO->isKill() || IsKilledFor(RegMO->getReg()))
-      fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+      fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
    if (ForwardKilledOperandReg != ~0U)
-      fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+      fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
  }

  LLVM_DEBUG(dbgs() << "With:\n");
@ -4063,12 +4076,12 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
  // x = ADD reg(killed), imm
  // y = XOP 0, x
  if (IsFwdFeederRegKilled || RegMO->isKill())
-    fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+    fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
  // Pattern 3:
  // ForwardKilledOperandReg = ADD reg, imm
  // y = XOP 0, ForwardKilledOperandReg(killed)
  if (ForwardKilledOperandReg != ~0U)
-    fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+    fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);

  LLVM_DEBUG(dbgs() << "With:\n");
  LLVM_DEBUG(MI.dump());
@ -4224,7 +4237,7 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
  // ForwardKilledOperandReg = LI imm
  // y = XOP reg, ForwardKilledOperandReg(killed)
  if (ForwardKilledOperandReg != ~0U)
-    fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+    fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
  return true;
 }

--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@ -570,14 +570,16 @@ class PPCInstrInfo : public PPCGenInstrInfo {
  /// up. Before calling this function,
  /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
  /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
-  ///    and possible definition for \p RegNo is \p StartMI or \p EndMI.
+  ///    and possible definition for \p RegNo is \p StartMI or \p EndMI. For
+  ///    pre-RA cases, definition may be \p StartMI through COPY, \p StartMI
+  ///    will be adjust to true definition.
  /// 3. We can do accurate fixup for the case when all instructions between
  ///    [\p StartMI, \p EndMI] are in same basic block.
  /// 4. For the case when \p StartMI and \p EndMI are not in same basic block,
  ///    we conservatively clear kill flag for all uses of \p RegNo for pre-RA
  ///    and for post-RA, we give an assertion as without reaching definition
  ///    analysis post-RA, \p StartMI and \p EndMI are hard to keep right.
-  void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+  void fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
                         unsigned RegNo) const;
  void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
  void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@ -279,7 +279,7 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,

  // Handle a single unconditional branch.
  if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
-    TBB = I->getOperand(0).getMBB();
+    TBB = getBranchDestBlock(*I);
    return false;
  }

@ -293,7 +293,7 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
  if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
      I->getDesc().isUnconditionalBranch()) {
    parseCondBranch(*std::prev(I), TBB, Cond);
-    FBB = I->getOperand(0).getMBB();
+    FBB = getBranchDestBlock(*I);
    return false;
  }

@ -384,10 +384,6 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,

  MachineFunction *MF = MBB.getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
-  const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
-
-  if (TM.isPositionIndependent())
-    report_fatal_error("Unable to insert indirect branch");

  if (!isInt<32>(BrOffset))
    report_fatal_error(
@ -399,15 +395,13 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
  Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
  auto II = MBB.end();

-  MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg)
-                             .addMBB(&DestBB, RISCVII::MO_HI);
-  BuildMI(MBB, II, DL, get(RISCV::PseudoBRIND))
-      .addReg(ScratchReg, RegState::Kill)
-      .addMBB(&DestBB, RISCVII::MO_LO);
+  MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
+                          .addReg(ScratchReg, RegState::Define | RegState::Dead)
+                          .addMBB(&DestBB, RISCVII::MO_CALL);

  RS->enterBasicBlockEnd(MBB);
  unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
-                                                LuiMI.getIterator(), false, 0);
+                                                MI.getIterator(), false, 0);
  MRI.replaceRegWith(ScratchReg, Scav);
  MRI.clearVirtRegs();
  RS->setRegUsed(Scav);
@ -431,6 +425,7 @@ RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {

 bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                           int64_t BrOffset) const {
+  unsigned XLen = STI.getXLen();
  // Ideally we could determine the supported branch offset from the
  // RISCVII::FormMask, but this can't be used for Pseudo instructions like
  // PseudoBR.
@ -447,6 +442,8 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
  case RISCV::JAL:
  case RISCV::PseudoBR:
    return isIntN(21, BrOffset);
+  case RISCV::PseudoJump:
+    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
  }
 }

--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@ -1012,8 +1012,8 @@ def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
 def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
          (PseudoTAIL texternalsym:$dst)>;

-let isCall = 0, isBarrier = 0, isCodeGenOnly = 0, hasSideEffects = 0,
-    mayStore = 0, mayLoad = 0 in
+let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1,
+    isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
 def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> {
  let AsmString = "jump\t$target, $rd";
 }
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -3208,13 +3208,23 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
    return DAG.getFrameIndex(FI, PtrVT);
  }

+  EVT ArgVT = Ins[i].ArgVT;
+
+  // If this is a vector that has been split into multiple parts, and the
+  // scalar size of the parts don't match the vector element size, then we can't
+  // elide the copy. The parts will have padding between them instead of being
+  // packed like a vector.
+  bool ScalarizedAndExtendedVector =
+      ArgVT.isVector() && !VA.getLocVT().isVector() &&
+      VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
+
  // This is an argument in memory. We might be able to perform copy elision.
  // If the argument is passed directly in memory without any extension, then we
  // can perform copy elision. Large vector types, for example, may be passed
  // indirectly by pointer.
  if (Flags.isCopyElisionCandidate() &&
-      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
-    EVT ArgVT = Ins[i].ArgVT;
+      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
+      !ScalarizedAndExtendedVector) {
    SDValue PartAddr;
    if (Ins[i].PartOffset == 0) {
      // If this is a one-part value or the first part of a multi-part value,
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@ -468,19 +468,16 @@ static bool CanDoGlobalSRA(GlobalVariable *GV) {
 /// Copy over the debug info for a variable to its SRA replacements.
 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                 uint64_t FragmentOffsetInBits,
-                                 uint64_t FragmentSizeInBits) {
+                                 uint64_t FragmentSizeInBits,
+                                 uint64_t VarSize) {
  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);
  for (auto *GVE : GVs) {
    DIVariable *Var = GVE->getVariable();
-    Optional<uint64_t> VarSize = Var->getSizeInBits();
-
    DIExpression *Expr = GVE->getExpression();
    // If the FragmentSize is smaller than the variable,
    // emit a fragment expression.
-    // If the variable size is unknown a fragment must be
-    // emitted to be safe.
-    if (!VarSize || FragmentSizeInBits < *VarSize) {
+    if (FragmentSizeInBits < VarSize) {
      if (auto E = DIExpression::createFragmentExpression(
              Expr, FragmentOffsetInBits, FragmentSizeInBits))
        Expr = *E;
@ -505,6 +502,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
  assert(GV->hasLocalLinkage());
  Constant *Init = GV->getInitializer();
  Type *Ty = Init->getType();
+  uint64_t VarSize = DL.getTypeSizeInBits(Ty);

  std::map<unsigned, GlobalVariable *> NewGlobals;

@ -560,7 +558,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
      // Copy over the debug info for the variable.
      uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
      uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
-      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size);
+      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize);
    } else {
      uint64_t EltSize = DL.getTypeAllocSize(ElTy);
      Align EltAlign = DL.getABITypeAlign(ElTy);
@ -573,7 +571,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
      transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
-                           FragmentSizeInBits);
+                           FragmentSizeInBits, VarSize);
    }
  }

--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@ -216,7 +216,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {

    if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
      // Replace X*(2^C) with X << C, where C is either a scalar or a vector.
-      if (Constant *NewCst = getLogBase2(NewOp->getType(), C1)) {
+      // Note that we need to sanitize undef multipliers to 1,
+      // to avoid introducing poison.
+      Constant *SafeC1 = Constant::replaceUndefsWith(
+          C1, ConstantInt::get(C1->getType()->getScalarType(), 1));
+      if (Constant *NewCst = getLogBase2(NewOp->getType(), SafeC1)) {
        BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);

        if (I.hasNoUnsignedWrap())