Vendor import of llvm-project branch release/11.x
llvmorg-11.0.0-rc2-91-g6e042866c30.
parent bdc6feb28f
commit e588341d48
@@ -60,6 +60,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -1297,6 +1298,21 @@ class ASTContext : public RefCountedBase<ASTContext> {
  /// Returns a vla type where known sizes are replaced with [*].
  QualType getVariableArrayDecayedType(QualType Ty) const;

  // Convenience struct to return information about a builtin vector type.
  struct BuiltinVectorTypeInfo {
    QualType ElementType;
    llvm::ElementCount EC;
    unsigned NumVectors;
    BuiltinVectorTypeInfo(QualType ElementType, llvm::ElementCount EC,
                          unsigned NumVectors)
        : ElementType(ElementType), EC(EC), NumVectors(NumVectors) {}
  };

  /// Returns the element type, element count and number of vectors
  /// (in case of tuple) for a builtin vector type.
  BuiltinVectorTypeInfo
  getBuiltinVectorTypeInfo(const BuiltinType *VecTy) const;

  /// Return the unique reference to a scalable vector type of the specified
  /// element type and scalable number of elements.
  ///
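The struct above is the query API that several later hunks (CGDebugInfo, CodeGenTypes) consume. A minimal sketch of a caller, assuming Ctx is an ASTContext and BT is already known to be one of the SVE builtin kinds; the helper name is hypothetical:

    #include "clang/AST/ASTContext.h"
    #include "clang/AST/Type.h"

    // Hypothetical helper: minimum total element count across the tuple,
    // e.g. 8 for svint32x2_t (two vectors of vscale x 4 x i32).
    static unsigned minTotalElements(const clang::ASTContext &Ctx,
                                     const clang::BuiltinType *BT) {
      clang::ASTContext::BuiltinVectorTypeInfo Info =
          Ctx.getBuiltinVectorTypeInfo(BT);
      return Info.EC.Min * Info.NumVectors;
    }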
@@ -6021,9 +6021,8 @@ def err_func_def_incomplete_result : Error<
def err_atomic_specifier_bad_type
    : Error<"_Atomic cannot be applied to "
            "%select{incomplete |array |function |reference |atomic |qualified "
            "|sizeless ||integer |integer }0type "
            "%1 %select{|||||||which is not trivially copyable|with less than "
            "1 byte of precision|with a non power of 2 precision}0">;
            "|sizeless ||integer }0type "
            "%1 %select{|||||||which is not trivially copyable|}0">;

// Expressions.
def ext_sizeof_alignof_function_type : Extension<
@@ -7941,6 +7940,8 @@ def err_atomic_exclusive_builtin_pointer_size : Error<
    " 1,2,4 or 8 byte type (%0 invalid)">;
def err_atomic_builtin_ext_int_size : Error<
    "Atomic memory operand must have a power-of-two size">;
def err_atomic_builtin_ext_int_prohibit : Error<
    "argument to atomic builtin of type '_ExtInt' is not supported">;
def err_atomic_op_needs_atomic : Error<
    "address argument to atomic operation must be a pointer to _Atomic "
    "type (%0 invalid)">;
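For context on how these render: argument 0 selects one alternative from each %select list, so with the new wording an _Atomic applied to an SVE type would print, for example (source type assumed):

    error: _Atomic cannot be applied to sizeless type 'svint32_t'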
@@ -3634,6 +3634,119 @@ QualType ASTContext::getIncompleteArrayType(QualType elementType,
  return QualType(newType, 0);
}

ASTContext::BuiltinVectorTypeInfo
ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
#define SVE_INT_ELTTY(BITS, ELTS, SIGNED, NUMVECTORS) \
  {getIntTypeForBitwidth(BITS, SIGNED), llvm::ElementCount(ELTS, true), \
   NUMVECTORS};

#define SVE_ELTTY(ELTTY, ELTS, NUMVECTORS) \
  {ELTTY, llvm::ElementCount(ELTS, true), NUMVECTORS};

  switch (Ty->getKind()) {
  default:
    llvm_unreachable("Unsupported builtin vector type");
  case BuiltinType::SveInt8:
    return SVE_INT_ELTTY(8, 16, true, 1);
  case BuiltinType::SveUint8:
    return SVE_INT_ELTTY(8, 16, false, 1);
  case BuiltinType::SveInt8x2:
    return SVE_INT_ELTTY(8, 16, true, 2);
  case BuiltinType::SveUint8x2:
    return SVE_INT_ELTTY(8, 16, false, 2);
  case BuiltinType::SveInt8x3:
    return SVE_INT_ELTTY(8, 16, true, 3);
  case BuiltinType::SveUint8x3:
    return SVE_INT_ELTTY(8, 16, false, 3);
  case BuiltinType::SveInt8x4:
    return SVE_INT_ELTTY(8, 16, true, 4);
  case BuiltinType::SveUint8x4:
    return SVE_INT_ELTTY(8, 16, false, 4);
  case BuiltinType::SveInt16:
    return SVE_INT_ELTTY(16, 8, true, 1);
  case BuiltinType::SveUint16:
    return SVE_INT_ELTTY(16, 8, false, 1);
  case BuiltinType::SveInt16x2:
    return SVE_INT_ELTTY(16, 8, true, 2);
  case BuiltinType::SveUint16x2:
    return SVE_INT_ELTTY(16, 8, false, 2);
  case BuiltinType::SveInt16x3:
    return SVE_INT_ELTTY(16, 8, true, 3);
  case BuiltinType::SveUint16x3:
    return SVE_INT_ELTTY(16, 8, false, 3);
  case BuiltinType::SveInt16x4:
    return SVE_INT_ELTTY(16, 8, true, 4);
  case BuiltinType::SveUint16x4:
    return SVE_INT_ELTTY(16, 8, false, 4);
  case BuiltinType::SveInt32:
    return SVE_INT_ELTTY(32, 4, true, 1);
  case BuiltinType::SveUint32:
    return SVE_INT_ELTTY(32, 4, false, 1);
  case BuiltinType::SveInt32x2:
    return SVE_INT_ELTTY(32, 4, true, 2);
  case BuiltinType::SveUint32x2:
    return SVE_INT_ELTTY(32, 4, false, 2);
  case BuiltinType::SveInt32x3:
    return SVE_INT_ELTTY(32, 4, true, 3);
  case BuiltinType::SveUint32x3:
    return SVE_INT_ELTTY(32, 4, false, 3);
  case BuiltinType::SveInt32x4:
    return SVE_INT_ELTTY(32, 4, true, 4);
  case BuiltinType::SveUint32x4:
    return SVE_INT_ELTTY(32, 4, false, 4);
  case BuiltinType::SveInt64:
    return SVE_INT_ELTTY(64, 2, true, 1);
  case BuiltinType::SveUint64:
    return SVE_INT_ELTTY(64, 2, false, 1);
  case BuiltinType::SveInt64x2:
    return SVE_INT_ELTTY(64, 2, true, 2);
  case BuiltinType::SveUint64x2:
    return SVE_INT_ELTTY(64, 2, false, 2);
  case BuiltinType::SveInt64x3:
    return SVE_INT_ELTTY(64, 2, true, 3);
  case BuiltinType::SveUint64x3:
    return SVE_INT_ELTTY(64, 2, false, 3);
  case BuiltinType::SveInt64x4:
    return SVE_INT_ELTTY(64, 2, true, 4);
  case BuiltinType::SveUint64x4:
    return SVE_INT_ELTTY(64, 2, false, 4);
  case BuiltinType::SveBool:
    return SVE_ELTTY(BoolTy, 16, 1);
  case BuiltinType::SveFloat16:
    return SVE_ELTTY(HalfTy, 8, 1);
  case BuiltinType::SveFloat16x2:
    return SVE_ELTTY(HalfTy, 8, 2);
  case BuiltinType::SveFloat16x3:
    return SVE_ELTTY(HalfTy, 8, 3);
  case BuiltinType::SveFloat16x4:
    return SVE_ELTTY(HalfTy, 8, 4);
  case BuiltinType::SveFloat32:
    return SVE_ELTTY(FloatTy, 4, 1);
  case BuiltinType::SveFloat32x2:
    return SVE_ELTTY(FloatTy, 4, 2);
  case BuiltinType::SveFloat32x3:
    return SVE_ELTTY(FloatTy, 4, 3);
  case BuiltinType::SveFloat32x4:
    return SVE_ELTTY(FloatTy, 4, 4);
  case BuiltinType::SveFloat64:
    return SVE_ELTTY(DoubleTy, 2, 1);
  case BuiltinType::SveFloat64x2:
    return SVE_ELTTY(DoubleTy, 2, 2);
  case BuiltinType::SveFloat64x3:
    return SVE_ELTTY(DoubleTy, 2, 3);
  case BuiltinType::SveFloat64x4:
    return SVE_ELTTY(DoubleTy, 2, 4);
  case BuiltinType::SveBFloat16:
    return SVE_ELTTY(BFloat16Ty, 8, 1);
  case BuiltinType::SveBFloat16x2:
    return SVE_ELTTY(BFloat16Ty, 8, 2);
  case BuiltinType::SveBFloat16x3:
    return SVE_ELTTY(BFloat16Ty, 8, 3);
  case BuiltinType::SveBFloat16x4:
    return SVE_ELTTY(BFloat16Ty, 8, 4);
  }
}

/// getScalableVectorType - Return the unique reference to a scalable vector
/// type of the specified element type and size. VectorType must be a built-in
/// type.
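As a reading aid, one arm of the switch expands as follows (this simply substitutes the SVE_INT_ELTTY parameters; the result is a brace-initializer for BuiltinVectorTypeInfo):

    // return SVE_INT_ELTTY(8, 16, true, 1); expands to roughly:
    return {getIntTypeForBitwidth(8, true),
            llvm::ElementCount(16, /*Scalable=*/true), 1};
    // i.e. "vscale x 16 x signed 8-bit int", a single vector (no tuple).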
@@ -1487,6 +1487,13 @@ static bool shouldBeHidden(NamedDecl *D) {
    if (FD->isFunctionTemplateSpecialization())
      return true;

  // Hide destructors that are invalid. There should always be one destructor,
  // but if it is an invalid decl, another one is created. We need to hide the
  // invalid one from places that expect exactly one destructor, like the
  // serialization code.
  if (isa<CXXDestructorDecl>(D) && D->isInvalidDecl())
    return true;

  return false;
}
@@ -3248,7 +3248,7 @@ static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) {
  case BuiltinType::Double:
    return "Float64";
  case BuiltinType::BFloat16:
    return "BFloat16";
    return "Bfloat16";
  default:
    llvm_unreachable("Unexpected vector element base type");
  }
@@ -409,8 +409,6 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
      return new SolarisTargetInfo<SparcV8TargetInfo>(Triple, Opts);
    case llvm::Triple::NetBSD:
      return new NetBSDTargetInfo<SparcV8TargetInfo>(Triple, Opts);
    case llvm::Triple::OpenBSD:
      return new OpenBSDTargetInfo<SparcV8TargetInfo>(Triple, Opts);
    case llvm::Triple::RTEMS:
      return new RTEMSTargetInfo<SparcV8TargetInfo>(Triple, Opts);
    default:
@@ -424,8 +422,6 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
      return new LinuxTargetInfo<SparcV8elTargetInfo>(Triple, Opts);
    case llvm::Triple::NetBSD:
      return new NetBSDTargetInfo<SparcV8elTargetInfo>(Triple, Opts);
    case llvm::Triple::OpenBSD:
      return new OpenBSDTargetInfo<SparcV8elTargetInfo>(Triple, Opts);
    case llvm::Triple::RTEMS:
      return new RTEMSTargetInfo<SparcV8elTargetInfo>(Triple, Opts);
    default:
@@ -719,23 +719,39 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
  case BuiltinType::Id: \
    return getOrCreateStructPtrType("opencl_" #ExtType, Id##Ty);
#include "clang/Basic/OpenCLExtensionTypes.def"
  // TODO: real support for SVE types requires more infrastructure
  // to be added first. The types have a variable length and are
  // represented in debug info as types whose length depends on a
  // target-specific pseudo register.
#define SVE_TYPE(Name, Id, SingletonId) \
  case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
  {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error,
        "cannot yet generate debug info for SVE type '%0'");
    auto Name = BT->getName(CGM.getContext().getPrintingPolicy());
    CGM.getDiags().Report(DiagID) << Name;
    // Return something safe.
    return CreateType(cast<const BuiltinType>(CGM.getContext().IntTy));
  }

#define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
#include "clang/Basic/AArch64SVEACLETypes.def"
    {
      ASTContext::BuiltinVectorTypeInfo Info =
          CGM.getContext().getBuiltinVectorTypeInfo(BT);
      unsigned NumElemsPerVG = (Info.EC.Min * Info.NumVectors) / 2;

      // Debuggers can't extract 1bit from a vector, so will display a
      // bitpattern for svbool_t instead.
      if (Info.ElementType == CGM.getContext().BoolTy) {
        NumElemsPerVG /= 8;
        Info.ElementType = CGM.getContext().UnsignedCharTy;
      }

      auto *LowerBound =
          llvm::ConstantAsMetadata::get(llvm::ConstantInt::getSigned(
              llvm::Type::getInt64Ty(CGM.getLLVMContext()), 0));
      SmallVector<int64_t, 9> Expr(
          {llvm::dwarf::DW_OP_constu, NumElemsPerVG, llvm::dwarf::DW_OP_bregx,
           /* AArch64::VG */ 46, 0, llvm::dwarf::DW_OP_mul,
           llvm::dwarf::DW_OP_constu, 1, llvm::dwarf::DW_OP_minus});
      auto *UpperBound = DBuilder.createExpression(Expr);

      llvm::Metadata *Subscript = DBuilder.getOrCreateSubrange(
          /*count*/ nullptr, LowerBound, UpperBound, /*stride*/ nullptr);
      llvm::DINodeArray SubscriptArray = DBuilder.getOrCreateArray(Subscript);
      llvm::DIType *ElemTy =
          getOrCreateType(Info.ElementType, TheCU->getFile());
      auto Align = getTypeAlignIfRequired(BT, CGM.getContext());
      return DBuilder.createVectorType(/*Size*/ 0, Align, ElemTy,
                                       SubscriptArray);
    }
  case BuiltinType::UChar:
  case BuiltinType::Char_U:
    Encoding = llvm::dwarf::DW_ATE_unsigned_char;
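The upper-bound expression is the interesting part: it reads the AArch64 VG pseudo-register (DWARF register 46, the number of 64-bit granules in a vector register) and evaluates NumElemsPerVG * VG - 1 in the debugger. A worked example, assuming a 256-bit SVE implementation (vector width is a runtime property, so the numbers are illustrative only):

    // Assumed hardware: 256-bit vectors => VG = 256 / 64 = 4 granules.
    // For svint32_t: EC.Min = 4, NumVectors = 1
    //   NumElemsPerVG = (4 * 1) / 2 = 2 elements per 64-bit granule
    //   upper bound   = 2 * VG - 1 = 2 * 4 - 1 = 7
    // so the subrange describes elements [0..7], matching the 8 lanes of a
    // 256-bit vector of 32-bit elements.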
@@ -29,7 +29,6 @@
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
@@ -1064,23 +1063,6 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
@@ -1397,39 +1379,6 @@ createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
  Fields.finishAndAddTo(Parent);
}

Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
@@ -1458,62 +1407,24 @@ void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
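The net effect of this rewrite is that ident_t creation moves into the shared OpenMPIRBuilder while the libomp location-string format is preserved. A sketch of the strings involved (file and function names assumed):

    // With debug info:      ";main.c;compute;42;9;;"
    //   built by OMPBuilder.getOrCreateSrcLocStr("compute", "main.c", 42, 9)
    // Without debug info (or an invalid Loc): ";unknown;unknown;0;0;;"
    //   built by OMPBuilder.getOrCreateDefaultSrcLocStr()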
@@ -1595,7 +1506,7 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
@@ -7354,6 +7265,8 @@ class MappableExprsHandler {
  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
  //
  // map(p[1:24])
  // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
  // in unified shared memory mode or for local pointers
  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s)
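A sketch of user code that triggers the mapping described in the comment above (names assumed):

    void scale(float *p) {
    #pragma omp target map(p[1:24])
      {
        // On entry the runtime receives &p (or p in unified shared memory
        // mode), &p[1], and 24 * sizeof(float), with the flags listed above.
        for (int i = 1; i < 25; ++i)
          p[i] *= 2.0f;
      }
    }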
@@ -7489,6 +7402,7 @@ class MappableExprsHandler {
    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
@@ -7531,10 +7445,15 @@ class MappableExprsHandler {
        QualType Ty =
            I->getAssociatedDeclaration()->getType().getNonReferenceType();
        if (Ty->isAnyPointerType() && std::next(I) != CE) {
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

          // We do not need to generate individual map information for the
          // pointer, it can be associated with the combined storage.
          // No need to generate individual map information for the pointer, it
          // can be associated with the combined storage if shared memory mode is
          // active or the base declaration is not global variable.
          const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
          if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
              !VD || VD->hasLocalStorage())
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
          else
            FirstPointerInComplexData = IsCaptureFirstInfo;
          ++I;
        }
      }
@@ -7570,8 +7489,19 @@ class MappableExprsHandler {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);
@@ -7704,10 +7634,11 @@ class MappableExprsHandler {
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        OpenMPOffloadMappingFlags Flags = getMapTypeBits(
            MapType, MapModifiers, IsImplicit,
            !IsExpressionFirstInfo || RequiresReference,
            IsCaptureFirstInfo && !RequiresReference);
        OpenMPOffloadMappingFlags Flags =
            getMapTypeBits(MapType, MapModifiers, IsImplicit,
                           !IsExpressionFirstInfo || RequiresReference ||
                               FirstPointerInComplexData,
                           IsCaptureFirstInfo && !RequiresReference);

        if (!IsExpressionFirstInfo) {
          // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
@@ -7765,6 +7696,7 @@ class MappableExprsHandler {

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
      }
    }
  }
@@ -7995,6 +7927,10 @@ class MappableExprsHandler {
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapBaseValuesArrayTy UseDevicePtrBasePointers;
    MapValuesArrayTy UseDevicePtrPointers;
    MapValuesArrayTy UseDevicePtrSizes;
    MapFlagsArrayTy UseDevicePtrTypes;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
@@ -8011,15 +7947,27 @@ class MappableExprsHandler {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto CI = std::find_if(
            It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        // Exclude cases where the base pointer is mapped as array subscript,
        // array section or array shaping. The base address is passed as a
        // pointer to base in this case and cannot be used as a base for
        // use_device_ptr list item.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
          auto PrevCI = std::next(CI->Components.rbegin());
          const auto *VarD = dyn_cast<VarDecl>(VD);
          if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
              isa<MemberExpr>(IE) ||
              !VD->getType().getNonReferenceType()->isPointerType() ||
              PrevCI == CI->Components.rend() ||
              isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
              VarD->hasLocalStorage()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }
      }
@@ -8040,10 +7988,12 @@ class MappableExprsHandler {
      } else {
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        BasePointers.emplace_back(Ptr, VD);
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        UseDevicePtrBasePointers.emplace_back(Ptr, VD);
        UseDevicePtrPointers.push_back(Ptr);
        UseDevicePtrSizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_TARGET_PARAM);
      }
    }
  }
@@ -8104,10 +8054,12 @@ class MappableExprsHandler {
          Ptr = CGF.EmitLValue(IE).getPointer(CGF);
        else
          Ptr = CGF.EmitScalarExpr(IE);
        BasePointers.emplace_back(Ptr, VD);
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        UseDevicePtrBasePointers.emplace_back(Ptr, VD);
        UseDevicePtrPointers.push_back(Ptr);
        UseDevicePtrSizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_TARGET_PARAM);
      }
    }
  }
@@ -8197,6 +8149,12 @@ class MappableExprsHandler {
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
    // Append data for use_device_ptr clauses.
    BasePointers.append(UseDevicePtrBasePointers.begin(),
                        UseDevicePtrBasePointers.end());
    Pointers.append(UseDevicePtrPointers.begin(), UseDevicePtrPointers.end());
    Sizes.append(UseDevicePtrSizes.begin(), UseDevicePtrSizes.end());
    Types.append(UseDevicePtrTypes.begin(), UseDevicePtrTypes.end());
  }

  /// Generate all the base pointers, section pointers, sizes and map types for
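For reference, the clause whose entries are now buffered and appended after all map entries (a minimal assumed example):

    void process(float *p, int n) {
    #pragma omp target data map(to: p[0:n]) use_device_ptr(p)
      {
        // Inside this region, p holds the corresponding device pointer,
        // which is why its entry carries OMP_MAP_RETURN_PARAM.
        launch_kernel(p, n);  // hypothetical function taking a device pointer
      }
    }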
@@ -374,17 +374,7 @@ class CGOpenMPRuntime {
private:
  /// An OpenMP-IR-Builder instance.
  llvm::OpenMPIRBuilder OMPBuilder;
  /// Default const ident_t object used for initialization of all other
  /// ident_t objects.
  llvm::Constant *DefaultOpenMPPSource = nullptr;
  using FlagsTy = std::pair<unsigned, unsigned>;
  /// Map of flags and corresponding default locations.
  using OpenMPDefaultLocMapTy = llvm::DenseMap<FlagsTy, llvm::Value *>;
  OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
  Address getOrCreateDefaultLocation(unsigned Flags);

  QualType IdentQTy;
  llvm::StructType *IdentTy = nullptr;
  /// Map for SourceLocation and OpenMP runtime library debug locations.
  typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
  OpenMPDebugLocMapTy OpenMPDebugLocMap;
@@ -2857,8 +2857,12 @@ static llvm::Value *castValueToType(CodeGenFunction &CGF, llvm::Value *Val,
  Address CastItem = CGF.CreateMemTemp(CastTy);
  Address ValCastItem = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CastItem, Val->getType()->getPointerTo(CastItem.getAddressSpace()));
  CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy);
  return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc);
  CGF.EmitStoreOfScalar(Val, ValCastItem, /*Volatile=*/false, ValTy,
                        LValueBaseInfo(AlignmentSource::Type),
                        TBAAAccessInfo());
  return CGF.EmitLoadOfScalar(CastItem, /*Volatile=*/false, CastTy, Loc,
                              LValueBaseInfo(AlignmentSource::Type),
                              TBAAAccessInfo());
}

/// This function creates calls to one of two shuffle functions to copy
@@ -2945,9 +2949,14 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
                     ThenBB, ExitBB);
    CGF.EmitBlock(ThenBB);
    llvm::Value *Res = createRuntimeShuffleFunction(
        CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
        CGF,
        CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc,
                             LValueBaseInfo(AlignmentSource::Type),
                             TBAAAccessInfo()),
        IntType, Offset, Loc);
    CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
    CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType,
                          LValueBaseInfo(AlignmentSource::Type),
                          TBAAAccessInfo());
    Address LocalPtr = Bld.CreateConstGEP(Ptr, 1);
    Address LocalElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
    PhiSrc->addIncoming(LocalPtr.getPointer(), ThenBB);
@@ -2956,9 +2965,14 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
    CGF.EmitBlock(ExitBB);
  } else {
    llvm::Value *Res = createRuntimeShuffleFunction(
        CGF, CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc),
        CGF,
        CGF.EmitLoadOfScalar(Ptr, /*Volatile=*/false, IntType, Loc,
                             LValueBaseInfo(AlignmentSource::Type),
                             TBAAAccessInfo()),
        IntType, Offset, Loc);
    CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType);
    CGF.EmitStoreOfScalar(Res, ElemPtr, /*Volatile=*/false, IntType,
                          LValueBaseInfo(AlignmentSource::Type),
                          TBAAAccessInfo());
    Ptr = Bld.CreateConstGEP(Ptr, 1);
    ElemPtr = Bld.CreateConstGEP(ElemPtr, 1);
  }
@@ -3112,12 +3126,14 @@ static void emitReductionListCopy(
    } else {
      switch (CGF.getEvaluationKind(Private->getType())) {
      case TEK_Scalar: {
        llvm::Value *Elem =
            CGF.EmitLoadOfScalar(SrcElementAddr, /*Volatile=*/false,
                                 Private->getType(), Private->getExprLoc());
        llvm::Value *Elem = CGF.EmitLoadOfScalar(
            SrcElementAddr, /*Volatile=*/false, Private->getType(),
            Private->getExprLoc(), LValueBaseInfo(AlignmentSource::Type),
            TBAAAccessInfo());
        // Store the source element value to the dest element address.
        CGF.EmitStoreOfScalar(Elem, DestElementAddr, /*Volatile=*/false,
                              Private->getType());
        CGF.EmitStoreOfScalar(
            Elem, DestElementAddr, /*Volatile=*/false, Private->getType(),
            LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo());
        break;
      }
      case TEK_Complex: {
@@ -3260,8 +3276,9 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
  Address AddrReduceListArg = CGF.GetAddrOfLocalVar(&ReduceListArg);
  Address LocalReduceList(
      Bld.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitLoadOfScalar(AddrReduceListArg, /*Volatile=*/false,
                               C.VoidPtrTy, Loc),
          CGF.EmitLoadOfScalar(
              AddrReduceListArg, /*Volatile=*/false, C.VoidPtrTy, Loc,
              LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo()),
          CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo()),
      CGF.getPointerAlign());

@@ -3339,10 +3356,13 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,

        // elem = *elemptr
        //*MediumPtr = elem
        llvm::Value *Elem =
            CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false, CType, Loc);
        llvm::Value *Elem = CGF.EmitLoadOfScalar(
            ElemPtr, /*Volatile=*/false, CType, Loc,
            LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo());
        // Store the source element value to the dest element address.
        CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType);
        CGF.EmitStoreOfScalar(Elem, MediumPtr, /*Volatile=*/true, CType,
                              LValueBaseInfo(AlignmentSource::Type),
                              TBAAAccessInfo());

        Bld.CreateBr(MergeBB);

@@ -3722,8 +3742,9 @@ static llvm::Value *emitListToGlobalCopyFunction(
    GlobLVal.setAddress(Address(BufferPtr, GlobLVal.getAlignment()));
    switch (CGF.getEvaluationKind(Private->getType())) {
    case TEK_Scalar: {
      llvm::Value *V = CGF.EmitLoadOfScalar(ElemPtr, /*Volatile=*/false,
                                            Private->getType(), Loc);
      llvm::Value *V = CGF.EmitLoadOfScalar(
          ElemPtr, /*Volatile=*/false, Private->getType(), Loc,
          LValueBaseInfo(AlignmentSource::Type), TBAAAccessInfo());
      CGF.EmitStoreOfScalar(V, GlobLVal);
      break;
    }
@@ -3926,7 +3947,9 @@ static llvm::Value *emitGlobalToListCopyFunction(
    switch (CGF.getEvaluationKind(Private->getType())) {
    case TEK_Scalar: {
      llvm::Value *V = CGF.EmitLoadOfScalar(GlobLVal, Loc);
      CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType());
      CGF.EmitStoreOfScalar(V, ElemPtr, /*Volatile=*/false, Private->getType(),
                            LValueBaseInfo(AlignmentSource::Type),
                            TBAAAccessInfo());
      break;
    }
    case TEK_Complex: {
@@ -533,99 +533,60 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
    case BuiltinType::OCLReserveID:
      ResultType = CGM.getOpenCLRuntime().convertOpenCLSpecificType(Ty);
      break;
#define GET_SVE_INT_VEC(BITS, ELTS) \
  llvm::ScalableVectorType::get( \
      llvm::IntegerType::get(getLLVMContext(), BITS), ELTS);
    case BuiltinType::SveInt8:
    case BuiltinType::SveUint8:
      return GET_SVE_INT_VEC(8, 16);
    case BuiltinType::SveInt8x2:
    case BuiltinType::SveUint8x2:
      return GET_SVE_INT_VEC(8, 32);
    case BuiltinType::SveInt8x3:
    case BuiltinType::SveUint8x3:
      return GET_SVE_INT_VEC(8, 48);
    case BuiltinType::SveInt8x4:
    case BuiltinType::SveUint8x4:
      return GET_SVE_INT_VEC(8, 64);
    case BuiltinType::SveInt16:
    case BuiltinType::SveUint16:
      return GET_SVE_INT_VEC(16, 8);
    case BuiltinType::SveInt16x2:
    case BuiltinType::SveUint16x2:
      return GET_SVE_INT_VEC(16, 16);
    case BuiltinType::SveInt16x3:
    case BuiltinType::SveUint16x3:
      return GET_SVE_INT_VEC(16, 24);
    case BuiltinType::SveInt16x4:
    case BuiltinType::SveUint16x4:
      return GET_SVE_INT_VEC(16, 32);
    case BuiltinType::SveInt32:
    case BuiltinType::SveUint32:
      return GET_SVE_INT_VEC(32, 4);
    case BuiltinType::SveInt32x2:
    case BuiltinType::SveUint32x2:
      return GET_SVE_INT_VEC(32, 8);
    case BuiltinType::SveInt32x3:
    case BuiltinType::SveUint32x3:
      return GET_SVE_INT_VEC(32, 12);
    case BuiltinType::SveInt32x4:
    case BuiltinType::SveUint32x4:
      return GET_SVE_INT_VEC(32, 16);
    case BuiltinType::SveInt64:
    case BuiltinType::SveUint64:
      return GET_SVE_INT_VEC(64, 2);
    case BuiltinType::SveInt64x2:
    case BuiltinType::SveUint64x2:
      return GET_SVE_INT_VEC(64, 4);
    case BuiltinType::SveInt64x3:
    case BuiltinType::SveUint64x3:
      return GET_SVE_INT_VEC(64, 6);
    case BuiltinType::SveInt64x4:
    case BuiltinType::SveUint64x4:
      return GET_SVE_INT_VEC(64, 8);
    case BuiltinType::SveBool:
      return GET_SVE_INT_VEC(1, 16);
#undef GET_SVE_INT_VEC
#define GET_SVE_FP_VEC(TY, ISFP16, ELTS) \
  llvm::ScalableVectorType::get( \
      getTypeForFormat(getLLVMContext(), \
                       Context.getFloatTypeSemantics(Context.TY), \
                       /* UseNativeHalf = */ ISFP16), \
      ELTS);
    case BuiltinType::SveFloat16:
      return GET_SVE_FP_VEC(HalfTy, true, 8);
    case BuiltinType::SveFloat16x2:
      return GET_SVE_FP_VEC(HalfTy, true, 16);
    case BuiltinType::SveFloat16x3:
      return GET_SVE_FP_VEC(HalfTy, true, 24);
    case BuiltinType::SveFloat16x4:
      return GET_SVE_FP_VEC(HalfTy, true, 32);
    case BuiltinType::SveFloat32:
      return GET_SVE_FP_VEC(FloatTy, false, 4);
    case BuiltinType::SveFloat32x2:
      return GET_SVE_FP_VEC(FloatTy, false, 8);
    case BuiltinType::SveFloat32x3:
      return GET_SVE_FP_VEC(FloatTy, false, 12);
    case BuiltinType::SveFloat32x4:
      return GET_SVE_FP_VEC(FloatTy, false, 16);
    case BuiltinType::SveFloat64:
      return GET_SVE_FP_VEC(DoubleTy, false, 2);
    case BuiltinType::SveFloat64x2:
      return GET_SVE_FP_VEC(DoubleTy, false, 4);
    case BuiltinType::SveFloat64x3:
      return GET_SVE_FP_VEC(DoubleTy, false, 6);
    case BuiltinType::SveFloat64x4:
      return GET_SVE_FP_VEC(DoubleTy, false, 8);
    case BuiltinType::SveBFloat16:
      return GET_SVE_FP_VEC(BFloat16Ty, false, 8);
    case BuiltinType::SveBFloat16x2:
      return GET_SVE_FP_VEC(BFloat16Ty, false, 16);
    case BuiltinType::SveBFloat16x3:
      return GET_SVE_FP_VEC(BFloat16Ty, false, 24);
    case BuiltinType::SveBFloat16x4:
      return GET_SVE_FP_VEC(BFloat16Ty, false, 32);
#undef GET_SVE_FP_VEC
    case BuiltinType::SveBFloat16x4: {
      ASTContext::BuiltinVectorTypeInfo Info =
          Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
      return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
                                           Info.EC.Min * Info.NumVectors);
    }
    case BuiltinType::Dependent:
#define BUILTIN_TYPE(Id, SingletonId)
#define PLACEHOLDER_TYPE(Id, SingletonId) \
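The consolidated path reproduces what the deleted macros computed; for example (values taken from getBuiltinVectorTypeInfo above):

    // svint32x2_t: ElementType = int, EC.Min = 4, NumVectors = 2
    //   => llvm::ScalableVectorType::get(i32, 8)
    //   => printed in IR as <vscale x 8 x i32>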
@@ -21,12 +21,19 @@ using namespace llvm::opt;
const char *sparc::getSparcAsmModeForCPU(StringRef Name,
                                         const llvm::Triple &Triple) {
  if (Triple.getArch() == llvm::Triple::sparcv9) {
    const char *DefV9CPU;

    if (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD())
      DefV9CPU = "-Av9a";
    else
      DefV9CPU = "-Av9";

    return llvm::StringSwitch<const char *>(Name)
        .Case("niagara", "-Av9b")
        .Case("niagara2", "-Av9b")
        .Case("niagara3", "-Av9d")
        .Case("niagara4", "-Av9d")
        .Default("-Av9");
        .Default(DefV9CPU);
  } else {
    return llvm::StringSwitch<const char *>(Name)
        .Case("v8", "-Av8")
@@ -498,7 +498,7 @@ static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) {
    return codegenoptions::DebugLineTablesOnly;
  if (A.getOption().matches(options::OPT_gline_directives_only))
    return codegenoptions::DebugDirectivesOnly;
  return codegenoptions::DebugInfoConstructor;
  return codegenoptions::LimitedDebugInfo;
}

static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
@@ -2380,7 +2380,7 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
          CmdArgs.push_back(Value.data());
        } else {
          RenderDebugEnablingArgs(Args, CmdArgs,
                                  codegenoptions::DebugInfoConstructor,
                                  codegenoptions::LimitedDebugInfo,
                                  DwarfVersion, llvm::DebuggerKind::Default);
        }
      } else if (Value.startswith("-mcpu") || Value.startswith("-mfpu") ||
@@ -3653,7 +3653,7 @@ static void RenderDebugOptions(const ToolChain &TC, const Driver &D,
  if (const Arg *A =
          Args.getLastArg(options::OPT_g_Group, options::OPT_gsplit_dwarf,
                          options::OPT_gsplit_dwarf_EQ)) {
    DebugInfoKind = codegenoptions::DebugInfoConstructor;
    DebugInfoKind = codegenoptions::LimitedDebugInfo;

    // If the last option explicitly specified a debug-info level, use it.
    if (checkDebugInfoOption(A, Args, D, TC) &&
@@ -3758,7 +3758,7 @@ static void RenderDebugOptions(const ToolChain &TC, const Driver &D,
    if (checkDebugInfoOption(A, Args, D, TC)) {
      if (DebugInfoKind != codegenoptions::DebugLineTablesOnly &&
          DebugInfoKind != codegenoptions::DebugDirectivesOnly) {
        DebugInfoKind = codegenoptions::DebugInfoConstructor;
        DebugInfoKind = codegenoptions::LimitedDebugInfo;
        CmdArgs.push_back("-dwarf-ext-refs");
        CmdArgs.push_back("-fmodule-format=obj");
      }
@@ -3778,9 +3778,7 @@ static void RenderDebugOptions(const ToolChain &TC, const Driver &D,
                               TC.GetDefaultStandaloneDebug());
  if (const Arg *A = Args.getLastArg(options::OPT_fstandalone_debug))
    (void)checkDebugInfoOption(A, Args, D, TC);
  if ((DebugInfoKind == codegenoptions::LimitedDebugInfo ||
       DebugInfoKind == codegenoptions::DebugInfoConstructor) &&
      NeedFullDebug)
  if (DebugInfoKind == codegenoptions::LimitedDebugInfo && NeedFullDebug)
    DebugInfoKind = codegenoptions::FullDebugInfo;

  if (Args.hasFlag(options::OPT_gembed_source, options::OPT_gno_embed_source,
@@ -6566,7 +6564,7 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
                         options::OPT_gline_tables_only)) {
    *EmitCodeView = true;
    if (DebugInfoArg->getOption().matches(options::OPT__SLASH_Z7))
      *DebugInfoKind = codegenoptions::DebugInfoConstructor;
      *DebugInfoKind = codegenoptions::LimitedDebugInfo;
    else
      *DebugInfoKind = codegenoptions::DebugLineTablesOnly;
  } else {
@@ -6863,7 +6861,7 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
    // the guard for source type, however there is a test which asserts
    // that some assembler invocation receives no -debug-info-kind,
    // and it's not clear whether that test is just overly restrictive.
    DebugInfoKind = (WantDebug ? codegenoptions::DebugInfoConstructor
    DebugInfoKind = (WantDebug ? codegenoptions::LimitedDebugInfo
                               : codegenoptions::NoDebugInfo);
    // Add the -fdebug-compilation-dir flag if needed.
    addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS());
@@ -10,10 +10,12 @@
#include "Arch/Mips.h"
#include "Arch/Sparc.h"
#include "CommonArgs.h"
#include "clang/Config/config.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/Path.h"

using namespace clang::driver;
using namespace clang::driver::tools;
@@ -41,15 +43,6 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
    CmdArgs.push_back("-many");
    break;

  case llvm::Triple::sparc:
  case llvm::Triple::sparcel: {
    CmdArgs.push_back("-32");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
    CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
    AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
    break;
  }

  case llvm::Triple::sparcv9: {
    CmdArgs.push_back("-64");
    std::string CPU = getCPUName(Args, getToolChain().getTriple());
@@ -256,6 +249,45 @@ OpenBSD::OpenBSD(const Driver &D, const llvm::Triple &Triple,
  getFilePaths().push_back(getDriver().SysRoot + "/usr/lib");
}

void OpenBSD::AddClangSystemIncludeArgs(
    const llvm::opt::ArgList &DriverArgs,
    llvm::opt::ArgStringList &CC1Args) const {
  const Driver &D = getDriver();

  if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc))
    return;

  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
    SmallString<128> Dir(D.ResourceDir);
    llvm::sys::path::append(Dir, "include");
    addSystemInclude(DriverArgs, CC1Args, Dir.str());
  }

  if (DriverArgs.hasArg(options::OPT_nostdlibinc))
    return;

  // Check for configure-time C include directories.
  StringRef CIncludeDirs(C_INCLUDE_DIRS);
  if (CIncludeDirs != "") {
    SmallVector<StringRef, 5> dirs;
    CIncludeDirs.split(dirs, ":");
    for (StringRef dir : dirs) {
      StringRef Prefix =
          llvm::sys::path::is_absolute(dir) ? StringRef(D.SysRoot) : "";
      addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir);
    }
    return;
  }

  addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include");
}

void OpenBSD::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
                                    llvm::opt::ArgStringList &CC1Args) const {
  addSystemInclude(DriverArgs, CC1Args,
                   getDriver().SysRoot + "/usr/include/c++/v1");
}

void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args,
                                  ArgStringList &CmdArgs) const {
  bool Profiling = Args.hasArg(options::OPT_pg);
@@ -264,17 +296,18 @@ void OpenBSD::AddCXXStdlibLibArgs(const ArgList &Args,
  CmdArgs.push_back(Profiling ? "-lc++abi_p" : "-lc++abi");
}

std::string OpenBSD::getCompilerRT(const ArgList &Args,
                                   StringRef Component,
                                   FileType Type) const {
  SmallString<128> Path(getDriver().SysRoot);
  llvm::sys::path::append(Path, "/usr/lib/libcompiler_rt.a");
  return std::string(Path.str());
}

Tool *OpenBSD::buildAssembler() const {
  return new tools::openbsd::Assembler(*this);
}

Tool *OpenBSD::buildLinker() const { return new tools::openbsd::Linker(*this); }

void OpenBSD::addClangTargetOptions(const ArgList &DriverArgs,
                                    ArgStringList &CC1Args,
                                    Action::OffloadKind) const {
  // Support for .init_array is still new (Aug 2016).
  if (!DriverArgs.hasFlag(options::OPT_fuse_init_array,
                          options::OPT_fno_use_init_array, false))
    CC1Args.push_back("-fno-use-init-array");
}
bool OpenBSD::HasNativeLLVMSupport() const { return true; }
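With the driver now in charge of header search, a default OpenBSD compile (no -nostdinc / -nobuiltininc) should end up with cc1 include arguments roughly like the following; the resource directory path is installation-specific and shown only as an assumed example:

    -internal-isystem /usr/lib/clang/11.0.0/include
    -internal-externc-isystem /usr/include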
@@ -54,6 +54,8 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF {
  OpenBSD(const Driver &D, const llvm::Triple &Triple,
          const llvm::opt::ArgList &Args);

  bool HasNativeLLVMSupport() const override;

  bool IsMathErrnoDefault() const override { return false; }
  bool IsObjCNonFragileABIDefault() const override { return true; }
  bool isPIEDefault() const override { return true; }
@@ -65,9 +67,18 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF {
    return ToolChain::CST_Libcxx;
  }

  void
  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                            llvm::opt::ArgStringList &CC1Args) const override;

  void addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
  void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
                           llvm::opt::ArgStringList &CmdArgs) const override;

  std::string getCompilerRT(const llvm::opt::ArgList &Args, StringRef Component,
                            FileType Type = ToolChain::FT_Static) const override;

  unsigned GetDefaultStackProtectorLevel(bool KernelOrKext) const override {
    return 2;
  }
@@ -75,11 +86,6 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF {

  SanitizerMask getSupportedSanitizers() const override;

  void
  addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
                        llvm::opt::ArgStringList &CC1Args,
                        Action::OffloadKind DeviceOffloadKind) const override;

protected:
  Tool *buildAssembler() const override;
  Tool *buildLinker() const override;
@@ -56,6 +56,13 @@ static bool isLambdaParameterList(const FormatToken *Left) {
         Left->Previous->MatchingParen->is(TT_LambdaLSquare);
}

/// Returns \c true if the token is followed by a boolean condition, \c false
/// otherwise.
static bool isKeywordWithCondition(const FormatToken &Tok) {
  return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
                     tok::kw_constexpr, tok::kw_catch);
}

/// A parser that gathers additional information about tokens.
///
/// The \c TokenAnnotator tries to match parenthesis and square brakets and
@@ -108,6 +115,12 @@ class AnnotatingParser {

    while (CurrentToken) {
      if (CurrentToken->is(tok::greater)) {
        // Try to do a better job at looking for ">>" within the condition of
        // a statement.
        if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
            Left->ParentBracket != tok::less &&
            isKeywordWithCondition(*Line.First))
          return false;
        Left->MatchingParen = CurrentToken;
        CurrentToken->MatchingParen = Left;
        // In TT_Proto, we must distignuish between:
@@ -2733,13 +2746,6 @@ bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
          Right.ParameterCount > 0);
}

/// Returns \c true if the token is followed by a boolean condition, \c false
/// otherwise.
static bool isKeywordWithCondition(const FormatToken &Tok) {
  return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
                     tok::kw_constexpr, tok::kw_catch);
}

bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
                                          const FormatToken &Left,
                                          const FormatToken &Right) {
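An example of the case the new check targets (assumed input): inside the condition of an if/while, a ">>" is a shift rather than two closing template brackets, so the preceding "<" must not be matched as an opening angle bracket:

    // clang-format must treat ">>" here as a shift, not as "> >":
    while (a < b >> 1) {
    }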
@@ -270,6 +270,7 @@ void InitHeaderSearch::AddDefaultCIncludePaths(const llvm::Triple &triple,
  case llvm::Triple::Linux:
  case llvm::Triple::Hurd:
  case llvm::Triple::Solaris:
  case llvm::Triple::OpenBSD:
    llvm_unreachable("Include management is handled in the driver.");

  case llvm::Triple::CloudABI: {
@@ -423,6 +424,7 @@ void InitHeaderSearch::AddDefaultIncludePaths(const LangOptions &Lang,
  case llvm::Triple::Emscripten:
  case llvm::Triple::Linux:
  case llvm::Triple::Hurd:
  case llvm::Triple::OpenBSD:
  case llvm::Triple::Solaris:
  case llvm::Triple::WASI:
  case llvm::Triple::AIX:
@@ -1766,36 +1766,12 @@ vec_cmpne(vector unsigned int __a, vector unsigned int __b) {
                                                      (vector int)__b);
}

static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpne(vector bool long long __a, vector bool long long __b) {
  return (vector bool long long)
    ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b));
}

static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpne(vector signed long long __a, vector signed long long __b) {
  return (vector bool long long)
    ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b));
}

static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) {
  return (vector bool long long)
    ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b));
}

static __inline__ vector bool int __ATTRS_o_ai
vec_cmpne(vector float __a, vector float __b) {
  return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a,
                                                    (vector int)__b);
}

static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpne(vector double __a, vector double __b) {
  return (vector bool long long)
    ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b));
}

/* vec_cmpnez */

static __inline__ vector bool char __ATTRS_o_ai
@@ -1900,6 +1876,86 @@ vec_parity_lsbb(vector signed long long __a) {
  return __builtin_altivec_vprtybd(__a);
}

#else
/* vec_cmpne */

static __inline__ vector bool char __ATTRS_o_ai
vec_cmpne(vector bool char __a, vector bool char __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool char __ATTRS_o_ai
vec_cmpne(vector signed char __a, vector signed char __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool char __ATTRS_o_ai
vec_cmpne(vector unsigned char __a, vector unsigned char __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool short __ATTRS_o_ai
vec_cmpne(vector bool short __a, vector bool short __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool short __ATTRS_o_ai
vec_cmpne(vector signed short __a, vector signed short __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool short __ATTRS_o_ai
vec_cmpne(vector unsigned short __a, vector unsigned short __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool int __ATTRS_o_ai
vec_cmpne(vector bool int __a, vector bool int __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool int __ATTRS_o_ai
vec_cmpne(vector signed int __a, vector signed int __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool int __ATTRS_o_ai
vec_cmpne(vector unsigned int __a, vector unsigned int __b) {
  return ~(vec_cmpeq(__a, __b));
}

static __inline__ vector bool int __ATTRS_o_ai
vec_cmpne(vector float __a, vector float __b) {
  return ~(vec_cmpeq(__a, __b));
}
#endif

#ifdef __POWER8_VECTOR__
static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpne(vector bool long long __a, vector bool long long __b) {
  return (vector bool long long)
    ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b));
}

static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpne(vector signed long long __a, vector signed long long __b) {
  return (vector bool long long)
    ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b));
}

static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) {
  return (vector bool long long)
    ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b));
}
#endif

#ifdef __VSX__
static __inline__ vector bool long long __ATTRS_o_ai
vec_cmpne(vector double __a, vector double __b) {
  return (vector bool long long)
    ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b));
}
#endif

/* vec_cmpgt */
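A small usage sketch (compile with -maltivec; on pre-POWER9 targets this now lowers through the ~vec_cmpeq fallback above, while POWER9 keeps using the native vcmpnew builtin):

    #include <altivec.h>

    // Returns a mask of lanes where a and b differ.
    vector bool int lanes_differ(vector signed int a, vector signed int b) {
      return vec_cmpne(a, b);
    }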
@@ -2702,67 +2758,67 @@ vec_insert_exp(vector unsigned int __a, vector unsigned int __b) {
}

#if defined(__powerpc64__)
static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(signed char *__a,
static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(const signed char *__a,
                                                             size_t __b) {
  return (vector signed char)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector unsigned char __ATTRS_o_ai
vec_xl_len(unsigned char *__a, size_t __b) {
vec_xl_len(const unsigned char *__a, size_t __b) {
  return (vector unsigned char)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(signed short *__a,
static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(const signed short *__a,
                                                              size_t __b) {
  return (vector signed short)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector unsigned short __ATTRS_o_ai
vec_xl_len(unsigned short *__a, size_t __b) {
vec_xl_len(const unsigned short *__a, size_t __b) {
  return (vector unsigned short)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(signed int *__a,
static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(const signed int *__a,
                                                            size_t __b) {
  return (vector signed int)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(unsigned int *__a,
static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(const unsigned int *__a,
                                                              size_t __b) {
  return (vector unsigned int)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector float __ATTRS_o_ai vec_xl_len(float *__a, size_t __b) {
static __inline__ vector float __ATTRS_o_ai vec_xl_len(const float *__a, size_t __b) {
  return (vector float)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector signed __int128 __ATTRS_o_ai
vec_xl_len(signed __int128 *__a, size_t __b) {
vec_xl_len(const signed __int128 *__a, size_t __b) {
  return (vector signed __int128)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector unsigned __int128 __ATTRS_o_ai
vec_xl_len(unsigned __int128 *__a, size_t __b) {
vec_xl_len(const unsigned __int128 *__a, size_t __b) {
  return (vector unsigned __int128)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector signed long long __ATTRS_o_ai
vec_xl_len(signed long long *__a, size_t __b) {
vec_xl_len(const signed long long *__a, size_t __b) {
  return (vector signed long long)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector unsigned long long __ATTRS_o_ai
vec_xl_len(unsigned long long *__a, size_t __b) {
vec_xl_len(const unsigned long long *__a, size_t __b) {
  return (vector unsigned long long)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector double __ATTRS_o_ai vec_xl_len(double *__a,
static __inline__ vector double __ATTRS_o_ai vec_xl_len(const double *__a,
                                                        size_t __b) {
  return (vector double)__builtin_vsx_lxvl(__a, (__b << 56));
}

static __inline__ vector unsigned char __ATTRS_o_ai
vec_xl_len_r(unsigned char *__a, size_t __b) {
vec_xl_len_r(const unsigned char *__a, size_t __b) {
  vector unsigned char __res =
      (vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56));
#ifdef __LITTLE_ENDIAN__
@ -16353,41 +16409,41 @@ typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1)));
|
||||
typedef vector float unaligned_vec_float __attribute__((aligned(1)));
|
||||
|
||||
static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset,
|
||||
signed char *__ptr) {
|
||||
const signed char *__ptr) {
|
||||
return *(unaligned_vec_schar *)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned char
|
||||
vec_xl(signed long long __offset, unsigned char *__ptr) {
|
||||
vec_xl(signed long long __offset, const unsigned char *__ptr) {
|
||||
return *(unaligned_vec_uchar*)(__ptr + __offset);
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset,
|
||||
signed short *__ptr) {
|
||||
const signed short *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_sshort *)__addr;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned short
|
||||
vec_xl(signed long long __offset, unsigned short *__ptr) {
|
||||
vec_xl(signed long long __offset, const unsigned short *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_ushort *)__addr;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset,
|
||||
signed int *__ptr) {
|
||||
const signed int *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_sint *)__addr;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset,
|
||||
unsigned int *__ptr) {
|
||||
const unsigned int *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_uint *)__addr;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset,
|
||||
float *__ptr) {
|
||||
const float *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_float *)__addr;
|
||||
}
|
||||
@ -16398,19 +16454,19 @@ typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1)));
|
||||
typedef vector double unaligned_vec_double __attribute__((aligned(1)));
|
||||
|
||||
static inline __ATTRS_o_ai vector signed long long
|
||||
vec_xl(signed long long __offset, signed long long *__ptr) {
|
||||
vec_xl(signed long long __offset, const signed long long *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_sll *)__addr;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned long long
|
||||
vec_xl(signed long long __offset, unsigned long long *__ptr) {
|
||||
vec_xl(signed long long __offset, const unsigned long long *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_ull *)__addr;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset,
|
||||
double *__ptr) {
|
||||
const double *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_double *)__addr;
|
||||
}
|
||||
@ -16421,13 +16477,13 @@ typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1)));
|
||||
typedef vector unsigned __int128 unaligned_vec_ui128
|
||||
__attribute__((aligned(1)));
|
||||
static inline __ATTRS_o_ai vector signed __int128
|
||||
vec_xl(signed long long __offset, signed __int128 *__ptr) {
|
||||
vec_xl(signed long long __offset, const signed __int128 *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_si128 *)__addr;
|
||||
}
|
||||
|
||||
static inline __ATTRS_o_ai vector unsigned __int128
|
||||
vec_xl(signed long long __offset, unsigned __int128 *__ptr) {
|
||||
vec_xl(signed long long __offset, const unsigned __int128 *__ptr) {
|
||||
signed char *__addr = (signed char *)__ptr + __offset;
|
||||
return *(unaligned_vec_ui128 *)__addr;
|
||||
}
|
||||
@ -16437,71 +16493,71 @@ vec_xl(signed long long __offset, unsigned __int128 *__ptr) {
|
||||
|
||||
#ifdef __LITTLE_ENDIAN__
|
||||
static __inline__ vector signed char __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, signed char *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const signed char *__ptr) {
|
||||
vector signed char __vec = (vector signed char)__builtin_vsx_lxvd2x_be(__offset, __ptr);
|
||||
return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
|
||||
13, 12, 11, 10, 9, 8);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned char __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, unsigned char *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const unsigned char *__ptr) {
|
||||
vector unsigned char __vec = (vector unsigned char)__builtin_vsx_lxvd2x_be(__offset, __ptr);
|
||||
return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14,
|
||||
13, 12, 11, 10, 9, 8);
|
||||
}
|
||||
|
||||
static __inline__ vector signed short __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, signed short *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const signed short *__ptr) {
|
||||
vector signed short __vec = (vector signed short)__builtin_vsx_lxvd2x_be(__offset, __ptr);
|
||||
return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned short __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, unsigned short *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const unsigned short *__ptr) {
|
||||
vector unsigned short __vec = (vector unsigned short)__builtin_vsx_lxvd2x_be(__offset, __ptr);
|
||||
return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4);
|
||||
}
|
||||
|
||||
static __inline__ vector signed int __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, signed int *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const signed int *__ptr) {
|
||||
return (vector signed int)__builtin_vsx_lxvw4x_be(__offset, __ptr);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned int __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, unsigned int *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const unsigned int *__ptr) {
|
||||
return (vector unsigned int)__builtin_vsx_lxvw4x_be(__offset, __ptr);
|
||||
}
|
||||
|
||||
static __inline__ vector float __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, float *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const float *__ptr) {
|
||||
return (vector float)__builtin_vsx_lxvw4x_be(__offset, __ptr);
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
static __inline__ vector signed long long __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, signed long long *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const signed long long *__ptr) {
|
||||
return (vector signed long long)__builtin_vsx_lxvd2x_be(__offset, __ptr);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned long long __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, unsigned long long *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const unsigned long long *__ptr) {
|
||||
return (vector unsigned long long)__builtin_vsx_lxvd2x_be(__offset, __ptr);
|
||||
}
|
||||
|
||||
static __inline__ vector double __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, double *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const double *__ptr) {
|
||||
return (vector double)__builtin_vsx_lxvd2x_be(__offset, __ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__POWER8_VECTOR__) && defined(__powerpc64__)
|
||||
static __inline__ vector signed __int128 __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, signed __int128 *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const signed __int128 *__ptr) {
|
||||
return vec_xl(__offset, __ptr);
|
||||
}
|
||||
|
||||
static __inline__ vector unsigned __int128 __ATTRS_o_ai
|
||||
vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) {
|
||||
vec_xl_be(signed long long __offset, const unsigned __int128 *__ptr) {
|
||||
return vec_xl(__offset, __ptr);
|
||||
}
|
||||
#endif
|
||||
|
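The const-qualification above only widens what callers may pass. As a quick sketch of the effect (the function and buffer names are illustrative, not from the header), a load from read-only storage now compiles without casting the const away:

  #include <altivec.h>
  #include <stddef.h>

  /* Assumes a POWER9 target with VSX enabled; vec_xl_len reads the first
     n bytes (at most 16) starting at buf. */
  vector signed char load_prefix(const signed char *buf, size_t n) {
    return vec_xl_len(buf, n); /* const pointer accepted after this change */
  }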
@ -4956,6 +4956,11 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
                  ? 0
                  : 1);

  if (ValType->isExtIntType()) {
    Diag(Ptr->getExprLoc(), diag::err_atomic_builtin_ext_int_prohibit);
    return ExprError();
  }

  return AE;
}

@ -2053,6 +2053,13 @@ Decl *TemplateDeclInstantiator::VisitFunctionDecl(
      // typedef (C++ [dcl.typedef]p4).
      if (Previous.isSingleTagDecl())
        Previous.clear();

      // Filter out previous declarations that don't match the scope. The only
      // effect this has is to remove declarations found in inline namespaces
      // for friend declarations with unqualified names.
      SemaRef.FilterLookupForScope(Previous, DC, /*Scope*/ nullptr,
                                   /*ConsiderLinkage*/ true,
                                   QualifierLoc.hasQualifier());
    }

    SemaRef.CheckFunctionDeclaration(/*Scope*/ nullptr, Function, Previous,
@ -8880,11 +8880,8 @@ QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) {
  else if (!T.isTriviallyCopyableType(Context))
    // Some other non-trivially-copyable type (probably a C++ class)
    DisallowedKind = 7;
  else if (auto *ExtTy = T->getAs<ExtIntType>()) {
    if (ExtTy->getNumBits() < 8)
  else if (T->isExtIntType()) {
      DisallowedKind = 8;
    else if (!llvm::isPowerOf2_32(ExtTy->getNumBits()))
      DisallowedKind = 9;
  }

  if (DisallowedKind != -1) {
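The net effect of the two Sema changes above is that _Atomic is now rejected for every _ExtInt type, not only for widths below 8 bits or with a non-power-of-2 width. A minimal C illustration (hypothetical test input, not from the patch):

  _Atomic _ExtInt(7)  a; /* rejected before and after this change */
  _Atomic _ExtInt(32) b; /* previously accepted; now diagnosed as well */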
@ -583,7 +583,7 @@ void ASTDeclReader::VisitDecl(Decl *D) {
                           Reader.getContext());
  }
  D->setLocation(ThisDeclLoc);
  D->setInvalidDecl(Record.readInt());
  D->InvalidDecl = Record.readInt();
  if (Record.readInt()) { // hasAttrs
    AttrVec Attrs;
    Record.readAttributes(Attrs);

@ -106,7 +106,7 @@ static const NoteTag *getNoteTag(CheckerContext &C,
                                 QualType CastToTy, const Expr *Object,
                                 bool CastSucceeds, bool IsKnownCast) {
  std::string CastToName =
      CastInfo ? CastInfo->to()->getPointeeCXXRecordDecl()->getNameAsString()
      CastInfo ? CastInfo->to()->getAsCXXRecordDecl()->getNameAsString()
               : CastToTy->getPointeeCXXRecordDecl()->getNameAsString();
  Object = Object->IgnoreParenImpCasts();

@ -135,6 +135,47 @@ static const NoteTag *getNoteTag(CheckerContext &C,
      /*IsPrunable=*/true);
}

static const NoteTag *getNoteTag(CheckerContext &C,
                                 SmallVector<QualType, 4> CastToTyVec,
                                 const Expr *Object,
                                 bool IsKnownCast) {
  Object = Object->IgnoreParenImpCasts();

  return C.getNoteTag(
      [=]() -> std::string {
        SmallString<128> Msg;
        llvm::raw_svector_ostream Out(Msg);

        if (!IsKnownCast)
          Out << "Assuming ";

        if (const auto *DRE = dyn_cast<DeclRefExpr>(Object)) {
          Out << '\'' << DRE->getDecl()->getNameAsString() << '\'';
        } else if (const auto *ME = dyn_cast<MemberExpr>(Object)) {
          Out << (IsKnownCast ? "Field '" : "field '")
              << ME->getMemberDecl()->getNameAsString() << '\'';
        } else {
          Out << (IsKnownCast ? "The object" : "the object");
        }
        Out << " is";

        bool First = true;
        for (QualType CastToTy: CastToTyVec) {
          std::string CastToName =
            CastToTy->getAsCXXRecordDecl() ?
            CastToTy->getAsCXXRecordDecl()->getNameAsString() :
            CastToTy->getPointeeCXXRecordDecl()->getNameAsString();
          Out << ' ' << ((CastToTyVec.size() == 1) ? "not" :
                         (First ? "neither" : "nor")) << " a '" << CastToName
              << '\'';
          First = false;
        }

        return std::string(Out.str());
      },
      /*IsPrunable=*/true);
}

//===----------------------------------------------------------------------===//
// Main logic to evaluate a cast.
//===----------------------------------------------------------------------===//
@ -220,40 +261,76 @@ static void addInstanceOfTransition(const CallEvent &Call,
                                    bool IsInstanceOf) {
  const FunctionDecl *FD = Call.getDecl()->getAsFunction();
  QualType CastFromTy = Call.parameters()[0]->getType();
  QualType CastToTy = FD->getTemplateSpecializationArgs()->get(0).getAsType();
  if (CastFromTy->isPointerType())
    CastToTy = C.getASTContext().getPointerType(CastToTy);
  else if (CastFromTy->isReferenceType())
    CastToTy = alignReferenceTypes(CastToTy, CastFromTy, C.getASTContext());
  else
    return;

  const MemRegion *MR = DV.getAsRegion();
  const DynamicCastInfo *CastInfo =
      getDynamicCastInfo(State, MR, CastFromTy, CastToTy);

  bool CastSucceeds;
  if (CastInfo)
    CastSucceeds = IsInstanceOf && CastInfo->succeeds();
  else
    CastSucceeds = IsInstanceOf || CastFromTy == CastToTy;

  if (isInfeasibleCast(CastInfo, CastSucceeds)) {
    C.generateSink(State, C.getPredecessor());
    return;
  SmallVector<QualType, 4> CastToTyVec;
  for (unsigned idx = 0; idx < FD->getTemplateSpecializationArgs()->size() - 1;
       ++idx) {
    TemplateArgument CastToTempArg =
        FD->getTemplateSpecializationArgs()->get(idx);
    switch (CastToTempArg.getKind()) {
    default:
      return;
    case TemplateArgument::Type:
      CastToTyVec.push_back(CastToTempArg.getAsType());
      break;
    case TemplateArgument::Pack:
      for (TemplateArgument ArgInPack: CastToTempArg.pack_elements())
        CastToTyVec.push_back(ArgInPack.getAsType());
      break;
    }
  }

  // Store the type and the cast information.
  bool IsKnownCast = CastInfo || CastFromTy == CastToTy;
  if (!IsKnownCast)
    State = setDynamicTypeAndCastInfo(State, MR, CastFromTy, CastToTy,
                                      IsInstanceOf);
  const MemRegion *MR = DV.getAsRegion();
  if (MR && CastFromTy->isReferenceType())
    MR = State->getSVal(DV.castAs<Loc>()).getAsRegion();

  C.addTransition(
      State->BindExpr(Call.getOriginExpr(), C.getLocationContext(),
                      C.getSValBuilder().makeTruthVal(CastSucceeds)),
      getNoteTag(C, CastInfo, CastToTy, Call.getArgExpr(0), CastSucceeds,
                 IsKnownCast));
  bool Success = false;
  bool IsAnyKnown = false;
  for (QualType CastToTy: CastToTyVec) {
    if (CastFromTy->isPointerType())
      CastToTy = C.getASTContext().getPointerType(CastToTy);
    else if (CastFromTy->isReferenceType())
      CastToTy = alignReferenceTypes(CastToTy, CastFromTy, C.getASTContext());
    else
      return;

    const DynamicCastInfo *CastInfo =
        getDynamicCastInfo(State, MR, CastFromTy, CastToTy);

    bool CastSucceeds;
    if (CastInfo)
      CastSucceeds = IsInstanceOf && CastInfo->succeeds();
    else
      CastSucceeds = IsInstanceOf || CastFromTy == CastToTy;

    // Store the type and the cast information.
    bool IsKnownCast = CastInfo || CastFromTy == CastToTy;
    IsAnyKnown = IsAnyKnown || IsKnownCast;
    ProgramStateRef NewState = State;
    if (!IsKnownCast)
      NewState = setDynamicTypeAndCastInfo(State, MR, CastFromTy, CastToTy,
                                           IsInstanceOf);

    if (CastSucceeds) {
      Success = true;
      C.addTransition(
          NewState->BindExpr(Call.getOriginExpr(), C.getLocationContext(),
                             C.getSValBuilder().makeTruthVal(true)),
          getNoteTag(C, CastInfo, CastToTy, Call.getArgExpr(0), true,
                     IsKnownCast));
      if (IsKnownCast)
        return;
    } else if (CastInfo && CastInfo->succeeds()) {
      C.generateSink(NewState, C.getPredecessor());
      return;
    }
  }

  if (!Success) {
    C.addTransition(
        State->BindExpr(Call.getOriginExpr(), C.getLocationContext(),
                        C.getSValBuilder().makeTruthVal(false)),
        getNoteTag(C, CastToTyVec, Call.getArgExpr(0), IsAnyKnown));
  }
}

//===----------------------------------------------------------------------===//
@ -402,8 +479,9 @@ bool CastValueChecker::evalCall(const CallEvent &Call,
    QualType ParamT = Call.parameters()[0]->getType();
    QualType ResultT = Call.getResultType();
    if (!(ParamT->isPointerType() && ResultT->isPointerType()) &&
        !(ParamT->isReferenceType() && ResultT->isReferenceType()))
        !(ParamT->isReferenceType() && ResultT->isReferenceType())) {
      return false;
    }

    DV = Call.getArgSVal(0).getAs<DefinedOrUnknownSVal>();
    break;
@ -65,6 +65,13 @@ const DynamicTypeInfo *getRawDynamicTypeInfo(ProgramStateRef State,
  return State->get<DynamicTypeMap>(MR);
}

static void unbox(QualType &Ty) {
  // FIXME: Why are we being fed references to pointers in the first place?
  while (Ty->isReferenceType() || Ty->isPointerType())
    Ty = Ty->getPointeeType();
  Ty = Ty.getCanonicalType().getUnqualifiedType();
}

const DynamicCastInfo *getDynamicCastInfo(ProgramStateRef State,
                                          const MemRegion *MR,
                                          QualType CastFromTy,
@ -73,6 +80,9 @@ const DynamicCastInfo *getDynamicCastInfo(ProgramStateRef State,
  if (!Lookup)
    return nullptr;

  unbox(CastFromTy);
  unbox(CastToTy);

  for (const DynamicCastInfo &Cast : *Lookup)
    if (Cast.equals(CastFromTy, CastToTy))
      return &Cast;
@ -112,6 +122,9 @@ ProgramStateRef setDynamicTypeAndCastInfo(ProgramStateRef State,
    State = State->set<DynamicTypeMap>(MR, CastToTy);
  }

  unbox(CastFromTy);
  unbox(CastToTy);

  DynamicCastInfo::CastResult ResultKind =
      CastSucceeds ? DynamicCastInfo::CastResult::Success
                   : DynamicCastInfo::CastResult::Failure;
@ -33,7 +33,7 @@ uintptr_t GetCurrentProcess(void);
#include <machine/sysarch.h>
#endif

#if defined(__OpenBSD__) && defined(__mips__)
#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__))
// clang-format off
#include <sys/types.h>
#include <machine/sysarch.h>
@ -58,7 +58,7 @@ void __clear_cache(void *start, void *end) {
#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
  FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif defined(__arm__) && !defined(__APPLE__)
  #if defined(__FreeBSD__) || defined(__NetBSD__)
  #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
  struct arm_sync_icache_args arg;

  arg.addr = (uintptr_t)start;
@ -277,7 +277,7 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
                                const unsigned *Features,
                                unsigned *Type, unsigned *Subtype) {
#define testFeature(F)                                                         \
  (Features[F / 32] & (F % 32)) != 0
  (Features[F / 32] & (1 << (F % 32))) != 0

  // We select CPU strings to match the code in Host.cpp, but we don't use them
  // in compiler-rt.
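The one-line change above fixes a classic bit-test bug: the old macro masked the feature word with the bit index itself rather than with a mask built from it. A standalone C sketch of the difference (the values are hypothetical):

  #include <stdio.h>

  int main(void) {
    unsigned Features[2] = {0};
    unsigned F = 33;                /* feature bit 33: word 1, bit 1 */
    Features[F / 32] |= 1u << (F % 32);

    /* Old form: ANDs with the index value 1, so the stored mask 2 is
       missed and the feature reads as absent. */
    int buggy = (Features[F / 32] & (F % 32)) != 0;
    /* New form: builds the mask for the bit before testing it. */
    int fixed = (Features[F / 32] & (1u << (F % 32))) != 0;

    printf("buggy=%d fixed=%d\n", buggy, fixed); /* prints buggy=0 fixed=1 */
    return 0;
  }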
@ -210,22 +210,6 @@ static void write_64bit_value(uint64_t i) {
  write_32bit_value(hi);
}

static uint32_t length_of_string(const char *s) {
  return (strlen(s) / 4) + 1;
}

// Remove when we support libgcov 9 current_working_directory.
#if !defined(_MSC_VER) && defined(__clang__)
__attribute__((unused))
#endif
static void
write_string(const char *s) {
  uint32_t len = length_of_string(s);
  write_32bit_value(len);
  write_bytes(s, strlen(s));
  write_bytes("\0\0\0\0", 4 - (strlen(s) % 4));
}

static uint32_t read_32bit_value() {
  uint32_t val;

@ -632,6 +616,9 @@ void llvm_writeout_files(void) {
// __attribute__((destructor)) and destructors whose priorities are greater than
// 100 run before this function and can thus be tracked. The priority is
// compatible with GCC 7 onwards.
#if __GNUC__ >= 9
#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
#endif
__attribute__((destructor(100)))
#endif
static void llvm_writeout_and_clear(void) {
@ -129,6 +129,12 @@ struct __sanitizer_shmid_ds {
  void *_shm_internal;
};

struct __sanitizer_protoent {
  char *p_name;
  char **p_aliases;
  int p_proto;
};

struct __sanitizer_netent {
  char *n_name;
  char **n_aliases;
@ -473,8 +473,8 @@ static bool checkAddrInSegment(const Elf_Phdr *phdr, size_t image_base,
  return false;
}

int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
                             void *data) {
static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo,
                                    size_t pinfo_size, void *data) {
  auto cbdata = static_cast<dl_iterate_cb_data *>(data);
  if (pinfo->dlpi_phnum == 0 || cbdata->targetAddr < pinfo->dlpi_addr)
    return 0;
@ -523,7 +523,8 @@ int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t pinfo_size,
// Given all the #ifdef's above, the code here is for
// defined(LIBUNWIND_ARM_EHABI)

int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) {
static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t,
                                    void *data) {
  auto *cbdata = static_cast<dl_iterate_cb_data *>(data);
  bool found_obj = false;
  bool found_hdr = false;
@ -599,6 +599,9 @@ void Writer::finalizeAddresses() {
void Writer::run() {
  ScopedTimer t1(codeLayoutTimer);

  // First, clear the output sections from previous runs
  outputSections.clear();

  createImportTables();
  createSections();
  createMiscChunks();
@ -26,7 +26,12 @@ using namespace lld;
using namespace lld::elf;

template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
  for (InputSectionBase *sec : obj->getSections()) {
  // Get the ELF sections to retrieve sh_flags. See the SHF_GROUP comment below.
  ArrayRef<typename ELFT::Shdr> objSections =
      CHECK(obj->getObj().sections(), obj);
  assert(objSections.size() == obj->getSections().size());
  for (auto it : llvm::enumerate(obj->getSections())) {
    InputSectionBase *sec = it.value();
    if (!sec)
      continue;

@ -35,7 +40,6 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
            .Case(".debug_addr", &addrSection)
            .Case(".debug_gnu_pubnames", &gnuPubnamesSection)
            .Case(".debug_gnu_pubtypes", &gnuPubtypesSection)
            .Case(".debug_info", &infoSection)
            .Case(".debug_loclists", &loclistsSection)
            .Case(".debug_ranges", &rangesSection)
            .Case(".debug_rnglists", &rnglistsSection)
@ -53,6 +57,20 @@ template <class ELFT> LLDDwarfObj<ELFT>::LLDDwarfObj(ObjFile<ELFT> *obj) {
      strSection = toStringRef(sec->data());
    else if (sec->name == ".debug_line_str")
      lineStrSection = toStringRef(sec->data());
    else if (sec->name == ".debug_info" &&
             !(objSections[it.index()].sh_flags & ELF::SHF_GROUP)) {
      // In DWARF v5, -fdebug-types-section places type units in .debug_info
      // sections in COMDAT groups. They are not compile units and thus should
      // be ignored for .gdb_index/diagnostics purposes.
      //
      // We use a simple heuristic: the compile unit does not have the SHF_GROUP
      // flag. If we place compile units in COMDAT groups in the future, we may
      // need to perform a lightweight parsing. We drop the SHF_GROUP flag when
      // the InputSection was created, so we need to retrieve sh_flags from the
      // associated ELF section header.
      infoSection.Data = toStringRef(sec->data());
      infoSection.sec = sec;
    }
  }
}

@ -32,6 +32,10 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject {
    f(infoSection);
  }

  InputSection *getInfoSection() const {
    return cast<InputSection>(infoSection.sec);
  }

  const llvm::DWARFSection &getLoclistsSection() const override {
    return loclistsSection;
  }
@ -180,7 +180,7 @@ void LinkerScript::addSymbol(SymbolAssignment *cmd) {
  // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`.
  uint64_t symValue = value.sec ? 0 : value.getValue();

  Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE,
  Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, value.type,
                 symValue, 0, sec);

  Symbol *sym = symtab->insert(cmd->name);
@ -317,6 +317,7 @@ void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) {
    cmd->sym->section = v.sec;
    cmd->sym->value = v.getSectionOffset();
  }
  cmd->sym->type = v.type;
}

static std::string getFilename(InputFile *file) {
@ -1215,8 +1216,14 @@ ExprValue LinkerScript::getSymbolValue(StringRef name, const Twine &loc) {
  }

  if (Symbol *sym = symtab->find(name)) {
    if (auto *ds = dyn_cast<Defined>(sym))
      return {ds->section, false, ds->value, loc};
    if (auto *ds = dyn_cast<Defined>(sym)) {
      ExprValue v{ds->section, false, ds->value, loc};
      // Retain the original st_type, so that the alias will get the same
      // behavior in relocation processing. Any operation will reset st_type to
      // STT_NOTYPE.
      v.type = ds->type;
      return v;
    }
    if (isa<SharedSymbol>(sym))
      if (!errorOnMissingSection)
        return {nullptr, false, 0, loc};
@ -59,6 +59,10 @@ struct ExprValue {
  uint64_t val;
  uint64_t alignment = 1;

  // The original st_type if the expression represents a symbol. Any operation
  // resets type to STT_NOTYPE.
  uint8_t type = llvm::ELF::STT_NOTYPE;

  // Original source location. Used for error messages.
  std::string loc;
};
@ -77,10 +77,14 @@ OutputSection::OutputSection(StringRef name, uint32_t type, uint64_t flags)
// to be allocated for nobits sections. Other ones don't require
// any special treatment on top of progbits, so there doesn't
// seem to be a harm in merging them.
//
// NOTE: clang since rL252300 emits SHT_X86_64_UNWIND .eh_frame sections. Allow
// them to be merged into SHT_PROGBITS .eh_frame (GNU as .cfi_*).
static bool canMergeToProgbits(unsigned type) {
  return type == SHT_NOBITS || type == SHT_PROGBITS || type == SHT_INIT_ARRAY ||
         type == SHT_PREINIT_ARRAY || type == SHT_FINI_ARRAY ||
         type == SHT_NOTE;
         type == SHT_NOTE ||
         (type == SHT_X86_64_UNWIND && config->emachine == EM_X86_64);
}

// Record that isec will be placed in the OutputSection. isec does not become
@ -28,6 +28,7 @@
#include "lld/Common/Strings.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
@ -2653,15 +2654,6 @@ void GdbIndexSection::initOutputSize() {
  }
}

static std::vector<InputSection *> getDebugInfoSections() {
  std::vector<InputSection *> ret;
  for (InputSectionBase *s : inputSections)
    if (InputSection *isec = dyn_cast<InputSection>(s))
      if (isec->name == ".debug_info")
        ret.push_back(isec);
  return ret;
}

static std::vector<GdbIndexSection::CuEntry> readCuList(DWARFContext &dwarf) {
  std::vector<GdbIndexSection::CuEntry> ret;
  for (std::unique_ptr<DWARFUnit> &cu : dwarf.compile_units())
@ -2815,30 +2807,40 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs,

// Returns a newly-created .gdb_index section.
template <class ELFT> GdbIndexSection *GdbIndexSection::create() {
  std::vector<InputSection *> sections = getDebugInfoSections();

  // .debug_gnu_pub{names,types} are useless in executables.
  // They are present in input object files solely for creating
  // a .gdb_index. So we can remove them from the output.
  for (InputSectionBase *s : inputSections)
  // Collect InputFiles with .debug_info. See the comment in
  // LLDDwarfObj<ELFT>::LLDDwarfObj. If we do lightweight parsing in the future,
  // note that isec->data() may uncompress the full content, which should be
  // parallelized.
  SetVector<InputFile *> files;
  for (InputSectionBase *s : inputSections) {
    InputSection *isec = dyn_cast<InputSection>(s);
    if (!isec)
      continue;
    // .debug_gnu_pub{names,types} are useless in executables.
    // They are present in input object files solely for creating
    // a .gdb_index. So we can remove them from the output.
    if (s->name == ".debug_gnu_pubnames" || s->name == ".debug_gnu_pubtypes")
      s->markDead();
    else if (isec->name == ".debug_info")
      files.insert(isec->file);
  }

  std::vector<GdbChunk> chunks(sections.size());
  std::vector<std::vector<NameAttrEntry>> nameAttrs(sections.size());
  std::vector<GdbChunk> chunks(files.size());
  std::vector<std::vector<NameAttrEntry>> nameAttrs(files.size());

  parallelForEachN(0, sections.size(), [&](size_t i) {
  parallelForEachN(0, files.size(), [&](size_t i) {
    // To keep memory usage low, we don't want to keep cached DWARFContext, so
    // avoid getDwarf() here.
    ObjFile<ELFT> *file = sections[i]->getFile<ELFT>();
    ObjFile<ELFT> *file = cast<ObjFile<ELFT>>(files[i]);
    DWARFContext dwarf(std::make_unique<LLDDwarfObj<ELFT>>(file));
    auto &dobj = static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj());

    chunks[i].sec = sections[i];
    // If there are multiple compile units in .debug_info (very rare ld -r
    // --unique), this only picks the last one. Other address ranges are lost.
    chunks[i].sec = dobj.getInfoSection();
    chunks[i].compilationUnits = readCuList(dwarf);
    chunks[i].addressAreas = readAddressAreas(dwarf, sections[i]);
    nameAttrs[i] = readPubNamesAndTypes<ELFT>(
        static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj()),
        chunks[i].compilationUnits);
    chunks[i].addressAreas = readAddressAreas(dwarf, chunks[i].sec);
    nameAttrs[i] = readPubNamesAndTypes<ELFT>(dobj, chunks[i].compilationUnits);
  });

  auto *ret = make<GdbIndexSection>();
@ -17,6 +17,25 @@ possible. We reserve the right to make different implementation choices where
it is appropriate for LLD. Intentional deviations will be documented in this
file.

Symbol assignment
~~~~~~~~~~~~~~~~~

A symbol assignment looks like:

::

  symbol = expression;
  symbol += expression;

The first form defines ``symbol``. If ``symbol`` is already defined, it will be
overridden. The other form requires ``symbol`` to be already defined.

For a simple assignment like ``alias = aliasee;``, the ``st_type`` field is
copied from the original symbol. Any arithmetic operation (e.g. ``+ 0``) will
reset ``st_type`` to ``STT_NOTYPE``.

The ``st_size`` field is set to 0.
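As a hedged illustration of the rule above (the symbol names are invented for this example), an alias created by plain assignment keeps the aliasee's type, while an arithmetic expression loses it:

::

  resolver_alias = real_ifunc;      /* st_type copied from real_ifunc */
  notype_alias   = real_ifunc + 0;  /* st_type reset to STT_NOTYPE */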

Output section description
~~~~~~~~~~~~~~~~~~~~~~~~~~

@ -24,22 +24,143 @@ Non-comprehensive list of changes in this release
ELF Improvements
----------------

* New ``--time-trace`` option records a time trace file that can be viewed in
* ``--lto-emit-asm`` is added to emit assembly output for debugging purposes.
  (`D77231 <https://reviews.llvm.org/D77231>`_)
* ``--lto-whole-program-visibility`` is added to specify that classes have hidden LTO visibility in LTO and ThinLTO links of source files compiled with ``-fwhole-program-vtables``. See `LTOVisibility <https://clang.llvm.org/docs/LTOVisibility.html>`_ for details.
  (`D71913 <https://reviews.llvm.org/D71913>`_)
* ``--print-archive-stats=`` is added to print the number of members and the number of fetched members for each archive.
  The feature is similar to GNU gold's ``--print-symbol-counts=``.
  (`D78983 <https://reviews.llvm.org/D78983>`_)
* ``--shuffle-sections=`` is added to introduce randomization in the output to help reduce measurement bias and detect static initialization order fiasco.
  (`D74791 <https://reviews.llvm.org/D74791>`_)
  (`D74887 <https://reviews.llvm.org/D74887>`_)
* ``--time-trace`` is added. It records a time trace file that can be viewed in
  chrome://tracing. The file can be specified with ``--time-trace-file``.
  Trace granularity can be specified with ``--time-trace-granularity``.
  (`D71060 <https://reviews.llvm.org/D71060>`_)
* For ARM architectures the default max page size was increased to 64k.
  This increases compatibility with systems where a non-standard page
  size was configured. This is also in line with GNU ld defaults.
  (`D77330 <https://reviews.llvm.org/D77330>`_)
* ...
* ``--thinlto-single-module`` is added to compile a subset of modules in ThinLTO for debugging purposes.
  (`D80406 <https://reviews.llvm.org/D80406>`_)
* ``--unique`` is added to create separate output sections for orphan sections.
  (`D75536 <https://reviews.llvm.org/D75536>`_)
* ``--warn-backrefs`` has been improved to emulate GNU ld's archive semantics.
  If a link passes with warnings from ``--warn-backrefs``, it almost assuredly
  means that the link will fail with GNU ld, or the symbol will get different
  resolutions in GNU ld and LLD. ``--warn-backrefs-exclude=`` is added to
  exclude known issues.
  (`D77522 <https://reviews.llvm.org/D77522>`_)
  (`D77630 <https://reviews.llvm.org/D77630>`_)
  (`D77512 <https://reviews.llvm.org/D77512>`_)
* ``--no-relax`` is accepted but ignored. The Linux kernel's RISC-V port uses this option.
  (`D81359 <https://reviews.llvm.org/D81359>`_)
* ``--rosegment`` (default) is added to complement ``--no-rosegment``.
  GNU gold from 2.35 onwards supports both options.
* ``--threads=N`` is added. The default uses all threads.
  (`D76885 <https://reviews.llvm.org/D76885>`_)
* ``--wrap`` has better compatibility with GNU ld.
* ``-z dead-reloc-in-nonalloc=<section_glob>=<value>`` is added to resolve an absolute relocation
  referencing a discarded symbol.
  (`D83264 <https://reviews.llvm.org/D83264>`_)
* Changed tombstone values to (``.debug_ranges``/``.debug_loc``) 1 and (other ``.debug_*``) 0.
  A tombstone value is the computed value of a relocation referencing a discarded symbol (``--gc-sections``, ICF or ``/DISCARD/``).
  (`D84825 <https://reviews.llvm.org/D84825>`_)
  In the future many .debug_* may switch to 0xffffffff/0xffffffffffffffff as the tombstone value.
* ``-z keep-text-section-prefix`` moves ``.text.unknown.*`` input sections to ``.text.unknown``.
* ``-z rel`` and ``-z rela`` are added to select the REL/RELA format for dynamic relocations.
  The default is target specific and typically matches the form used in relocatable objects.
* ``-z start-stop-visibility={default,protected,internal,hidden}`` is added.
  GNU ld/gold from 2.35 onwards support this option.
  (`D55682 <https://reviews.llvm.org/D55682>`_)
* When ``-r`` or ``--emit-relocs`` is specified, the GNU ld compatible
  ``--discard-all`` and ``--discard-locals`` semantics are implemented.
  (`D77807 <https://reviews.llvm.org/D77807>`_)
* ``--emit-relocs --strip-debug`` can now be used together.
  (`D74375 <https://reviews.llvm.org/D74375>`_)
* ``--gdb-index`` supports DWARF v5.
  (`D79061 <https://reviews.llvm.org/D79061>`_)
  (`D85579 <https://reviews.llvm.org/D85579>`_)
* ``-r`` allows SHT_X86_64_UNWIND to be merged into SHT_PROGBITS.
  This allows clang/GCC produced object files to be mixed together.
  (`D85785 <https://reviews.llvm.org/D85785>`_)
* Better linker script support related to output section alignments and LMA regions.
  (`D74286 <https://reviews.llvm.org/D75724>`_)
  (`D74297 <https://reviews.llvm.org/D75724>`_)
  (`D75724 <https://reviews.llvm.org/D75724>`_)
  (`D81986 <https://reviews.llvm.org/D81986>`_)
* In an input section description, the filename can be specified in double quotes.
  ``archive:file`` syntax is added.
  (`D72517 <https://reviews.llvm.org/D72517>`_)
  (`D75100 <https://reviews.llvm.org/D75100>`_)
* Linker script specified empty ``(.init|.preinit|.fini)_array`` are allowed with RELRO.
  (`D76915 <https://reviews.llvm.org/D76915>`_)
* ``INSERT AFTER`` and ``INSERT BEFORE`` work for orphan sections now.
  (`D74375 <https://reviews.llvm.org/D74375>`_)
* ``INPUT_SECTION_FLAGS`` is supported in linker scripts.
  (`D72745 <https://reviews.llvm.org/D72745>`_)
* ``DF_1_PIE`` is set for position-independent executables.
  (`D80872 <https://reviews.llvm.org/D80872>`_)
* For a symbol assignment ``alias = aliasee;``, ``alias`` inherits the ``aliasee``'s symbol type.
  (`D86263 <https://reviews.llvm.org/D86263>`_)
* ``SHT_GNU_verneed`` in shared objects are parsed, and versioned undefined symbols in shared objects are respected.
  (`D80059 <https://reviews.llvm.org/D80059>`_)
* SHF_LINK_ORDER and non-SHF_LINK_ORDER sections can be mixed as long as the SHF_LINK_ORDER components are contiguous.
  (`D77007 <https://reviews.llvm.org/D77007>`_)
* An out-of-range relocation diagnostic mentions the referenced symbol now.
  (`D73518 <https://reviews.llvm.org/D73518>`_)
* AArch64: ``R_AARCH64_PLT32`` is supported.
  (`D81184 <https://reviews.llvm.org/D81184>`_)
* ARM: SBREL type relocations are supported.
  (`D74375 <https://reviews.llvm.org/D74375>`_)
* ARM: ``R_ARM_ALU_PC_G0``, ``R_ARM_LDR_PC_G0``, ``R_ARM_THUMB_PC8`` and ``R_ARM_THUMB_PC12`` are supported.
  (`D75349 <https://reviews.llvm.org/D75349>`_)
  (`D77200 <https://reviews.llvm.org/D77200>`_)
* ARM: various improvements to .ARM.exidx: ``/DISCARD/`` support for a subset, out-of-range handling, support for non-monotonic section order.
  (`PR44824 <https://llvm.org/PR44824>`_)
* AVR: many relocation types are supported.
  (`D78741 <https://reviews.llvm.org/D78741>`_)
* Hexagon: General Dynamic and some other relocation types are supported.
* PPC: Canonical PLT and range extension thunks with addends are supported.
  (`D73399 <https://reviews.llvm.org/D73399>`_)
  (`D73424 <https://reviews.llvm.org/D73424>`_)
  (`D75394 <https://reviews.llvm.org/D75394>`_)
* PPC and PPC64: copy relocations.
  (`D73255 <https://reviews.llvm.org/D73255>`_)
* PPC64: ``_savegpr[01]_{14..31}`` and ``_restgpr[01]_{14..31}`` can be synthesized.
  (`D79977 <https://reviews.llvm.org/D79977>`_)
* PPC64: ``R_PPC64_GOT_PCREL34`` and ``R_PPC64_REL24_NOTOC`` are supported. r2 save stub is supported.
  (`D81948 <https://reviews.llvm.org/D81948>`_)
  (`D82950 <https://reviews.llvm.org/D82950>`_)
  (`D82816 <https://reviews.llvm.org/D82816>`_)
* RISC-V: ``R_RISCV_IRELATIVE`` is supported.
  (`D74022 <https://reviews.llvm.org/D74022>`_)
* RISC-V: ``R_RISCV_ALIGN`` is errored because GNU ld style linker relaxation is not supported.
  (`D71820 <https://reviews.llvm.org/D71820>`_)
* SPARCv9: more relocation types are supported.
  (`D77672 <https://reviews.llvm.org/D77672>`_)

Breaking changes
----------------

* The one-dash form of some long options (``--thinlto-*``, ``--lto-*``, ``--shuffle-sections=``)
  is no longer supported.
  (`D79371 <https://reviews.llvm.org/D79371>`_)
* ``--export-dynamic-symbol`` no longer implies ``-u``.
  The new behavior matches GNU ld from binutils 2.35 onwards.
  (`D80487 <https://reviews.llvm.org/D80487>`_)
* ARM: the default max page size was increased from 4096 to 65536.
  This increases compatibility with systems where a non-standard page
  size was configured. This is also in line with GNU ld defaults.
  (`D77330 <https://reviews.llvm.org/D77330>`_)
* ARM: for non-STT_FUNC symbols, Thumb interworking thunks are not added and BL/BLX are not substituted.
  (`D73474 <https://reviews.llvm.org/D73474>`_)
  (`D73542 <https://reviews.llvm.org/D73542>`_)
* AArch64: ``--force-bti`` is renamed to ``-z force-bti``. ``--pac-plt`` is renamed to ``-z pac-plt``.
  This change is compatible with GNU ld.
* A readonly ``PT_LOAD`` is created in the presence of a ``SECTIONS`` command.
  The new behavior is consistent with the longstanding behavior in the absence of a SECTIONS command.
* Orphan section names like ``.rodata.foo`` and ``.text.foo`` are not grouped into ``.rodata`` and ``.text`` in the presence of a ``SECTIONS`` command.
  The new behavior matches GNU ld.
  (`D75225 <https://reviews.llvm.org/D75225>`_)
* ``--no-threads`` is removed. Use ``--threads=1`` instead. ``--threads`` (no-op) is removed.

COFF Improvements
-----------------
@ -2401,21 +2401,13 @@ lldb::addr_t Target::GetPersistentSymbol(ConstString name) {

llvm::Expected<lldb_private::Address> Target::GetEntryPointAddress() {
  Module *exe_module = GetExecutableModulePointer();
  llvm::Error error = llvm::Error::success();
  assert(!error); // Check the success value when assertions are enabled.

  if (!exe_module || !exe_module->GetObjectFile()) {
    error = llvm::make_error<llvm::StringError>("No primary executable found",
                                                llvm::inconvertibleErrorCode());
  } else {
  // Try to find the entry point address in the primary executable.
  const bool has_primary_executable = exe_module && exe_module->GetObjectFile();
  if (has_primary_executable) {
    Address entry_addr = exe_module->GetObjectFile()->GetEntryPointAddress();
    if (entry_addr.IsValid())
      return entry_addr;

    error = llvm::make_error<llvm::StringError>(
        "Could not find entry point address for executable module \"" +
            exe_module->GetFileSpec().GetFilename().GetStringRef() + "\"",
        llvm::inconvertibleErrorCode());
  }

  const ModuleList &modules = GetImages();
@ -2426,14 +2418,21 @@ llvm::Expected<lldb_private::Address> Target::GetEntryPointAddress() {
      continue;

    Address entry_addr = module_sp->GetObjectFile()->GetEntryPointAddress();
    if (entry_addr.IsValid()) {
      // Discard the error.
      llvm::consumeError(std::move(error));
    if (entry_addr.IsValid())
      return entry_addr;
    }
  }

  return std::move(error);
  // We haven't found the entry point address. Return an appropriate error.
  if (!has_primary_executable)
    return llvm::make_error<llvm::StringError>(
        "No primary executable found and could not find entry point address in "
        "any executable module",
        llvm::inconvertibleErrorCode());

  return llvm::make_error<llvm::StringError>(
      "Could not find entry point address for primary executable module \"" +
          exe_module->GetFileSpec().GetFilename().GetStringRef() + "\"",
      llvm::inconvertibleErrorCode());
}

lldb::addr_t Target::GetCallableLoadAddress(lldb::addr_t load_addr,
@ -34,15 +34,14 @@ namespace llvm {
/// performance for non-sequential find() operations.
///
/// \tparam IndexT - The type of the index into the bitvector.
/// \tparam N - The first N coalesced intervals of set bits are stored in-place.
template <typename IndexT, unsigned N = 16> class CoalescingBitVector {
template <typename IndexT> class CoalescingBitVector {
  static_assert(std::is_unsigned<IndexT>::value,
                "Index must be an unsigned integer.");

  using ThisT = CoalescingBitVector<IndexT, N>;
  using ThisT = CoalescingBitVector<IndexT>;

  /// An interval map for closed integer ranges. The mapped values are unused.
  using MapT = IntervalMap<IndexT, char, N>;
  using MapT = IntervalMap<IndexT, char>;

  using UnderlyingIterator = typename MapT::const_iterator;

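Dropping ``N`` from the public signature means users now spell the type with the index type alone. A minimal usage sketch under that assumption (illustrative only, not part of the patch):

  #include "llvm/ADT/CoalescingBitVector.h"

  void example() {
    // The allocator is passed in so several bitvectors can share one arena.
    llvm::CoalescingBitVector<unsigned>::Allocator Alloc;
    llvm::CoalescingBitVector<unsigned> BV(Alloc);

    for (unsigned I = 100; I <= 200; ++I)
      BV.set(I);             // coalesced into the single interval [100, 200]
    bool Hit = BV.test(150); // true
    (void)Hit;
  }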
@ -268,6 +268,12 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q);
Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q,
                           OptimizationRemarkEmitter *ORE = nullptr);

/// See if V simplifies when its operand Op is replaced with RepOp.
/// AllowRefinement specifies whether the simplification can be a refinement,
/// or whether it needs to be strictly identical.
Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
                              const SimplifyQuery &Q, bool AllowRefinement);

/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
///
/// This first performs a normal RAUW of I with SimpleV. It then recursively
@ -210,12 +210,19 @@ class OpenMPIRBuilder {
  /// Return the (LLVM-IR) string describing the default source location.
  Constant *getOrCreateDefaultSrcLocStr();

  /// Return the (LLVM-IR) string describing the source location identified by
  /// the arguments.
  Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
                                 unsigned Line, unsigned Column);

  /// Return the (LLVM-IR) string describing the source location \p Loc.
  Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);

  /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
  /// TODO: Create an enum class for the Reserve2Flags
  Value *getOrCreateIdent(Constant *SrcLocStr,
                          omp::IdentFlag Flags = omp::IdentFlag(0));
                          omp::IdentFlag Flags = omp::IdentFlag(0),
                          unsigned Reserve2Flags = 0);

  /// Generate control flow and cleanup for cancellation.
  ///
@ -280,7 +287,7 @@ class OpenMPIRBuilder {
  StringMap<Constant *> SrcLocStrMap;

  /// Map to remember existing ident_t*.
  DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap;
  DenseMap<std::pair<Constant *, uint64_t>, Value *> IdentMap;

  /// Helper that contains information about regions we need to outline
  /// during finalization.
@ -386,8 +386,12 @@ class IRBuilderBase {
  /// filled in with the null terminated string value specified. The new global
  /// variable will be marked mergable with any others of the same contents. If
  /// Name is specified, it is the name of the global variable created.
  ///
  /// If no module is given via \p M, it is taken from the insertion point basic
  /// block.
  GlobalVariable *CreateGlobalString(StringRef Str, const Twine &Name = "",
                                     unsigned AddressSpace = 0);
                                     unsigned AddressSpace = 0,
                                     Module *M = nullptr);

  /// Get a constant value representing either true or false.
  ConstantInt *getInt1(bool V) {
@ -1934,9 +1938,13 @@ class IRBuilderBase {

  /// Same as CreateGlobalString, but return a pointer with "i8*" type
  /// instead of a pointer to array of i8.
  ///
  /// If no module is given via \p M, it is taken from the insertion point basic
  /// block.
  Constant *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "",
                                  unsigned AddressSpace = 0) {
    GlobalVariable *GV = CreateGlobalString(Str, Name, AddressSpace);
                                  unsigned AddressSpace = 0,
                                  Module *M = nullptr) {
    GlobalVariable *GV = CreateGlobalString(Str, Name, AddressSpace, M);
    Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
    Constant *Indices[] = {Zero, Zero};
    return ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV,
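A short sketch of the new overload in use (the module and names are illustrative): passing the module explicitly lets a builder with no insertion point create the string global, which previously required an insertion point to infer the module from.

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Module.h"

  void demo() {
    llvm::LLVMContext Ctx;
    llvm::Module M("demo", Ctx);
    llvm::IRBuilder<> Builder(Ctx);

    // No insertion point is set; the destination module is supplied directly.
    llvm::Constant *Str =
        Builder.CreateGlobalStringPtr("hello", ".str", /*AddressSpace=*/0, &M);
    (void)Str;
  }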
@ -3810,10 +3810,10 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
|
||||
return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
|
||||
}
|
||||
|
||||
/// See if V simplifies when its operand Op is replaced with RepOp.
|
||||
static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
|
||||
const SimplifyQuery &Q,
|
||||
unsigned MaxRecurse) {
|
||||
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
|
||||
const SimplifyQuery &Q,
|
||||
bool AllowRefinement,
|
||||
unsigned MaxRecurse) {
|
||||
// Trivial replacement.
|
||||
if (V == Op)
|
||||
return RepOp;
|
||||
@ -3826,23 +3826,19 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
|
||||
if (!I)
|
||||
return nullptr;
|
||||
|
||||
// Consider:
|
||||
// %cmp = icmp eq i32 %x, 2147483647
|
||||
// %add = add nsw i32 %x, 1
|
||||
// %sel = select i1 %cmp, i32 -2147483648, i32 %add
|
||||
//
|
||||
// We can't replace %sel with %add unless we strip away the flags (which will
|
||||
// be done in InstCombine).
|
||||
// TODO: This is unsound, because it only catches some forms of refinement.
|
||||
if (!AllowRefinement && canCreatePoison(I))
|
||||
return nullptr;
|
||||
|
||||
// If this is a binary operator, try to simplify it with the replaced op.
|
||||
if (auto *B = dyn_cast<BinaryOperator>(I)) {
|
||||
// Consider:
|
||||
// %cmp = icmp eq i32 %x, 2147483647
|
||||
// %add = add nsw i32 %x, 1
|
||||
// %sel = select i1 %cmp, i32 -2147483648, i32 %add
|
||||
//
|
||||
// We can't replace %sel with %add unless we strip away the flags.
|
||||
// TODO: This is an unusual limitation because better analysis results in
|
||||
// worse simplification. InstCombine can do this fold more generally
|
||||
// by dropping the flags. Remove this fold to save compile-time?
|
||||
if (isa<OverflowingBinaryOperator>(B))
|
||||
if (Q.IIQ.hasNoSignedWrap(B) || Q.IIQ.hasNoUnsignedWrap(B))
|
||||
return nullptr;
|
||||
if (isa<PossiblyExactOperator>(B) && Q.IIQ.isExact(B))
|
||||
return nullptr;
|
||||
|
||||
if (MaxRecurse) {
|
||||
if (B->getOperand(0) == Op)
|
||||
return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), Q,
|
||||
@ -3909,6 +3905,13 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
  return nullptr;
}

Value *llvm::SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
                                    const SimplifyQuery &Q,
                                    bool AllowRefinement) {
  return ::SimplifyWithOpReplaced(V, Op, RepOp, Q, AllowRefinement,
                                  RecursionLimit);
}

/// Try to simplify a select instruction when its condition operand is an
/// integer comparison where one operand of the compare is a constant.
static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X,
@ -3968,12 +3971,18 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
  if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS))))
    return nullptr;

  if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) {
    // Canonicalize ne to eq predicate.
    if (Pred == ICmpInst::ICMP_NE) {
      Pred = ICmpInst::ICMP_EQ;
      std::swap(TrueVal, FalseVal);
    }

  if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) {
    Value *X;
    const APInt *Y;
    if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y))))
      if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
                                           Pred == ICmpInst::ICMP_EQ))
                                           /*TrueWhenUnset=*/true))
        return V;

    // Test for a bogus zero-shift-guard-op around funnel-shift or rotate.
@ -3984,13 +3993,7 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
                                  m_Value(ShAmt)));
    // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
    // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
    if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt &&
        Pred == ICmpInst::ICMP_EQ)
      return X;
    // (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X
    // (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X
    if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt &&
        Pred == ICmpInst::ICMP_NE)
    if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
      return X;

    // Test for a zero-shift-guard-op around rotates. These are used to
@ -4004,11 +4007,6 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
                                     m_Intrinsic<Intrinsic::fshr>(m_Value(X),
                                                                  m_Deferred(X),
                                                                  m_Value(ShAmt)));
    // (ShAmt != 0) ? fshl(X, X, ShAmt) : X --> fshl(X, X, ShAmt)
    // (ShAmt != 0) ? fshr(X, X, ShAmt) : X --> fshr(X, X, ShAmt)
    if (match(TrueVal, isRotate) && FalseVal == X && CmpLHS == ShAmt &&
        Pred == ICmpInst::ICMP_NE)
      return TrueVal;
    // (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt)
    // (ShAmt == 0) ? X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt)
    if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt &&
@ -4025,27 +4023,20 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
  // arms of the select. See if substituting this value into the arm and
  // simplifying the result yields the same value as the other arm.
  if (Pred == ICmpInst::ICMP_EQ) {
    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
                               /* AllowRefinement */ false, MaxRecurse) ==
            TrueVal ||
        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q,
                               /* AllowRefinement */ false, MaxRecurse) ==
            TrueVal)
      return FalseVal;
    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q,
                               /* AllowRefinement */ true, MaxRecurse) ==
            FalseVal ||
        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q,
                               /* AllowRefinement */ true, MaxRecurse) ==
            FalseVal)
      return FalseVal;
  } else if (Pred == ICmpInst::ICMP_NE) {
    if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
            FalseVal ||
        SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
            FalseVal)
      return TrueVal;
    if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
            TrueVal ||
        SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
            TrueVal)
      return TrueVal;
  }

  return nullptr;
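The substitution rule above is easiest to see on a concrete case: replacing the compared value with its known constant in one select arm must reproduce the other arm. A minimal standalone C++ check (illustrative only, not part of the imported sources) for the fold (x == 42) ? 43 : (x + 1) --> x + 1:

// Exhaustive spot check of the select-equivalence fold over a small range.
#include <cassert>
#include <cstdint>

static int32_t original(int32_t X) { return X == 42 ? 43 : X + 1; }
static int32_t folded(int32_t X) { return X + 1; } // substituting 42 gives 43

int main() {
  for (int32_t X = -1000; X <= 1000; ++X)
    assert(original(X) == folded(X));
  return 0;
}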
@ -1592,11 +1592,16 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
    assert(Element->getTag() == dwarf::DW_TAG_subrange_type);

    const DISubrange *Subrange = cast<DISubrange>(Element);
    assert(!Subrange->getRawLowerBound() &&
           "codeview doesn't support subranges with lower bounds");
    int64_t Count = -1;
    if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>())
      Count = CI->getSExtValue();
    // Calculate the count if either LowerBound is absent or is zero and
    // either of Count or UpperBound are constant.
    auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
    if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) {
      if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>())
        Count = CI->getSExtValue();
      else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt*>())
        Count = UI->getSExtValue() + 1; // LowerBound is zero
    }

    // Forward declarations of arrays without a size and VLAs use a count of -1.
    // Emit a count of zero in these cases to match what MSVC does for arrays
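The new count rule reads: take Count when it is a constant, otherwise UpperBound + 1, and only when LowerBound is absent or zero. A standalone sketch of that decision (std::optional stands in for the DISubrange accessors; names are illustrative, not the LLVM API):

#include <cassert>
#include <cstdint>
#include <optional>

static int64_t elementCount(std::optional<int64_t> LowerBound,
                            std::optional<int64_t> Count,
                            std::optional<int64_t> UpperBound) {
  int64_t Result = -1; // -1 denotes "unknown" (VLAs, forward declarations)
  if (!LowerBound || *LowerBound == 0) {
    if (Count)
      Result = *Count;
    else if (UpperBound)
      Result = *UpperBound + 1; // LowerBound is zero
  }
  return Result;
}

int main() {
  assert(elementCount({}, 8, {}) == 8); // explicit count wins
  assert(elementCount(0, {}, 7) == 8);  // bounds 0..7 mean 8 elements
  assert(elementCount(1, {}, 7) == -1); // nonzero lower bound: unknown
  return 0;
}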
@ -1417,8 +1417,10 @@ static bool hasVectorBeenPadded(const DICompositeType *CTy) {
         Elements[0]->getTag() == dwarf::DW_TAG_subrange_type &&
         "Invalid vector element array, expected one element of type subrange");
  const auto Subrange = cast<DISubrange>(Elements[0]);
  const auto CI = Subrange->getCount().get<ConstantInt *>();
  const int32_t NumVecElements = CI->getSExtValue();
  const auto NumVecElements =
      Subrange->getCount()
          ? Subrange->getCount().get<ConstantInt *>()->getSExtValue()
          : 0;

  // Ensure we found the element count and that the actual size is wide
  // enough to contain the requested size.
@ -336,10 +336,8 @@ static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
                      unsigned Def, const TargetRegisterInfo *TRI) {
  Register PreviousSrc = PreviousCopy.getOperand(1).getReg();
  Register PreviousDef = PreviousCopy.getOperand(0).getReg();
  if (Src == PreviousSrc) {
    assert(Def == PreviousDef);
  if (Src == PreviousSrc && Def == PreviousDef)
    return true;
  }
  if (!TRI->isSubRegister(PreviousSrc, Src))
    return false;
  unsigned SubIdx = TRI->getSubRegIndex(PreviousSrc, Src);
@ -106,8 +106,13 @@ namespace {
    /// that it is alive across blocks.
    BitVector MayLiveAcrossBlocks;

    /// State of a register unit.
    enum RegUnitState {
    /// State of a physical register.
    enum RegState {
      /// A disabled register is not available for allocation, but an alias may
      /// be in use. A register can only be moved out of the disabled state if
      /// all aliases are disabled.
      regDisabled,

      /// A free register is not currently in use and can be allocated
      /// immediately without checking aliases.
      regFree,
@ -121,8 +126,8 @@ namespace {
      /// register. In that case, LiveVirtRegs contains the inverse mapping.
    };

    /// Maps each physical register to a RegUnitState enum or virtual register.
    std::vector<unsigned> RegUnitStates;
    /// Maps each physical register to a RegState enum or a virtual register.
    std::vector<unsigned> PhysRegState;

    SmallVector<Register, 16> VirtDead;
    SmallVector<MachineInstr *, 32> Coalesced;
@ -184,10 +189,6 @@ namespace {
    bool isLastUseOfLocalReg(const MachineOperand &MO) const;

    void addKillFlag(const LiveReg &LRI);
#ifndef NDEBUG
    bool verifyRegStateMapping(const LiveReg &LR) const;
#endif

    void killVirtReg(LiveReg &LR);
    void killVirtReg(Register VirtReg);
    void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR);
@ -195,7 +196,7 @@ namespace {

    void usePhysReg(MachineOperand &MO);
    void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
                       unsigned NewState);
                       RegState NewState);
    unsigned calcSpillCost(MCPhysReg PhysReg) const;
    void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);

@ -228,7 +229,7 @@ namespace {
    bool mayLiveOut(Register VirtReg);
    bool mayLiveIn(Register VirtReg);

    void dumpState() const;
    void dumpState();
  };

} // end anonymous namespace
@ -239,8 +240,7 @@ INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
                false)

void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
  for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
    RegUnitStates[*UI] = NewState;
  PhysRegState[PhysReg] = NewState;
}

/// This allocates space for the specified virtual register to be held on the
@ -384,23 +384,12 @@ void RegAllocFast::addKillFlag(const LiveReg &LR) {
  }
}

#ifndef NDEBUG
bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const {
  for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) {
    if (RegUnitStates[*UI] != LR.VirtReg)
      return false;
  }

  return true;
}
#endif

/// Mark virtreg as no longer available.
void RegAllocFast::killVirtReg(LiveReg &LR) {
  assert(verifyRegStateMapping(LR) && "Broken RegState mapping");
  addKillFlag(LR);
  MCPhysReg PhysReg = LR.PhysReg;
  setPhysRegState(PhysReg, regFree);
  assert(PhysRegState[LR.PhysReg] == LR.VirtReg &&
         "Broken RegState mapping");
  setPhysRegState(LR.PhysReg, regFree);
  LR.PhysReg = 0;
}

@ -427,9 +416,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,

/// Do the actual work of spilling.
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
  assert(verifyRegStateMapping(LR) && "Broken RegState mapping");

  MCPhysReg PhysReg = LR.PhysReg;
  assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping");

  if (LR.Dirty) {
    // If this physreg is used by the instruction, we want to kill it on the
@ -437,7 +424,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
    bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI;
    LR.Dirty = false;

    spill(MI, LR.VirtReg, PhysReg, SpillKill);
    spill(MI, LR.VirtReg, LR.PhysReg, SpillKill);

    if (SpillKill)
      LR.LastUse = nullptr; // Don't kill register again
@ -473,16 +460,53 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
  assert(PhysReg.isPhysical() && "Bad usePhysReg operand");

  markRegUsedInInstr(PhysReg);
  switch (PhysRegState[PhysReg]) {
  case regDisabled:
    break;
  case regReserved:
    PhysRegState[PhysReg] = regFree;
    LLVM_FALLTHROUGH;
  case regFree:
    MO.setIsKill();
    return;
  default:
    // The physreg was allocated to a virtual register. That means the value we
    // wanted has been clobbered.
    llvm_unreachable("Instruction uses an allocated register");
  }

  for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
    switch (RegUnitStates[*UI]) {
  // Maybe a superregister is reserved?
  for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
    MCPhysReg Alias = *AI;
    switch (PhysRegState[Alias]) {
    case regDisabled:
      break;
    case regReserved:
      RegUnitStates[*UI] = regFree;
      // Either PhysReg is a subregister of Alias and we mark the
      // whole register as free, or PhysReg is the superregister of
      // Alias and we mark all the aliases as disabled before freeing
      // PhysReg.
      // In the latter case, since PhysReg was disabled, this means that
      // its value is defined only by physical sub-registers. This check
      // is performed by the assert of the default case in this loop.
      // Note: The value of the superregister may only be partial
      // defined, that is why regDisabled is a valid state for aliases.
      assert((TRI->isSuperRegister(PhysReg, Alias) ||
              TRI->isSuperRegister(Alias, PhysReg)) &&
             "Instruction is not using a subregister of a reserved register");
      LLVM_FALLTHROUGH;
    case regFree:
      if (TRI->isSuperRegister(PhysReg, Alias)) {
        // Leave the superregister in the working set.
        setPhysRegState(Alias, regFree);
        MO.getParent()->addRegisterKilled(Alias, TRI, true);
        return;
      }
      // Some other alias was in the working set - clear it.
      setPhysRegState(Alias, regDisabled);
      break;
    default:
      llvm_unreachable("Unexpected reg unit state");
      llvm_unreachable("Instruction uses an alias of an allocated register");
    }
  }
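For context on the revert above: the reg-unit scheme kept one state entry per register unit, while the restored scheme keeps one entry per physical register plus the regDisabled marker for overlapping aliases, whose consistency callers must maintain by hand. A simplified sketch of the restored representation (illustrative only, not the LLVM code):

#include <vector>

enum ToyRegState : unsigned {
  toyRegDisabled, // register unavailable, but an alias may be live
  toyRegFree,
  toyRegReserved
  // larger values denote the virtual register occupying the phys reg
};

struct ToyState {
  std::vector<unsigned> PhysRegState; // one entry per physical register
  void set(unsigned PhysReg, unsigned NewState) {
    // Aliases are NOT updated here; callers walk them explicitly, which
    // is exactly the bookkeeping the reg-unit scheme tried to avoid.
    PhysRegState[PhysReg] = NewState;
  }
};

int main() {
  ToyState S;
  S.PhysRegState.assign(8, toyRegDisabled);
  S.set(3, toyRegFree);
  return S.PhysRegState[3] == toyRegFree ? 0 : 1;
}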
@ -495,20 +519,38 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
/// similar to defineVirtReg except the physreg is reserved instead of
/// allocated.
void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
                                 MCPhysReg PhysReg, unsigned NewState) {
  for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
    switch (unsigned VirtReg = RegUnitStates[*UI]) {
                                 MCPhysReg PhysReg, RegState NewState) {
  markRegUsedInInstr(PhysReg);
  switch (Register VirtReg = PhysRegState[PhysReg]) {
  case regDisabled:
    break;
  default:
    spillVirtReg(MI, VirtReg);
    LLVM_FALLTHROUGH;
  case regFree:
  case regReserved:
    setPhysRegState(PhysReg, NewState);
    return;
  }

  // This is a disabled register, disable all aliases.
  setPhysRegState(PhysReg, NewState);
  for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
    MCPhysReg Alias = *AI;
    switch (Register VirtReg = PhysRegState[Alias]) {
    case regDisabled:
      break;
    default:
      spillVirtReg(MI, VirtReg);
      break;
      LLVM_FALLTHROUGH;
    case regFree:
    case regReserved:
      setPhysRegState(Alias, regDisabled);
      if (TRI->isSuperRegister(PhysReg, Alias))
        return;
      break;
    }
  }

  markRegUsedInInstr(PhysReg);
  setPhysRegState(PhysReg, NewState);
}

/// Return the cost of spilling clearing out PhysReg and aliases so it is free
@ -521,24 +563,46 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
                      << " is already used in instr.\n");
    return spillImpossible;
  }
  switch (Register VirtReg = PhysRegState[PhysReg]) {
  case regDisabled:
    break;
  case regFree:
    return 0;
  case regReserved:
    LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
                      << printReg(PhysReg, TRI) << " is reserved already.\n");
    return spillImpossible;
  default: {
    LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
    assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
           "Missing VirtReg entry");
    return LRI->Dirty ? spillDirty : spillClean;
  }
  }

  for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
    switch (unsigned VirtReg = RegUnitStates[*UI]) {
  // This is a disabled register, add up cost of aliases.
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n");
  unsigned Cost = 0;
  for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
    MCPhysReg Alias = *AI;
    switch (Register VirtReg = PhysRegState[Alias]) {
    case regDisabled:
      break;
    case regFree:
      ++Cost;
      break;
    case regReserved:
      LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding "
                        << printReg(PhysReg, TRI) << " is reserved already.\n");
      return spillImpossible;
    default: {
      LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
      assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
             "Missing VirtReg entry");
      return LRI->Dirty ? spillDirty : spillClean;
      Cost += LRI->Dirty ? spillDirty : spillClean;
      break;
    }
    }
  }
  return 0;
  return Cost;
}

/// This method updates local state so that we know that PhysReg is the
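The calcSpillCost change restores accumulation over the aliases of a disabled register: each live alias contributes its own clean or dirty cost instead of the loop returning at the first hit. A toy model of that accumulation (the cost weights here are illustrative, not LLVM's constants):

#include <cassert>
#include <vector>

enum { ToySpillClean = 1, ToySpillDirty = 3 }; // illustrative weights

static unsigned aliasSpillCost(const std::vector<bool> &AliasDirty) {
  unsigned Cost = 0;
  for (bool Dirty : AliasDirty) // add up cost of aliases
    Cost += Dirty ? ToySpillDirty : ToySpillClean;
  return Cost;
}

int main() {
  assert(aliasSpillCost({false, true}) == 4); // one clean + one dirty
  assert(aliasSpillCost({}) == 0);            // no live aliases
  return 0;
}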
@ -845,17 +909,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
    if (!Reg || !Reg.isPhysical())
      continue;
    markRegUsedInInstr(Reg);

    for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) {
      if (!ThroughRegs.count(RegUnitStates[*UI]))
        continue;

      // Need to spill any aliasing registers.
      for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
        for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) {
          definePhysReg(MI, *SI, regFree);
        }
      }
    for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
      if (ThroughRegs.count(PhysRegState[*AI]))
        definePhysReg(MI, *AI, regFree);
    }
  }

@ -919,40 +975,37 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
}

#ifndef NDEBUG

void RegAllocFast::dumpState() const {
  for (unsigned Unit = 1, UnitE = TRI->getNumRegUnits(); Unit != UnitE;
       ++Unit) {
    switch (unsigned VirtReg = RegUnitStates[Unit]) {
void RegAllocFast::dumpState() {
  for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
    if (PhysRegState[Reg] == regDisabled) continue;
    dbgs() << " " << printReg(Reg, TRI);
    switch(PhysRegState[Reg]) {
    case regFree:
      break;
    case regReserved:
      dbgs() << " " << printRegUnit(Unit, TRI) << "[P]";
      dbgs() << "*";
      break;
    default: {
      dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg);
      LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
      assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry");
      if (I->Dirty)
        dbgs() << "[D]";
      assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present");
      dbgs() << '=' << printReg(PhysRegState[Reg]);
      LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]);
      assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
             "Missing VirtReg entry");
      if (LRI->Dirty)
        dbgs() << "*";
      assert(LRI->PhysReg == Reg && "Bad inverse map");
      break;
    }
    }
  }
  dbgs() << '\n';
  // Check that LiveVirtRegs is the inverse.
  for (const LiveReg &LR : LiveVirtRegs) {
    Register VirtReg = LR.VirtReg;
    assert(VirtReg.isVirtual() && "Bad map key");
    MCPhysReg PhysReg = LR.PhysReg;
    if (PhysReg != 0) {
      assert(Register::isPhysicalRegister(PhysReg) &&
             "mapped to physreg");
      for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
        assert(RegUnitStates[*UI] == VirtReg && "inverse map valid");
      }
    }
  for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
       e = LiveVirtRegs.end(); i != e; ++i) {
    if (!i->PhysReg)
      continue;
    assert(i->VirtReg.isVirtual() && "Bad map key");
    assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
    assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
  }
}
#endif
@ -1194,7 +1247,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);

  RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
  PhysRegState.assign(TRI->getNumRegs(), regDisabled);
  assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");

  MachineBasicBlock::iterator MII = MBB.begin();
@ -690,6 +690,12 @@ bool FastISel::selectGetElementPtr(const User *I) {
  Register N = getRegForValue(I->getOperand(0));
  if (!N) // Unhandled operand. Halt "fast" selection and bail.
    return false;

  // FIXME: The code below does not handle vector GEPs. Halt "fast" selection
  // and bail.
  if (isa<VectorType>(I->getType()))
    return false;

  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
@ -409,7 +409,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                PartVT, IntermediateVT, V);
                                PartVT, IntermediateVT, V, CallConv);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, build the intermediate
    // operands from the parts.
@ -418,7 +418,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                PartVT, IntermediateVT, V);
                                PartVT, IntermediateVT, V, CallConv);
  }

  // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
@ -5726,6 +5726,11 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
    return SDValue();
  }

  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  switch (Opcode) {
@ -5804,13 +5809,17 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
@ -5847,7 +5856,9 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
@ -5858,7 +5869,9 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      return DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
@ -5887,13 +5900,17 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
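The RemoveDeadNode lambda exists because getNegatedExpression negates both operands speculatively and keeps only the cheaper result; the loser would otherwise survive as a dead node in the DAG. A standalone model of that pattern (plain containers, not the SelectionDAG API):

#include <algorithm>
#include <cassert>
#include <vector>

struct ToyNode { int Id; bool Used = false; };

int main() {
  std::vector<ToyNode> DAG = {{0}, {1}}; // speculative NegX and NegY
  DAG[0].Used = true;                    // NegX wins on cost; NegY is dead
  // Mirror RemoveDeadNode: drop nodes that ended up with no users.
  DAG.erase(std::remove_if(DAG.begin(), DAG.end(),
                           [](const ToyNode &N) { return !N.Used; }),
            DAG.end());
  assert(DAG.size() == 1 && DAG[0].Id == 0);
  return 0;
}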
@ -1827,7 +1827,10 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
  if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
    Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
    PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
    return M.getOrInsertGlobal("__guard_local", PtrTy);
    Constant *C = M.getOrInsertGlobal("__guard_local", PtrTy);
    if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(C))
      G->setVisibility(GlobalValue::HiddenVisibility);
    return C;
  }
  return nullptr;
}
@ -185,16 +185,18 @@ void OpenMPIRBuilder::finalize() {
}

Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
                                         IdentFlag LocFlags) {
                                         IdentFlag LocFlags,
                                         unsigned Reserve2Flags) {
  // Enable "C-mode".
  LocFlags |= OMP_IDENT_FLAG_KMPC;

  GlobalVariable *&DefaultIdent = IdentMap[{SrcLocStr, uint64_t(LocFlags)}];
  if (!DefaultIdent) {
  Value *&Ident =
      IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
  if (!Ident) {
    Constant *I32Null = ConstantInt::getNullValue(Int32);
    Constant *IdentData[] = {I32Null,
                             ConstantInt::get(Int32, uint64_t(LocFlags)),
                             I32Null, I32Null, SrcLocStr};
    Constant *IdentData[] = {
        I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
        ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
    Constant *Initializer = ConstantStruct::get(
        cast<StructType>(IdentPtr->getPointerElementType()), IdentData);

@ -203,15 +205,16 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.getType() == IdentPtr && GV.hasInitializer())
        if (GV.getInitializer() == Initializer)
          return DefaultIdent = &GV;
          return Ident = &GV;

    DefaultIdent = new GlobalVariable(M, IdentPtr->getPointerElementType(),
                                      /* isConstant = */ false,
                                      GlobalValue::PrivateLinkage, Initializer);
    DefaultIdent->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    DefaultIdent->setAlignment(Align(8));
    auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
                                  /* isConstant = */ true,
                                  GlobalValue::PrivateLinkage, Initializer);
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    GV->setAlignment(Align(8));
    Ident = GV;
  }
  return DefaultIdent;
  return Ident;
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
@ -227,11 +230,30 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
        GV.getInitializer() == Initializer)
      return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);

    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr);
    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
                                              /* AddressSpace */ 0, &M);
  }
  return SrcLocStr;
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
                                                StringRef FileName,
                                                unsigned Line,
                                                unsigned Column) {
  SmallString<128> Buffer;
  Buffer.push_back(';');
  Buffer.append(FileName);
  Buffer.push_back(';');
  Buffer.append(FunctionName);
  Buffer.push_back(';');
  Buffer.append(std::to_string(Line));
  Buffer.push_back(';');
  Buffer.append(std::to_string(Column));
  Buffer.push_back(';');
  Buffer.push_back(';');
  return getOrCreateSrcLocStr(Buffer.str());
}

Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
}
@ -241,17 +263,13 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
  DILocation *DIL = Loc.DL.get();
  if (!DIL)
    return getOrCreateDefaultSrcLocStr();
  StringRef Filename =
  StringRef FileName =
      !DIL->getFilename().empty() ? DIL->getFilename() : M.getName();
  StringRef Function = DIL->getScope()->getSubprogram()->getName();
  Function =
      !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
  std::string LineStr = std::to_string(DIL->getLine());
  std::string ColumnStr = std::to_string(DIL->getColumn());
  std::stringstream SrcLocStr;
  SrcLocStr << ";" << Filename.data() << ";" << Function.data() << ";"
            << LineStr << ";" << ColumnStr << ";;";
  return getOrCreateSrcLocStr(SrcLocStr.str());
  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
                              DIL->getColumn());
}

Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
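The location strings built above follow the runtime's ";file;function;line;column;;" layout, with ";unknown;unknown;0;0;;" as the fallback. A standalone restatement of the formatting (std::string instead of SmallString/StringRef):

#include <cassert>
#include <string>

static std::string srcLocStr(const std::string &Func, const std::string &File,
                             unsigned Line, unsigned Col) {
  // Mirrors the Buffer construction above: leading ';', then
  // file;func;line;col, then the two trailing ';' separators.
  return ";" + File + ";" + Func + ";" + std::to_string(Line) + ";" +
         std::to_string(Col) + ";;";
}

int main() {
  assert(srcLocStr("main", "test.c", 3, 5) == ";test.c;main;3;5;;");
  assert(srcLocStr("unknown", "unknown", 0, 0) == ";unknown;unknown;0;0;;");
  return 0;
}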
@ -42,13 +42,14 @@ using namespace llvm;
/// created.
GlobalVariable *IRBuilderBase::CreateGlobalString(StringRef Str,
                                                  const Twine &Name,
                                                  unsigned AddressSpace) {
                                                  unsigned AddressSpace,
                                                  Module *M) {
  Constant *StrConstant = ConstantDataArray::getString(Context, Str);
  Module &M = *BB->getParent()->getParent();
  auto *GV = new GlobalVariable(M, StrConstant->getType(), true,
                                GlobalValue::PrivateLinkage, StrConstant, Name,
                                nullptr, GlobalVariable::NotThreadLocal,
                                AddressSpace);
  if (!M)
    M = BB->getParent()->getParent();
  auto *GV = new GlobalVariable(
      *M, StrConstant->getType(), true, GlobalValue::PrivateLinkage,
      StrConstant, Name, nullptr, GlobalVariable::NotThreadLocal, AddressSpace);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(Align(1));
  return GV;
@ -1475,74 +1475,6 @@ void FPPassManager::dumpPassStructure(unsigned Offset) {
  }
}

#ifdef EXPENSIVE_CHECKS
namespace {
namespace details {

// Basic hashing mechanism to detect structural change to the IR, used to verify
// pass return status consistency with actual change. Loosely copied from
// llvm/lib/Transforms/Utils/FunctionComparator.cpp

class StructuralHash {
  uint64_t Hash = 0x6acaa36bef8325c5ULL;

  void update(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }

public:
  StructuralHash() = default;

  void update(Function &F) {
    if (F.empty())
      return;

    update(F.isVarArg());
    update(F.arg_size());

    SmallVector<const BasicBlock *, 8> BBs;
    SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

    BBs.push_back(&F.getEntryBlock());
    VisitedBBs.insert(BBs[0]);
    while (!BBs.empty()) {
      const BasicBlock *BB = BBs.pop_back_val();
      update(45798); // Block header
      for (auto &Inst : *BB)
        update(Inst.getOpcode());

      const Instruction *Term = BB->getTerminator();
      for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
        if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
          continue;
        BBs.push_back(Term->getSuccessor(i));
      }
    }
  }

  void update(Module &M) {
    for (Function &F : M)
      update(F);
  }

  uint64_t getHash() const { return Hash; }
};

} // namespace details

uint64_t StructuralHash(Function &F) {
  details::StructuralHash H;
  H.update(F);
  return H.getHash();
}

uint64_t StructuralHash(Module &M) {
  details::StructuralHash H;
  H.update(M);
  return H.getHash();
}

} // end anonymous namespace

#endif

/// Execute all of the passes scheduled for execution by invoking
/// runOnFunction method. Keep track of whether any of the passes modifies
@ -1581,16 +1513,7 @@ bool FPPassManager::runOnFunction(Function &F) {
    {
      PassManagerPrettyStackEntry X(FP, F);
      TimeRegion PassTimer(getPassTimer(FP));
#ifdef EXPENSIVE_CHECKS
      uint64_t RefHash = StructuralHash(F);
#endif
      LocalChanged |= FP->runOnFunction(F);

#ifdef EXPENSIVE_CHECKS
      assert((LocalChanged || (RefHash == StructuralHash(F))) &&
             "Pass modifies its input and doesn't report it.");
#endif

      if (EmitICRemark) {
        unsigned NewSize = F.getInstructionCount();

@ -1691,17 +1614,7 @@ MPPassManager::runOnModule(Module &M) {
      PassManagerPrettyStackEntry X(MP, M);
      TimeRegion PassTimer(getPassTimer(MP));

#ifdef EXPENSIVE_CHECKS
      uint64_t RefHash = StructuralHash(M);
#endif

      LocalChanged |= MP->runOnModule(M);

#ifdef EXPENSIVE_CHECKS
      assert((LocalChanged || (RefHash == StructuralHash(M))) &&
             "Pass modifies its input and doesn't report it.");
#endif

      if (EmitICRemark) {
        // Update the size of the module.
        unsigned ModuleCount = M.getInstructionCount();
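The EXPENSIVE_CHECKS machinery removed above asserted that a pass returning "no change" really left the IR structurally identical, by hashing opcodes before and after each run. A standalone analogue of that check (std::hash chain instead of hash_16_bytes; opcodes modeled as integers):

#include <cassert>
#include <cstdint>
#include <functional>
#include <vector>

static uint64_t structuralHash(const std::vector<unsigned> &Opcodes) {
  uint64_t Hash = 0x6acaa36bef8325c5ULL; // seed taken from the removed code
  for (unsigned Op : Opcodes)
    Hash = std::hash<uint64_t>{}(Hash ^ Op);
  return Hash;
}

int main() {
  std::vector<unsigned> Func = {11, 12, 31}; // a function's opcode stream
  uint64_t RefHash = structuralHash(Func);
  Func.push_back(7);        // the "pass" mutates the function...
  bool LocalChanged = true; // ...and honestly reports it.
  // The removed assert in spirit: a silent mutation would fail here.
  assert(LocalChanged || RefHash == structuralHash(Func));
  return 0;
}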
@ -522,7 +522,7 @@ static constexpr FeatureBitset ImpliedFeaturesAVX5124FMAPS = {};
static constexpr FeatureBitset ImpliedFeaturesAVX5124VNNIW = {};

// SSE4_A->FMA4->XOP chain.
static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSSE3;
static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSE3;
static constexpr FeatureBitset ImpliedFeaturesFMA4 = FeatureAVX | FeatureSSE4_A;
static constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4;
@ -1694,11 +1694,10 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
  MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
  if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
    RestoreBegin = std::prev(RestoreEnd);;
    while (IsSVECalleeSave(RestoreBegin) &&
           RestoreBegin != MBB.begin())
    RestoreBegin = std::prev(RestoreEnd);
    while (RestoreBegin != MBB.begin() &&
           IsSVECalleeSave(std::prev(RestoreBegin)))
      --RestoreBegin;
    ++RestoreBegin;

    assert(IsSVECalleeSave(RestoreBegin) &&
           IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
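The fixed loop anchors RestoreBegin at the last instruction and walks backwards while the previous instruction is still an SVE callee-save restore, which removes both the stray double semicolon and the off-by-one of the old version. The same idiom on a std::list (odd numbers stand in for callee-save instructions; purely illustrative):

#include <cassert>
#include <iterator>
#include <list>

int main() {
  std::list<int> MBB = {2, 1, 3, 5}; // trailing run of "callee-save" instrs
  auto IsSave = [](int I) { return I % 2 != 0; };
  auto RestoreEnd = MBB.end();
  auto RestoreBegin = std::prev(RestoreEnd);
  while (RestoreBegin != MBB.begin() && IsSave(*std::prev(RestoreBegin)))
    --RestoreBegin;
  // RestoreBegin now points at the first instruction of the run.
  assert(*RestoreBegin == 1 && IsSave(*RestoreBegin));
  return 0;
}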
@ -5416,7 +5416,7 @@ multiclass sve_mem_64b_sst_vi_ptrs<bits<3> opc, string asm,
  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $imm5]",
                  (!cast<Instruction>(NAME # _IMM) ZPR64:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, imm_ty:$imm5), 0>;
  def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
                  (!cast<Instruction>(NAME # _IMM) Z_s:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;
                  (!cast<Instruction>(NAME # _IMM) Z_d:$Zt, PPR3bAny:$Pg, ZPR64:$Zn, 0), 1>;

  def : Pat<(op (nxv2i64 ZPR:$data), (nxv2i1 PPR:$gp), (nxv2i64 ZPR:$ptrs), imm_ty:$index, vt),
            (!cast<Instruction>(NAME # _IMM) ZPR:$data, PPR:$gp, ZPR:$ptrs, imm_ty:$index)>;
@ -192,8 +192,8 @@ static bool updateOperand(FoldCandidate &Fold,
  if (Fold.isImm()) {
    if (MI->getDesc().TSFlags & SIInstrFlags::IsPacked &&
        !(MI->getDesc().TSFlags & SIInstrFlags::IsMAI) &&
        AMDGPU::isInlinableLiteralV216(static_cast<uint16_t>(Fold.ImmToFold),
                                       ST.hasInv2PiInlineImm())) {
        AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
                                      ST.hasInv2PiInlineImm())) {
      // Set op_sel/op_sel_hi on this operand or bail out if op_sel is
      // already set.
      unsigned Opcode = MI->getOpcode();
@ -209,30 +209,30 @@ static bool updateOperand(FoldCandidate &Fold,
      ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
      MachineOperand &Mod = MI->getOperand(ModIdx);
      unsigned Val = Mod.getImm();
      if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1))
        return false;
      // Only apply the following transformation if that operand requries
      // a packed immediate.
      switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
      case AMDGPU::OPERAND_REG_IMM_V2FP16:
      case AMDGPU::OPERAND_REG_IMM_V2INT16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
      case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
        // If upper part is all zero we do not need op_sel_hi.
        if (!isUInt<16>(Fold.ImmToFold)) {
          if (!(Fold.ImmToFold & 0xffff)) {
            Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
      if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
        // Only apply the following transformation if that operand requries
        // a packed immediate.
        switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
        case AMDGPU::OPERAND_REG_IMM_V2FP16:
        case AMDGPU::OPERAND_REG_IMM_V2INT16:
        case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
        case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
          // If upper part is all zero we do not need op_sel_hi.
          if (!isUInt<16>(Fold.ImmToFold)) {
            if (!(Fold.ImmToFold & 0xffff)) {
              Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0);
              Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
              Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
              return true;
            }
            Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
            Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff);
            Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
            return true;
          }
          Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
          Old.ChangeToImmediate(Fold.ImmToFold & 0xffff);
          return true;
          break;
        default:
          break;
        }
        break;
      default:
        break;
      }
    }
  }
@ -1282,6 +1282,19 @@ bool isInlinableIntLiteralV216(int32_t Literal) {
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}

bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return true;

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return true;
  return Lo16 == Hi16;
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();
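A few spot checks make the new predicate concrete: a packed 16-bit-pair literal is foldable when the whole value fits in 16 bits, when its low half is zero (op_sel can select the high half), or when both halves are equal. A standalone restatement with tests (mirrors the function above; the range check replaces isInt<16>/isUInt<16>):

#include <cassert>
#include <cstdint>

static bool foldableV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (Literal >= INT16_MIN && Literal <= UINT16_MAX) // isInt<16> || isUInt<16>
    return true;
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff)) // low half zero: op_sel picks the high half
    return true;
  return Lo16 == Hi16;     // both halves identical
}

int main() {
  assert(foldableV216(0x00001234));  // fits in 16 bits
  assert(foldableV216(0x12340000));  // low half zero
  assert(foldableV216(0x43214321));  // equal halves
  assert(!foldableV216(0x12345678)); // none of the above
  return 0;
}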
@ -660,6 +660,9 @@ bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
@ -78,9 +78,9 @@ class PPCBoolRetToInt : public FunctionPass {
      Value *Curr = WorkList.back();
      WorkList.pop_back();
      auto *CurrUser = dyn_cast<User>(Curr);
      // Operands of CallInst are skipped because they may not be Bool type,
      // and their positions are defined by ABI.
      if (CurrUser && !isa<CallInst>(Curr))
      // Operands of CallInst/Constant are skipped because they may not be Bool
      // type. For CallInst, their positions are defined by ABI.
      if (CurrUser && !isa<CallInst>(Curr) && !isa<Constant>(Curr))
        for (auto &Op : CurrUser->operands())
          if (Defs.insert(Op).second)
            WorkList.push_back(Op);
@ -90,6 +90,9 @@ class PPCBoolRetToInt : public FunctionPass {

  // Translate a i1 value to an equivalent i32/i64 value:
  Value *translate(Value *V) {
    assert(V->getType() == Type::getInt1Ty(V->getContext()) &&
           "Expect an i1 value");

    Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
                                : Type::getInt32Ty(V->getContext());

@ -252,9 +255,9 @@ class PPCBoolRetToInt : public FunctionPass {
      auto *First = dyn_cast<User>(Pair.first);
      auto *Second = dyn_cast<User>(Pair.second);
      assert((!First || Second) && "translated from user to non-user!?");
      // Operands of CallInst are skipped because they may not be Bool type,
      // and their positions are defined by ABI.
      if (First && !isa<CallInst>(First))
      // Operands of CallInst/Constant are skipped because they may not be Bool
      // type. For CallInst, their positions are defined by ABI.
      if (First && !isa<CallInst>(First) && !isa<Constant>(First))
        for (unsigned i = 0; i < First->getNumOperands(); ++i)
          Second->setOperand(i, BoolToIntMap[First->getOperand(i)]);
    }
@ -799,7 +799,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
      setOperationAction(ISD::MUL, MVT::v4f32, Legal);
      setOperationAction(ISD::FMA, MVT::v4f32, Legal);

      if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
        setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
      }
@ -920,6 +920,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::SETCC, MVT::v1i128, Expand);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
@ -1258,6 +1260,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (!isPPC64)
    setMaxAtomicSizeInBitsSupported(32);

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
@ -1295,12 +1300,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  if (Subtarget.hasP9Altivec()) {
    setTargetDAGCombine(ISD::ABS);
    setTargetDAGCombine(ISD::VSELECT);
@ -1026,8 +1026,8 @@ def : InstAlias<"mfamr $Rx", (MFSPR8 g8rc:$Rx, 29)>;
foreach SPRG = 0-3 in {
  def : InstAlias<"mfsprg $RT, "#SPRG, (MFSPR8 g8rc:$RT, !add(SPRG, 272))>;
  def : InstAlias<"mfsprg"#SPRG#" $RT", (MFSPR8 g8rc:$RT, !add(SPRG, 272))>;
  def : InstAlias<"mfsprg "#SPRG#", $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
  def : InstAlias<"mfsprg"#SPRG#" $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
  def : InstAlias<"mtsprg "#SPRG#", $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
  def : InstAlias<"mtsprg"#SPRG#" $RT", (MTSPR8 !add(SPRG, 272), g8rc:$RT)>;
}

def : InstAlias<"mfasr $RT", (MFSPR8 g8rc:$RT, 280)>;
@ -1555,6 +1555,8 @@ bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
  MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
  MI.getOperand(2).setImm(NewSH);
  MI.getOperand(3).setImm(NewMB);
  MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill());
  SrcMI->getOperand(1).setIsKill(false);

  LLVM_DEBUG(dbgs() << "To: ");
  LLVM_DEBUG(MI.dump());
@ -601,8 +601,8 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
}

bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  MVT ExtT = ExtVal.getSimpleValueType();
  MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
  EVT ExtT = ExtVal.getValueType();
  EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
@ -1148,22 +1148,6 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
  return &Sel;
}

static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *ReplaceOp,
                                     const SimplifyQuery &Q) {
  // If this is a binary operator, try to simplify it with the replaced op
  // because we know Op and ReplaceOp are equivalant.
  // For example: V = X + 1, Op = X, ReplaceOp = 42
  // Simplifies as: add(42, 1) --> 43
  if (auto *BO = dyn_cast<BinaryOperator>(V)) {
    if (BO->getOperand(0) == Op)
      return SimplifyBinOp(BO->getOpcode(), ReplaceOp, BO->getOperand(1), Q);
    if (BO->getOperand(1) == Op)
      return SimplifyBinOp(BO->getOpcode(), BO->getOperand(0), ReplaceOp, Q);
  }

  return nullptr;
}

/// If we have a select with an equality comparison, then we know the value in
/// one of the arms of the select. See if substituting this value into an arm
/// and simplifying the result yields the same value as the other arm.
@ -1190,20 +1174,45 @@ static Value *foldSelectValueEquivalence(SelectInst &Sel, ICmpInst &Cmp,
  if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
    std::swap(TrueVal, FalseVal);

  auto *FalseInst = dyn_cast<Instruction>(FalseVal);
  if (!FalseInst)
    return nullptr;

  // InstSimplify already performed this fold if it was possible subject to
  // current poison-generating flags. Try the transform again with
  // poison-generating flags temporarily dropped.
  bool WasNUW = false, WasNSW = false, WasExact = false;
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(FalseVal)) {
    WasNUW = OBO->hasNoUnsignedWrap();
    WasNSW = OBO->hasNoSignedWrap();
    FalseInst->setHasNoUnsignedWrap(false);
    FalseInst->setHasNoSignedWrap(false);
  }
  if (auto *PEO = dyn_cast<PossiblyExactOperator>(FalseVal)) {
    WasExact = PEO->isExact();
    FalseInst->setIsExact(false);
  }

  // Try each equivalence substitution possibility.
  // We have an 'EQ' comparison, so the select's false value will propagate.
  // Example:
  // (X == 42) ? 43 : (X + 1) --> (X == 42) ? (X + 1) : (X + 1) --> X + 1
  // (X == 42) ? (X + 1) : 43 --> (X == 42) ? (42 + 1) : 43 --> 43
  Value *CmpLHS = Cmp.getOperand(0), *CmpRHS = Cmp.getOperand(1);
  if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q) == TrueVal ||
      simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q) == TrueVal ||
      simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q) == FalseVal ||
      simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q) == FalseVal) {
    if (auto *FalseInst = dyn_cast<Instruction>(FalseVal))
      FalseInst->dropPoisonGeneratingFlags();
  if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
                             /* AllowRefinement */ false) == TrueVal ||
      SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q,
                             /* AllowRefinement */ false) == TrueVal) {
    return FalseVal;
  }

  // Restore poison-generating flags if the transform did not apply.
  if (WasNUW)
    FalseInst->setHasNoUnsignedWrap();
  if (WasNSW)
    FalseInst->setHasNoSignedWrap();
  if (WasExact)
    FalseInst->setIsExact();

  return nullptr;
}
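The flag handling above is a try/rollback pattern: nuw/nsw/exact are cleared before the substitution is attempted, kept cleared when the fold fires (the false value replaces the select), and restored otherwise. A minimal standalone model of that control flow (booleans stand in for the IR flags):

#include <cassert>

struct ToyInst {
  bool NUW = true, NSW = true; // poison-generating flags
};

static bool tryFold(bool FoldApplies, ToyInst &FalseInst) {
  bool WasNUW = FalseInst.NUW, WasNSW = FalseInst.NSW;
  FalseInst.NUW = FalseInst.NSW = false; // drop before the attempt
  if (FoldApplies)
    return true;          // keep flags dropped; FalseInst replaces the select
  FalseInst.NUW = WasNUW; // rollback: the transform did not apply
  FalseInst.NSW = WasNSW;
  return false;
}

int main() {
  ToyInst A, B;
  assert(tryFold(true, A) && !A.NUW && !A.NSW);  // fold fired, flags stay off
  assert(!tryFold(false, B) && B.NUW && B.NSW);  // no fold, flags restored
  return 0;
}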