Merge llvm, clang, lld and lldb trunk r291476.

commit 618592e561

@@ -1491,6 +1491,8 @@ class ScalarEvolution {

void print(raw_ostream &OS) const;
void verify() const;
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);

/// Collect parametric terms occurring in step expressions (first step of
/// delinearization).

@@ -290,7 +290,7 @@ class TargetLibraryInfo {
}

/// Returns extension attribute kind to be used for i32 parameters
/// correpsonding to C-level int or unsigned int. May be zeroext, signext,
/// corresponding to C-level int or unsigned int. May be zeroext, signext,
/// or none.
Attribute::AttrKind getExtAttrForI32Param(bool Signed = true) const {
if (Impl->ShouldExtI32Param)

@@ -301,7 +301,7 @@ class TargetLibraryInfo {
}

/// Returns extension attribute kind to be used for i32 return values
/// correpsonding to C-level int or unsigned int. May be zeroext, signext,
/// corresponding to C-level int or unsigned int. May be zeroext, signext,
/// or none.
Attribute::AttrKind getExtAttrForI32Return(bool Signed = true) const {
if (Impl->ShouldExtI32Return)
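
A minimal usage sketch, assuming a CallInst `CI` and a TargetLibraryInfo `TLI` exist in the surrounding code (this call site is illustrative, not part of the patch):

    // Apply the extension attribute, if any, to the i32 return value and to
    // the first i32 parameter of an emitted libcall. Attribute index 0 is the
    // return value, index 1 the first parameter.
    if (Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Return(/*Signed=*/true))
      CI->addAttribute(AttributeSet::ReturnIndex, ExtAttr);
    if (Attribute::AttrKind ExtAttr = TLI.getExtAttrForI32Param(/*Signed=*/true))
      CI->addAttribute(1, ExtAttr);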

@@ -308,6 +308,16 @@ class MachineBasicBlock
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
typedef LiveInVector::const_iterator livein_iterator;
#ifndef NDEBUG
/// Unlike livein_begin, this method does not check that the liveness
/// information is accurate. Still for debug purposes it may be useful
/// to have iterators that won't assert if the liveness information
/// is not current.
livein_iterator livein_begin_dbg() const { return LiveIns.begin(); }
iterator_range<livein_iterator> liveins_dbg() const {
return make_range(livein_begin_dbg(), livein_end());
}
#endif
livein_iterator livein_begin() const;
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
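
A hedged sketch of how the new debug-only accessors might be used (assumes a build where NDEBUG is not defined; `MBB` stands for an arbitrary MachineBasicBlock):

    // Dump live-in registers without asserting that liveness information is
    // up to date; liveins_dbg() exists only when NDEBUG is not defined.
    for (const auto &LiveIn : MBB.liveins_dbg())
      dbgs() << "live-in phys reg: " << LiveIn.PhysReg << "\n";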

@@ -148,8 +148,7 @@ class MachineFrameInfo {
/// grouping overaligned allocas into a "secondary stack frame" and
/// then only use a single alloca to allocate this frame and only a
/// single virtual register to access it. Currently, without such an
/// optimization, each such alloca gets it's own dynamic
/// realignment.
/// optimization, each such alloca gets its own dynamic realignment.
bool StackRealignable;

/// Whether the function has the \c alignstack attribute.

@@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_MSF_STREAMARRAY_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Error.h"
#include <cassert>

@@ -107,7 +108,10 @@ class VarStreamArray {
Extractor E;
};

template <typename ValueType, typename Extractor> class VarStreamArrayIterator {
template <typename ValueType, typename Extractor>
class VarStreamArrayIterator
: public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
std::forward_iterator_tag, ValueType> {
typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
typedef VarStreamArray<ValueType, Extractor> ArrayType;

@@ -144,41 +148,39 @@ template <typename ValueType, typename Extractor> class VarStreamArrayIterator {
return false;
}

bool operator!=(const IterType &R) { return !(*this == R); }

const ValueType &operator*() const {
assert(Array && !HasError);
return ThisValue;
}

IterType &operator++() {
// We are done with the current record, discard it so that we are
// positioned at the next record.
IterRef = IterRef.drop_front(ThisLen);
if (IterRef.getLength() == 0) {
// There is nothing after the current record, we must make this an end
// iterator.
moveToEnd();
} else {
// There is some data after the current record.
auto EC = Extract(IterRef, ThisLen, ThisValue);
if (EC) {
consumeError(std::move(EC));
markError();
} else if (ThisLen == 0) {
// An empty record? Make this an end iterator.
IterType &operator+=(std::ptrdiff_t N) {
while (N > 0) {
// We are done with the current record, discard it so that we are
// positioned at the next record.
IterRef = IterRef.drop_front(ThisLen);
if (IterRef.getLength() == 0) {
// There is nothing after the current record, we must make this an end
// iterator.
moveToEnd();
return *this;
} else {
// There is some data after the current record.
auto EC = Extract(IterRef, ThisLen, ThisValue);
if (EC) {
consumeError(std::move(EC));
markError();
return *this;
} else if (ThisLen == 0) {
// An empty record? Make this an end iterator.
moveToEnd();
return *this;
}
}
--N;
}
return *this;
}

IterType operator++(int) {
IterType Original = *this;
++*this;
return Original;
}

private:
void moveToEnd() {
Array = nullptr;

@@ -211,6 +213,16 @@ template <typename T> class FixedStreamArray {
assert(Stream.getLength() % sizeof(T) == 0);
}

bool operator==(const FixedStreamArray<T> &Other) const {
return Stream == Other.Stream;
}

bool operator!=(const FixedStreamArray<T> &Other) const {
return !(*this == Other);
}

FixedStreamArray &operator=(const FixedStreamArray &) = default;

const T &operator[](uint32_t Index) const {
assert(Index < size());
uint32_t Off = Index * sizeof(T);

@@ -226,6 +238,8 @@ template <typename T> class FixedStreamArray {

uint32_t size() const { return Stream.getLength() / sizeof(T); }

bool empty() const { return size() == 0; }

FixedStreamArrayIterator<T> begin() const {
return FixedStreamArrayIterator<T>(*this, 0);
}

@@ -240,36 +254,53 @@ template <typename T> class FixedStreamArray {
ReadableStreamRef Stream;
};

template <typename T> class FixedStreamArrayIterator {
template <typename T>
class FixedStreamArrayIterator
: public iterator_facade_base<FixedStreamArrayIterator<T>,
std::random_access_iterator_tag, T> {

public:
FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
: Array(Array), Index(Index) {}

bool operator==(const FixedStreamArrayIterator<T> &R) {
assert(&Array == &R.Array);
return Index == R.Index;
}

bool operator!=(const FixedStreamArrayIterator<T> &R) {
return !(*this == R);
FixedStreamArrayIterator<T> &
operator=(const FixedStreamArrayIterator<T> &Other) {
Array = Other.Array;
Index = Other.Index;
return *this;
}

const T &operator*() const { return Array[Index]; }

FixedStreamArrayIterator<T> &operator++() {
assert(Index < Array.size());
++Index;
bool operator==(const FixedStreamArrayIterator<T> &R) const {
assert(Array == R.Array);
return (Index == R.Index) && (Array == R.Array);
}

FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
Index += N;
return *this;
}

FixedStreamArrayIterator<T> operator++(int) {
FixedStreamArrayIterator<T> Original = *this;
++*this;
return Original;
FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
assert(Index >= N);
Index -= N;
return *this;
}

std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
assert(Array == R.Array);
assert(Index >= R.Index);
return Index - R.Index;
}

bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
assert(Array == RHS.Array);
return Index < RHS.Index;
}

private:
const FixedStreamArray<T> &Array;
FixedStreamArray<T> Array;
uint32_t Index;
};
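
The pattern the two iterators above follow comes from llvm/ADT/iterator.h: derive from iterator_facade_base, supply a small set of core operations, and the base class synthesizes the rest (++, --, +, -, [], !=, <=, >, and so on). A self-contained sketch with an invented iterator, under that assumption:

    #include "llvm/ADT/iterator.h"
    #include <cstddef>
    #include <iterator>

    // Minimal random-access iterator over the integers 0, 1, 2, ... The
    // explicitly defined operations below are all that iterator_facade_base
    // needs; everything else is generated from them.
    class CountingIterator
        : public llvm::iterator_facade_base<CountingIterator,
                                            std::random_access_iterator_tag, int> {
    public:
      explicit CountingIterator(int Value = 0) : Value(Value) {}

      bool operator==(const CountingIterator &R) const { return Value == R.Value; }
      bool operator<(const CountingIterator &R) const { return Value < R.Value; }
      CountingIterator &operator+=(std::ptrdiff_t N) { Value += N; return *this; }
      CountingIterator &operator-=(std::ptrdiff_t N) { Value -= N; return *this; }
      std::ptrdiff_t operator-(const CountingIterator &R) const { return Value - R.Value; }
      const int &operator*() const { return Value; }

    private:
      int Value;
    };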

@@ -83,7 +83,7 @@ class SerializationTraits<
namespace remote {

class OrcRemoteTargetRPCAPI
: public rpc::SingleThreadedRPC<rpc::RawByteChannel> {
: public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> {
protected:
class ResourceIdMgr {
public:

@@ -108,7 +108,7 @@ class OrcRemoteTargetRPCAPI
public:
// FIXME: Remove constructors once MSVC supports synthesizing move-ops.
OrcRemoteTargetRPCAPI(rpc::RawByteChannel &C)
: rpc::SingleThreadedRPC<rpc::RawByteChannel>(C, true) {}
: rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(C, true) {}

class CallIntVoid
: public rpc::Function<CallIntVoid, int32_t(JITTargetAddress Addr)> {

@@ -702,7 +702,7 @@ class CanDeserialize {
/// sync.
template <typename ImplT, typename ChannelT, typename FunctionIdT,
typename SequenceNumberT>
class RPCBase {
class RPCEndpointBase {
protected:
class OrcRPCInvalid : public Function<OrcRPCInvalid, void()> {
public:

@@ -747,7 +747,7 @@ class RPCBase {

public:
/// Construct an RPC instance on a channel.
RPCBase(ChannelT &C, bool LazyAutoNegotiation)
RPCEndpointBase(ChannelT &C, bool LazyAutoNegotiation)
: C(C), LazyAutoNegotiation(LazyAutoNegotiation) {
// Hold ResponseId in a special variable, since we expect Response to be
// called relatively frequently, and want to avoid the map lookup.

@@ -788,15 +788,21 @@ class RPCBase {
return FnIdOrErr.takeError();
}

// Allocate a sequence number.
auto SeqNo = SequenceNumberMgr.getSequenceNumber();
assert(!PendingResponses.count(SeqNo) &&
"Sequence number already allocated");
SequenceNumberT SeqNo; // initialized in locked scope below.
{
// Lock the pending responses map and sequence number manager.
std::lock_guard<std::mutex> Lock(ResponsesMutex);

// Install the user handler.
PendingResponses[SeqNo] =
// Allocate a sequence number.
SeqNo = SequenceNumberMgr.getSequenceNumber();
assert(!PendingResponses.count(SeqNo) &&
"Sequence number already allocated");

// Install the user handler.
PendingResponses[SeqNo] =
detail::createResponseHandler<ChannelT, typename Func::ReturnType>(
std::move(Handler));
}

// Open the function call message.
if (auto Err = C.startSendMessage(FnId, SeqNo)) {

@@ -863,11 +869,33 @@ class RPCBase {
return detail::ReadArgs<ArgTs...>(Args...);
}

/// Abandon all outstanding result handlers.
///
/// This will call all currently registered result handlers to receive an
/// "abandoned" error as their argument. This is used internally by the RPC
/// in error situations, but can also be called directly by clients who are
/// disconnecting from the remote and don't or can't expect responses to their
/// outstanding calls. (Especially for outstanding blocking calls, calling
/// this function may be necessary to avoid dead threads).
void abandonPendingResponses() {
// Lock the pending responses map and sequence number manager.
std::lock_guard<std::mutex> Lock(ResponsesMutex);

for (auto &KV : PendingResponses)
KV.second->abandon();
PendingResponses.clear();
SequenceNumberMgr.reset();
}

protected:
// The LaunchPolicy type allows a launch policy to be specified when adding
// a function handler. See addHandlerImpl.
using LaunchPolicy = std::function<Error(std::function<Error()>)>;

FunctionIdT getInvalidFunctionId() const {
return FnIdAllocator.getInvalidId();
}

/// Add the given handler to the handler map and make it available for
/// autonegotiation and execution.
template <typename Func, typename HandlerT>

@@ -884,28 +912,32 @@ class RPCBase {
wrapHandler<Func>(std::move(Handler), std::move(Launch));
}

// Abandon all outstanding results.
void abandonPendingResponses() {
for (auto &KV : PendingResponses)
KV.second->abandon();
PendingResponses.clear();
SequenceNumberMgr.reset();
}

Error handleResponse(SequenceNumberT SeqNo) {
auto I = PendingResponses.find(SeqNo);
if (I == PendingResponses.end()) {
abandonPendingResponses();
return orcError(OrcErrorCode::UnexpectedRPCResponse);
using Handler = typename decltype(PendingResponses)::mapped_type;
Handler PRHandler;

{
// Lock the pending responses map and sequence number manager.
std::unique_lock<std::mutex> Lock(ResponsesMutex);
auto I = PendingResponses.find(SeqNo);

if (I != PendingResponses.end()) {
PRHandler = std::move(I->second);
PendingResponses.erase(I);
SequenceNumberMgr.releaseSequenceNumber(SeqNo);
} else {
// Unlock the pending results map to prevent recursive lock.
Lock.unlock();
abandonPendingResponses();
return orcError(OrcErrorCode::UnexpectedRPCResponse);
}
}

auto PRHandler = std::move(I->second);
PendingResponses.erase(I);
SequenceNumberMgr.releaseSequenceNumber(SeqNo);
assert(PRHandler &&
"If we didn't find a response handler we should have bailed out");

if (auto Err = PRHandler->handleResponse(C)) {
abandonPendingResponses();
SequenceNumberMgr.reset();
return Err;
}

@@ -915,7 +947,7 @@ class RPCBase {
FunctionIdT handleNegotiate(const std::string &Name) {
auto I = LocalFunctionIds.find(Name);
if (I == LocalFunctionIds.end())
return FnIdAllocator.getInvalidId();
return getInvalidFunctionId();
return I->second;
}

@@ -938,7 +970,7 @@ class RPCBase {

// If autonegotiation indicates that the remote end doesn't support this
// function, return an unknown function error.
if (RemoteId == FnIdAllocator.getInvalidId())
if (RemoteId == getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);

// Autonegotiation succeeded and returned a valid id. Update the map and

@@ -1012,6 +1044,7 @@ class RPCBase {

std::map<FunctionIdT, WrappedHandlerFn> Handlers;

std::mutex ResponsesMutex;
detail::SequenceNumberManager<SequenceNumberT> SequenceNumberMgr;
std::map<SequenceNumberT, std::unique_ptr<detail::ResponseHandler<ChannelT>>>
PendingResponses;

@@ -1021,17 +1054,18 @@ class RPCBase {

template <typename ChannelT, typename FunctionIdT = uint32_t,
typename SequenceNumberT = uint32_t>
class MultiThreadedRPC
: public detail::RPCBase<
MultiThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT,
FunctionIdT, SequenceNumberT> {
class MultiThreadedRPCEndpoint
: public detail::RPCEndpointBase<
MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT> {
private:
using BaseClass =
detail::RPCBase<MultiThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;
detail::RPCEndpointBase<
MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;

public:
MultiThreadedRPC(ChannelT &C, bool LazyAutoNegotiation)
MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
: BaseClass(C, LazyAutoNegotiation) {}

/// The LaunchPolicy type allows a launch policy to be specified when adding

@@ -1061,30 +1095,41 @@ class MultiThreadedRPC
std::move(Launch));
}

/// Add a class-method as a handler.
template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...),
LaunchPolicy Launch = LaunchPolicy()) {
addHandler<Func>(
detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method),
Launch);
}

/// Negotiate a function id for Func with the other end of the channel.
template <typename Func> Error negotiateFunction() {
template <typename Func> Error negotiateFunction(bool Retry = false) {
using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate;

// Check if we already have a function id...
auto I = this->RemoteFunctionIds.find(Func::getPrototype());
if (I != this->RemoteFunctionIds.end()) {
// If it's valid there's nothing left to do.
if (I->second != this->getInvalidFunctionId())
return Error::success();
// If it's invalid and we can't re-attempt negotiation, throw an error.
if (!Retry)
return orcError(OrcErrorCode::UnknownRPCFunction);
}

// We don't have a function id for Func yet, call the remote to try to
// negotiate one.
if (auto RemoteIdOrErr = callB<OrcRPCNegotiate>(Func::getPrototype())) {
this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
if (*RemoteIdOrErr == this->getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);
return Error::success();
} else
return RemoteIdOrErr.takeError();
}

/// Convenience method for negotiating multiple functions at once.
template <typename Func> Error negotiateFunctions() {
return negotiateFunction<Func>();
}

/// Convenience method for negotiating multiple functions at once.
template <typename Func1, typename Func2, typename... Funcs>
Error negotiateFunctions() {
if (auto Err = negotiateFunction<Func1>())
return Err;
return negotiateFunctions<Func2, Funcs...>();
}

/// Return type for non-blocking call primitives.
template <typename Func>
using NonBlockingCallResult = typename detail::ResultTraits<
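
A hedged client-side sketch of the negotiation interface above (the endpoint `Client` and the RPC function classes CallIntVoid / CallMain are assumed to be declared elsewhere, e.g. in OrcRemoteTargetRPCAPI):

    // Negotiate several function ids up front; on failure the returned Error
    // carries OrcErrorCode::UnknownRPCFunction.
    if (Error Err = Client.negotiateFunctions<CallIntVoid, CallMain>())
      return Err;

    // A previously failed negotiation can now be re-attempted by passing
    // Retry = true; without it, a cached invalid id is an immediate error.
    if (Error Err = Client.negotiateFunction<CallIntVoid>(/*Retry=*/true))
      return Err;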

@@ -1169,19 +1214,20 @@ class MultiThreadedRPC

template <typename ChannelT, typename FunctionIdT = uint32_t,
typename SequenceNumberT = uint32_t>
class SingleThreadedRPC
: public detail::RPCBase<
SingleThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT,
FunctionIdT, SequenceNumberT> {
class SingleThreadedRPCEndpoint
: public detail::RPCEndpointBase<
SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT> {
private:
using BaseClass =
detail::RPCBase<SingleThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;
detail::RPCEndpointBase<
SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
ChannelT, FunctionIdT, SequenceNumberT>;

using LaunchPolicy = typename BaseClass::LaunchPolicy;

public:
SingleThreadedRPC(ChannelT &C, bool LazyAutoNegotiation)
SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
: BaseClass(C, LazyAutoNegotiation) {}

template <typename Func, typename HandlerT>

@@ -1197,29 +1243,31 @@ class SingleThreadedRPC
}

/// Negotiate a function id for Func with the other end of the channel.
template <typename Func> Error negotiateFunction() {
template <typename Func> Error negotiateFunction(bool Retry = false) {
using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate;

// Check if we already have a function id...
auto I = this->RemoteFunctionIds.find(Func::getPrototype());
if (I != this->RemoteFunctionIds.end()) {
// If it's valid there's nothing left to do.
if (I->second != this->getInvalidFunctionId())
return Error::success();
// If it's invalid and we can't re-attempt negotiation, throw an error.
if (!Retry)
return orcError(OrcErrorCode::UnknownRPCFunction);
}

// We don't have a function id for Func yet, call the remote to try to
// negotiate one.
if (auto RemoteIdOrErr = callB<OrcRPCNegotiate>(Func::getPrototype())) {
this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
if (*RemoteIdOrErr == this->getInvalidFunctionId())
return orcError(OrcErrorCode::UnknownRPCFunction);
return Error::success();
} else
return RemoteIdOrErr.takeError();
}

/// Convenience method for negotiating multiple functions at once.
template <typename Func> Error negotiateFunctions() {
return negotiateFunction<Func>();
}

/// Convenience method for negotiating multiple functions at once.
template <typename Func1, typename Func2, typename... Funcs>
Error negotiateFunctions() {
if (auto Err = negotiateFunction<Func1>())
return Err;
return negotiateFunctions<Func2, Funcs...>();
}

template <typename Func, typename... ArgTs,
typename AltRetT = typename Func::ReturnType>
typename detail::ResultTraits<AltRetT>::ErrorReturnType

@@ -1332,6 +1380,68 @@ template <typename RPCClass> class ParallelCallGroup {
uint32_t NumOutstandingCalls;
};

/// @brief Convenience class for grouping RPC Functions into APIs that can be
/// negotiated as a block.
///
template <typename... Funcs>
class APICalls {
public:

/// @brief Test whether this API contains Function F.
template <typename F>
class Contains {
public:
static const bool value = false;
};

/// @brief Negotiate all functions in this API.
template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
return Error::success();
}
};

template <typename Func, typename... Funcs>
class APICalls<Func, Funcs...> {
public:

template <typename F>
class Contains {
public:
static const bool value = std::is_same<F, Func>::value |
APICalls<Funcs...>::template Contains<F>::value;
};

template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
if (auto Err = R.template negotiateFunction<Func>())
return Err;
return APICalls<Funcs...>::negotiate(R);
}

};

template <typename... InnerFuncs, typename... Funcs>
class APICalls<APICalls<InnerFuncs...>, Funcs...> {
public:

template <typename F>
class Contains {
public:
static const bool value =
APICalls<InnerFuncs...>::template Contains<F>::value |
APICalls<Funcs...>::template Contains<F>::value;
};

template <typename RPCEndpoint>
static Error negotiate(RPCEndpoint &R) {
if (auto Err = APICalls<InnerFuncs...>::negotiate(R))
return Err;
return APICalls<Funcs...>::negotiate(R);
}

};

} // end namespace rpc
} // end namespace orc
} // end namespace llvm
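
A hypothetical sketch of how the new APICalls grouping might be used (the function classes and the `Endpoint` object are assumed to be in scope, e.g. under `using namespace llvm::orc`):

    // Group RPC functions into one API, check membership at compile time, and
    // negotiate the whole group with a single call.
    using RemoteExecuteAPI = rpc::APICalls<CallIntVoid, CallMain>;
    static_assert(RemoteExecuteAPI::Contains<CallIntVoid>::value,
                  "CallIntVoid should be part of the API");
    if (Error Err = RemoteExecuteAPI::negotiate(Endpoint))
      return Err;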

@@ -48,9 +48,7 @@ class RawByteChannel {
template <typename FunctionIdT, typename SequenceIdT>
Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) {
writeLock.lock();
if (auto Err = serializeSeq(*this, FnId, SeqNo))
return Err;
return Error::success();
return serializeSeq(*this, FnId, SeqNo);
}

/// Notify the channel that we're ending a message send.

@@ -28,14 +28,14 @@ template <> struct ScalarEnumerationTraits<TypeTestResolution::Kind> {

template <> struct MappingTraits<TypeTestResolution> {
static void mapping(IO &io, TypeTestResolution &res) {
io.mapRequired("Kind", res.TheKind);
io.mapRequired("SizeBitWidth", res.SizeBitWidth);
io.mapOptional("Kind", res.TheKind);
io.mapOptional("SizeBitWidth", res.SizeBitWidth);
}
};

template <> struct MappingTraits<TypeIdSummary> {
static void mapping(IO &io, TypeIdSummary& summary) {
io.mapRequired("TTRes", summary.TTRes);
io.mapOptional("TTRes", summary.TTRes);
}
};

@@ -53,7 +53,7 @@ namespace yaml {

template <> struct MappingTraits<FunctionSummaryYaml> {
static void mapping(IO &io, FunctionSummaryYaml& summary) {
io.mapRequired("TypeTests", summary.TypeTests);
io.mapOptional("TypeTests", summary.TypeTests);
}
};

@@ -100,8 +100,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {

template <> struct MappingTraits<ModuleSummaryIndex> {
static void mapping(IO &io, ModuleSummaryIndex& index) {
io.mapRequired("GlobalValueMap", index.GlobalValueMap);
io.mapRequired("TypeIdMap", index.TypeIdMap);
io.mapOptional("GlobalValueMap", index.GlobalValueMap);
io.mapOptional("TypeIdMap", index.TypeIdMap);
}
};
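
For context, a self-contained sketch of the mapRequired/mapOptional distinction in LLVM's YAML I/O (the ToolOptions struct is invented for illustration): mapOptional silently skips a missing key, optionally applying a default, while mapRequired reports an error for it.

    #include "llvm/Support/YAMLTraits.h"
    #include <string>

    struct ToolOptions {
      bool Verbose = false;
      std::string OutputFile;
    };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<ToolOptions> {
      static void mapping(IO &io, ToolOptions &Opts) {
        // A missing "Verbose" key is not an error; the field keeps the default.
        io.mapOptional("Verbose", Opts.Verbose, false);
        // A missing "OutputFile" key is diagnosed by the YAML reader.
        io.mapRequired("OutputFile", Opts.OutputFile);
      }
    };
    } // end namespace yaml
    } // end namespace llvm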

@@ -879,18 +879,22 @@ extern template class AnalysisManager<Function>;
/// \brief Convenience typedef for the Function analysis manager.
typedef AnalysisManager<Function> FunctionAnalysisManager;

/// \brief A module analysis which acts as a proxy for a function analysis
/// manager.
/// \brief An analysis over an "outer" IR unit that provides access to an
/// analysis manager over an "inner" IR unit. The inner unit must be contained
/// in the outer unit.
///
/// This primarily proxies invalidation information from the module analysis
/// manager and module pass manager to a function analysis manager. You should
/// never use a function analysis manager from within (transitively) a module
/// pass manager unless your parent module pass has received a proxy result
/// object for it.
/// Fore example, InnerAnalysisManagerProxy<FunctionAnalysisManager, Module> is
/// an analysis over Modules (the "outer" unit) that provides access to a
/// Function analysis manager. The FunctionAnalysisManager is the "inner"
/// manager being proxied, and Functions are the "inner" unit. The inner/outer
/// relationship is valid because each Function is contained in one Module.
///
/// Note that the proxy's result is a move-only object and represents ownership
/// of the validity of the analyses in the \c FunctionAnalysisManager it
/// provides.
/// If you're (transitively) within a pass manager for an IR unit U that
/// contains IR unit V, you should never use an analysis manager over V, except
/// via one of these proxies.
///
/// Note that the proxy's result is a move-only RAII object. The validity of
/// the analyses in the inner analysis manager is tied to its lifetime.
template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
class InnerAnalysisManagerProxy
: public AnalysisInfoMixin<

@@ -926,23 +930,16 @@ class InnerAnalysisManagerProxy
/// \brief Accessor for the analysis manager.
AnalysisManagerT &getManager() { return *InnerAM; }

/// \brief Handler for invalidation of the outer IR unit.
/// \brief Handler for invalidation of the outer IR unit, \c IRUnitT.
///
/// If this analysis itself is preserved, then we assume that the set of \c
/// IR units that the inner analysis manager controls hasn't changed and
/// thus we don't need to invalidate *all* cached data associated with any
/// \c IRUnitT* in the \c AnalysisManagerT.
/// If the proxy analysis itself is not preserved, we assume that the set of
/// inner IR objects contained in IRUnit may have changed. In this case,
/// we have to call \c clear() on the inner analysis manager, as it may now
/// have stale pointers to its inner IR objects.
///
/// Regardless of whether this analysis is marked as preserved, all of the
/// analyses in the \c AnalysisManagerT are potentially invalidated (for
/// the relevant inner set of their IR units) based on the set of preserved
/// analyses.
///
/// Because this needs to understand the mapping from one IR unit to an
/// inner IR unit, this method isn't defined in the primary template.
/// Instead, each specialization of this template will need to provide an
/// explicit specialization of this method to handle that particular pair
/// of IR unit and inner AnalysisManagerT.
/// Regardless of whether the proxy analysis is marked as preserved, all of
/// the analyses in the inner analysis manager are potentially invalidated
/// based on the set of preserved analyses.
bool invalidate(
IRUnitT &IR, const PreservedAnalyses &PA,
typename AnalysisManager<IRUnitT, ExtraArgTs...>::Invalidator &Inv);

@@ -956,13 +953,9 @@ class InnerAnalysisManagerProxy

/// \brief Run the analysis pass and create our proxy result object.
///
/// This doesn't do any interesting work, it is primarily used to insert our
/// proxy result object into the module analysis cache so that we can proxy
/// invalidation to the function analysis manager.
///
/// In debug builds, it will also assert that the analysis manager is empty
/// as no queries should arrive at the function analysis manager prior to
/// this analysis being requested.
/// This doesn't do any interesting work; it is primarily used to insert our
/// proxy result object into the outer analysis cache so that we can proxy
/// invalidation to the inner analysis manager.
Result run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
ExtraArgTs...) {
return Result(*InnerAM);

@@ -996,22 +989,24 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
Module>;

/// \brief A function analysis which acts as a proxy for a module analysis
/// manager.
/// \brief An analysis over an "inner" IR unit that provides access to an
/// analysis manager over a "outer" IR unit. The inner unit must be contained
/// in the outer unit.
///
/// This primarily provides an accessor to a parent module analysis manager to
/// function passes. Only the const interface of the module analysis manager is
/// provided to indicate that once inside of a function analysis pass you
/// cannot request a module analysis to actually run. Instead, the user must
/// rely on the \c getCachedResult API.
/// For example OuterAnalysisManagerProxy<ModuleAnalysisManager, Function> is an
/// analysis over Functions (the "inner" unit) which provides access to a Module
/// analysis manager. The ModuleAnalysisManager is the "outer" manager being
/// proxied, and Modules are the "outer" IR unit. The inner/outer relationship
/// is valid because each Function is contained in one Module.
///
/// The invalidation provided by this proxy involves tracking when an
/// invalidation event in the outer analysis manager needs to trigger an
/// invalidation of a particular analysis on this IR unit.
/// This proxy only exposes the const interface of the outer analysis manager,
/// to indicate that you cannot cause an outer analysis to run from within an
/// inner pass. Instead, you must rely on the \c getCachedResult API.
///
/// Because outer analyses aren't invalidated while these IR units are being
/// precessed, we have to register and handle these as deferred invalidation
/// events.
/// This proxy doesn't manage invalidation in any way -- that is handled by the
/// recursive return path of each layer of the pass manager. A consequence of
/// this is the outer analyses may be stale. We invalidate the outer analyses
/// only when we're done running passes over the inner IR units.
template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
class OuterAnalysisManagerProxy
: public AnalysisInfoMixin<
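
A hedged sketch (hypothetical pass and analysis names) of the usage pattern described above: inside a function pass, the outer (module) analysis manager is reachable only through the proxy, and only cached results may be queried.

    struct QueryModuleInfoPass : PassInfoMixin<QueryModuleInfoPass> {
      PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
        const ModuleAnalysisManager &MAM =
            FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
        // getCachedResult never runs the analysis; it returns null when the
        // result is not already available. SomeModuleAnalysis stands in for a
        // real module-level analysis.
        if (const auto *Info =
                MAM.getCachedResult<SomeModuleAnalysis>(*F.getParent()))
          (void)Info;
        return PreservedAnalyses::all();
      }
    };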

@@ -1024,7 +1019,7 @@ class OuterAnalysisManagerProxy

const AnalysisManagerT &getManager() const { return *AM; }

/// \brief Handle invalidation by ignoring it, this pass is immutable.
/// \brief Handle invalidation by ignoring it; this pass is immutable.
bool invalidate(
IRUnitT &, const PreservedAnalyses &,
typename AnalysisManager<IRUnitT, ExtraArgTs...>::Invalidator &) {

@@ -1089,18 +1084,15 @@ AnalysisKey

extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
Function>;
/// Provide the \c ModuleAnalysisManager to \c Fucntion proxy.
/// Provide the \c ModuleAnalysisManager to \c Function proxy.
typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, Function>
ModuleAnalysisManagerFunctionProxy;

/// \brief Trivial adaptor that maps from a module to its functions.
///
/// Designed to allow composition of a FunctionPass(Manager) and
/// a ModulePassManager. Note that if this pass is constructed with a pointer
/// to a \c ModuleAnalysisManager it will run the
/// \c FunctionAnalysisManagerModuleProxy analysis prior to running the function
/// pass over the module to enable a \c FunctionAnalysisManager to be used
/// within this run safely.
/// a ModulePassManager, by running the FunctionPass(Manager) over every
/// function in the module.
///
/// Function passes run within this adaptor can rely on having exclusive access
/// to the function they are run over. They should not read or modify any other

@@ -1115,6 +1107,10 @@ typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, Function>
/// module.
/// FIXME: Make the above true for all of LLVM's actual passes, some still
/// violate this principle.
///
/// Note that although function passes can access module analyses, module
/// analyses are not invalidated while the function passes are running, so they
/// may be stale. Function analyses will not be stale.
template <typename FunctionPassT>
class ModuleToFunctionPassAdaptor
: public PassInfoMixin<ModuleToFunctionPassAdaptor<FunctionPassT>> {

@@ -1124,7 +1120,6 @@ class ModuleToFunctionPassAdaptor

/// \brief Runs the function pass across every function in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
// Setup the function analysis manager from its proxy.
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

@@ -1145,10 +1140,11 @@ class ModuleToFunctionPassAdaptor
PA.intersect(std::move(PassPA));
}

// By definition we preserve the proxy. We also preserve all analyses on
// Function units. This precludes *any* invalidation of function analyses
// by the proxy, but that's OK because we've taken care to invalidate
// analyses in the function analysis manager incrementally above.
// The FunctionAnalysisManagerModuleProxy is preserved because (we assume)
// the function passes we ran didn't add or remove any functions.
//
// We also preserve all analyses on Functions, because we did all the
// invalidation we needed to do above.
PA.preserveSet<AllAnalysesOn<Function>>();
PA.preserve<FunctionAnalysisManagerModuleProxy>();
return PA;

@@ -1166,7 +1162,7 @@ createModuleToFunctionPassAdaptor(FunctionPassT Pass) {
return ModuleToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
}

/// \brief A template utility pass to force an analysis result to be available.
/// \brief A utility pass template to force an analysis result to be available.
///
/// If there are extra arguments at the pass's run level there may also be
/// extra arguments to the analysis manager's \c getResult routine. We can't

@@ -1196,17 +1192,14 @@ struct RequireAnalysisPass
}
};

/// \brief A template utility pass to force an analysis result to be
/// invalidated.
///
/// This is a no-op pass which simply forces a specific analysis result to be
/// invalidated when it is run.
/// \brief A no-op pass template which simply forces a specific analysis result
/// to be invalidated.
template <typename AnalysisT>
struct InvalidateAnalysisPass
: PassInfoMixin<InvalidateAnalysisPass<AnalysisT>> {
/// \brief Run this pass over some unit of IR.
///
/// This pass can be run over any unit of IR and use any analysis manager
/// This pass can be run over any unit of IR and use any analysis manager,
/// provided they satisfy the basic API requirements. When this pass is
/// created, these methods can be instantiated to satisfy whatever the
/// context requires.

@@ -1218,10 +1211,10 @@ struct InvalidateAnalysisPass
}
};

/// \brief A utility pass that does nothing but preserves no analyses.
/// \brief A utility pass that does nothing, but preserves no analyses.
///
/// As a consequence fo not preserving any analyses, this pass will force all
/// analysis passes to be re-run to produce fresh results if any are needed.
/// Because this preserves no analyses, any analysis passes queried after this
/// pass runs will recompute fresh results.
struct InvalidateAllAnalysesPass : PassInfoMixin<InvalidateAllAnalysesPass> {
/// \brief Run this pass over some unit of IR.
template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>

@@ -238,6 +238,26 @@ class User : public Value {
return make_range(value_op_begin(), value_op_end());
}

struct const_value_op_iterator
: iterator_adaptor_base<const_value_op_iterator, const_op_iterator,
std::random_access_iterator_tag, const Value *,
ptrdiff_t, const Value *, const Value *> {
explicit const_value_op_iterator(const Use *U = nullptr) :
iterator_adaptor_base(U) {}
const Value *operator*() const { return *I; }
const Value *operator->() const { return operator*(); }
};

const_value_op_iterator value_op_begin() const {
return const_value_op_iterator(op_begin());
}
const_value_op_iterator value_op_end() const {
return const_value_op_iterator(op_end());
}
iterator_range<const_value_op_iterator> operand_values() const {
return make_range(value_op_begin(), value_op_end());
}

/// \brief Drop all references to operands.
///
/// This function is in charge of "letting go" of all objects that this User
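
A usage sketch enabled by this change (the helper function is hypothetical): operand iteration now also works on a const User.

    static void printOperandNames(const llvm::User &U, llvm::raw_ostream &OS) {
      // operand_values() is now available on const objects and yields
      // const Value pointers.
      for (const llvm::Value *Op : U.operand_values())
        OS << Op->getName() << "\n";
    }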

@@ -207,6 +207,14 @@ void native(const Twine &path, SmallVectorImpl<char> &result);
/// @param path A path that is transformed to native format.
void native(SmallVectorImpl<char> &path);

/// @brief Replaces backslashes with slashes if Windows.
///
/// @param path processed path
/// @result The result of replacing backslashes with forward slashes if Windows.
/// On Unix, this function is a no-op because backslashes are valid path
/// chracters.
std::string convert_to_slash(StringRef path);

/// @}
/// @name Lexical Observers
/// @{

@@ -215,9 +215,20 @@ ModulePass *createMetaRenamerPass();
/// manager.
ModulePass *createBarrierNoopPass();

/// What to do with the summary when running the LowerTypeTests pass.
enum class LowerTypeTestsSummaryAction {
None, ///< Do nothing.
Import, ///< Import typeid resolutions from summary and globals.
Export, ///< Export typeid resolutions to summary and globals.
};

/// \brief This pass lowers type metadata and the llvm.type.test intrinsic to
/// bitsets.
ModulePass *createLowerTypeTestsPass();
/// \param Action What to do with the summary passed as Index.
/// \param Index The summary to use for importing or exporting, this can be null
/// when Action is None.
ModulePass *createLowerTypeTestsPass(LowerTypeTestsSummaryAction Action,
ModuleSummaryIndex *Index);

/// \brief This pass export CFI checks for use by external modules.
ModulePass *createCrossDSOCFIPass();
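
A hypothetical legacy-pass-manager sketch of the new interface (the pass manager `PM` and the ModuleSummaryIndex `Summary` are assumed to exist in the surrounding code); the index may be null whenever no import/export action is requested:

    // Export typeid resolutions into a combined summary.
    PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::Export, &Summary));
    // Plain lowering with no summary interaction.
    PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
                                    /*Index=*/nullptr));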

@@ -21,7 +21,6 @@
#include <vector>

namespace llvm {
class ModuleSummaryIndex;
class Pass;
class TargetLibraryInfoImpl;
class TargetMachine;

@@ -1106,6 +1106,16 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;

// udiv %V, C -> 0 if %V < C
if (MaxRecurse) {
if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst(
ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) {
if (C->isAllOnesValue()) {
return Constant::getNullValue(Op0->getType());
}
}
}

return nullptr;
}

@@ -1247,6 +1257,16 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;

// urem %V, C -> %V if %V < C
if (MaxRecurse) {
if (Constant *C = dyn_cast_or_null<Constant>(SimplifyICmpInst(
ICmpInst::ICMP_ULT, Op0, Op1, Q, MaxRecurse - 1))) {
if (C->isAllOnesValue()) {
return Op0;
}
}
}

return nullptr;
}

@@ -179,9 +179,9 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
}

bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const {
// For each block we check that it doesn't have any uses outside of it's
// innermost loop. This process will transitivelly guarntee that current loop
// and all of the nested loops are in the LCSSA form.
// For each block we check that it doesn't have any uses outside of its
// innermost loop. This process will transitively guarantee that the current
// loop and all of the nested loops are in LCSSA form.
return all_of(this->blocks(), [&](const BasicBlock *BB) {
return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT);
});

@@ -344,38 +344,24 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
if (!InvariantGroupMD)
return MemDepResult::getUnknown();

Value *LoadOperand = LI->getPointerOperand();
// Take the ptr operand after all casts and geps 0. This way we can search
// cast graph down only.
Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();

// It's is not safe to walk the use list of global value, because function
// passes aren't allowed to look outside their functions.
// FIXME: this could be fixed by filtering instructions from outside
// of current function.
if (isa<GlobalValue>(LoadOperand))
return MemDepResult::getUnknown();

// Queue to process all pointers that are equivalent to load operand.
SmallVector<const Value *, 8> LoadOperandsQueue;
SmallSet<const Value *, 14> SeenValues;
auto TryInsertToQueue = [&](Value *V) {
if (SeenValues.insert(V).second)
LoadOperandsQueue.push_back(V);
};

TryInsertToQueue(LoadOperand);
LoadOperandsQueue.push_back(LoadOperand);
while (!LoadOperandsQueue.empty()) {
const Value *Ptr = LoadOperandsQueue.pop_back_val();
assert(Ptr);
if (isa<GlobalValue>(Ptr))
continue;

// Value comes from bitcast: Ptr = bitcast x. Insert x.
if (auto *BCI = dyn_cast<BitCastInst>(Ptr))
TryInsertToQueue(BCI->getOperand(0));
// Gep with zeros is equivalent to bitcast.
// FIXME: we are not sure if some bitcast should be canonicalized to gep 0
// or gep 0 to bitcast because of SROA, so there are 2 forms. When typeless
// pointers will be upstream then both cases will be gone (and this BFS
// also won't be needed).
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
if (GEP->hasAllZeroIndices())
TryInsertToQueue(GEP->getOperand(0));
assert(Ptr && !isa<GlobalValue>(Ptr) &&
"Null or GlobalValue should not be inserted");

for (const Use &Us : Ptr->uses()) {
auto *U = dyn_cast<Instruction>(Us.getUser());

@@ -385,13 +371,17 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// Bitcast or gep with zeros are using Ptr. Add to queue to check it's
// users. U = bitcast Ptr
if (isa<BitCastInst>(U)) {
TryInsertToQueue(U);
LoadOperandsQueue.push_back(U);
continue;
}
// U = getelementptr Ptr, 0, 0...
// Gep with zeros is equivalent to bitcast.
// FIXME: we are not sure if some bitcast should be canonicalized to gep 0
// or gep 0 to bitcast because of SROA, so there are 2 forms. When
// typeless pointers will be ready then both cases will be gone
// (and this BFS also won't be needed).
if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
if (GEP->hasAllZeroIndices()) {
TryInsertToQueue(U);
LoadOperandsQueue.push_back(U);
continue;
}

@@ -10012,6 +10012,18 @@ void ScalarEvolution::verify() const {
// TODO: Verify more things.
}

bool ScalarEvolution::invalidate(
Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv) {
// Invalidate the ScalarEvolution object whenever it isn't preserved or one
// of its dependencies is invalidated.
auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
Inv.invalidate<AssumptionAnalysis>(F, PA) ||
Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
Inv.invalidate<LoopAnalysis>(F, PA);
}

AnalysisKey ScalarEvolutionAnalysis::Key;

ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
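
A hedged sketch of what this invalidation rule means for a transform pass (the snippet below would sit at the end of a pass's run method): to keep a cached ScalarEvolution alive, the pass must also preserve the analyses ScalarEvolution depends on.

    PreservedAnalyses PA;
    PA.preserve<ScalarEvolutionAnalysis>();
    // SCEV is invalidated anyway if any of these are not preserved.
    PA.preserve<AssumptionAnalysis>();
    PA.preserve<DominatorTreeAnalysis>();
    PA.preserve<LoopAnalysis>();
    return PA;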

@@ -3257,6 +3257,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Intrinsic::dbg_value:
return true;

case Intrinsic::bitreverse:
case Intrinsic::bswap:
case Intrinsic::ctlz:
case Intrinsic::ctpop:

@@ -429,7 +429,7 @@ class MetadataLoader::MetadataLoaderImpl {
/// Populate the index above to enable lazily loading of metadata, and load
/// the named metadata as well as the transitively referenced global
/// Metadata.
Expected<bool> lazyLoadModuleMetadataBlock(PlaceholderQueue &Placeholders);
Expected<bool> lazyLoadModuleMetadataBlock();

/// On-demand loading of a single metadata. Requires the index above to be
/// populated.

@@ -516,8 +516,8 @@ Error error(const Twine &Message) {
Message, make_error_code(BitcodeError::CorruptedBitcode));
}

Expected<bool> MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock(
PlaceholderQueue &Placeholders) {
Expected<bool>
MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
IndexCursor = Stream;
SmallVector<uint64_t, 64> Record;
// Get the abbrevs, and preload record positions to make them lazy-loadable.

@@ -701,7 +701,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// then load individual record as needed, starting with the named metadata.
if (ModuleLevel && IsImporting && MetadataList.empty() &&
!DisableLazyLoading) {
auto SuccessOrErr = lazyLoadModuleMetadataBlock(Placeholders);
auto SuccessOrErr = lazyLoadModuleMetadataBlock();
if (!SuccessOrErr)
return SuccessOrErr.takeError();
if (SuccessOrErr.get()) {

@@ -1561,7 +1561,6 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
return error("Invalid record");

SmallVector<uint64_t, 64> Record;

PlaceholderQueue Placeholders;

while (true) {

@@ -1608,10 +1607,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(

auto Idx = Record[i + 1];
if (Idx < (MDStringRef.size() + GlobalMetadataBitPosIndex.size()) &&
!MetadataList.lookup(Idx))
!MetadataList.lookup(Idx)) {
// Load the attachment if it is in the lazy-loadable range and hasn't
// been loaded yet.
lazyLoadOneMetadata(Idx, Placeholders);
resolveForwardRefsAndPlaceholders(Placeholders);
}

Metadata *Node = MetadataList.getMetadataFwdRef(Idx);
if (isa<LocalAsMetadata>(Node))

@@ -1714,7 +1714,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
EVT CCT = getSetCCResultType(NVT);

// Hi part is always the same op
Hi = DAG.getNode(N->getOpcode(), DL, {NVT, NVT}, {LHSH, RHSH});
Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});

// We need to know whether to select Lo part that corresponds to 'winning'
// Hi part or if Hi parts are equal.

@@ -1725,7 +1725,7 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);

// Recursed Lo part if Hi parts are equal, this uses unsigned version
SDValue LoMinMax = DAG.getNode(LoOpc, DL, {NVT, NVT}, {LHSL, RHSL});
SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});

Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
}

@@ -381,7 +381,6 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
I != E; ++I) {
if (DCELimit != -1 && (int)NumDead >= DCELimit)
break;

int FirstSS, SecondSS;
if (TII->isStackSlotCopy(*I, FirstSS, SecondSS) && FirstSS == SecondSS &&
FirstSS != -1) {

@@ -392,12 +391,18 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
}

MachineBasicBlock::iterator NextMI = std::next(I);
if (NextMI == MBB->end()) continue;
MachineBasicBlock::iterator ProbableLoadMI = I;

unsigned LoadReg = 0;
unsigned StoreReg = 0;
if (!(LoadReg = TII->isLoadFromStackSlot(*I, FirstSS)))
continue;
// Skip the ...pseudo debugging... instructions between a load and store.
while ((NextMI != E) && NextMI->isDebugValue()) {
++NextMI;
++I;
}
if (NextMI == E) continue;
if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS)))
continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;

@@ -407,7 +412,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {

if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
++NumDead;
toErase.push_back(&*I);
toErase.push_back(&*ProbableLoadMI);
}

toErase.push_back(&*NextMI);

@@ -896,6 +896,48 @@ uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType,
return ELF::R_MIPS_NONE;
}

// Sometimes we don't need to create thunk for a branch.
// This typically happens when branch target is located
// in the same object file. In such case target is either
// a weak symbol or symbol in a different executable section.
// This function checks if branch target is located in the
// same object file and if distance between source and target
// fits R_AARCH64_CALL26 relocation. If both conditions are
// met, it emits direct jump to the target and returns true.
// Otherwise false is returned and thunk is created.
bool RuntimeDyldELF::resolveAArch64ShortBranch(
unsigned SectionID, relocation_iterator RelI,
const RelocationValueRef &Value) {
uint64_t Address;
if (Value.SymbolName) {
auto Loc = GlobalSymbolTable.find(Value.SymbolName);

// Don't create direct branch for external symbols.
if (Loc == GlobalSymbolTable.end())
return false;

const auto &SymInfo = Loc->second;
Address =
uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset(
SymInfo.getOffset()));
} else {
Address = uint64_t(Sections[Value.SectionID].getLoadAddress());
}
uint64_t Offset = RelI->getOffset();
uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset);

// R_AARCH64_CALL26 requires immediate to be in range -2^27 <= imm < 2^27
// If distance between source and target is out of range then we should
// create thunk.
if (!isInt<28>(Address + Value.Addend - SourceAddress))
return false;

resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(),
Value.Addend);

return true;
}

Expected<relocation_iterator>
RuntimeDyldELF::processRelocationRef(
unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
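
A worked example of the range test above, with invented addresses: an R_AARCH64_CALL26 branch encodes a signed 28-bit byte displacement (-2^27 <= disp < 2^27), so llvm::isInt<28> is the right predicate.

    uint64_t SourceAddr = 0x40000000;   // invented example addresses
    uint64_t TargetAddr = 0x40008000;
    int64_t Addend = 0;
    int64_t Disp = int64_t(TargetAddr + Addend) - int64_t(SourceAddr);
    bool NeedsThunk = !llvm::isInt<28>(Disp); // false here: 0x8000 fits easily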

@@ -1003,7 +1045,7 @@ RuntimeDyldELF::processRelocationRef(
(uint64_t)Section.getAddressWithOffset(i->second),
RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
} else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) {
// Create a new stub function.
DEBUG(dbgs() << " Create a new stub function\n");
Stubs[Value] = Section.getStubOffset();

@@ -40,6 +40,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveAArch64Relocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend);

bool resolveAArch64ShortBranch(unsigned SectionID, relocation_iterator RelI,
const RelocationValueRef &Value);

void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset,
uint32_t Value, uint32_t Type, int32_t Addend);
@@ -196,8 +196,15 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
  };

  FunctionImporter Importer(Index, Loader);
  if (!Importer.importFunctions(TheModule, ImportList))
  Expected<bool> Result = Importer.importFunctions(TheModule, ImportList);
  if (!Result) {
    handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) {
      SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(),
                                      SourceMgr::DK_Error, EIB.message());
      Err.print("ThinLTO", errs());
    });
    report_fatal_error("importFunctions failed");
  }
}

static void optimizeModule(Module &TheModule, TargetMachine &TM,

@@ -2823,7 +2823,11 @@ StringRef MachORebaseEntry::typeName() const {
}

bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
  assert(Opcodes == Other.Opcodes && "compare iterators of different files");
#else
  assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files");
#endif
  return (Ptr == Other.Ptr) &&
         (RemainingLoopCount == Other.RemainingLoopCount) &&
         (Done == Other.Done);
@@ -3073,7 +3077,11 @@ uint32_t MachOBindEntry::flags() const { return Flags; }
int MachOBindEntry::ordinal() const { return Ordinal; }

bool MachOBindEntry::operator==(const MachOBindEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
  assert(Opcodes == Other.Opcodes && "compare iterators of different files");
#else
  assert(Opcodes.data() == Other.Opcodes.data() && "compare iterators of different files");
#endif
  return (Ptr == Other.Ptr) &&
         (RemainingLoopCount == Other.RemainingLoopCount) &&
         (Done == Other.Done);

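A side note, not from the patch: the cheap assert compares the underlying buffers for identity, while the EXPENSIVE_CHECKS variant compares contents element-wise. The same distinction, illustrated with std::vector instead of llvm::ArrayRef:

#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned char> Opcodes = {0x11, 0x22};
  std::vector<unsigned char> Copy = Opcodes;  // equal contents, distinct buffer
  assert(Copy == Opcodes);                    // element-wise equality: true
  assert(Copy.data() != Opcodes.data());      // identity check: different buffers
  return 0;
}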
@@ -22,6 +22,12 @@
using namespace llvm;
using namespace object;

static llvm::cl::opt<bool> IgnoreEmptyThinLTOIndexFile(
    "ignore-empty-index-file", llvm::cl::ZeroOrMore,
    llvm::cl::desc(
        "Ignore an empty index file and perform non-ThinLTO compilation"),
    llvm::cl::init(false));

ModuleSummaryIndexObjectFile::ModuleSummaryIndexObjectFile(
    MemoryBufferRef Object, std::unique_ptr<ModuleSummaryIndex> I)
    : SymbolicFile(Binary::ID_ModuleSummaryIndex, Object), Index(std::move(I)) {
@@ -97,6 +103,8 @@ llvm::getModuleSummaryIndexForFile(StringRef Path) {
  if (EC)
    return errorCodeToError(EC);
  MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef();
  if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize())
    return nullptr;
  Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
      object::ModuleSummaryIndexObjectFile::create(BufferRef);
  if (!ObjOrErr)

@@ -373,7 +373,7 @@ void Option::removeArgument() { GlobalParser->removeOption(this); }
void Option::setArgStr(StringRef S) {
  if (FullyInitialized)
    GlobalParser->updateArgStr(this, S);
  assert(S[0] != '-' && "Option can't start with '-");
  assert((S.empty() || S[0] != '-') && "Option can't start with '-");
  ArgStr = S;
}

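A side note, not from the patch: the rewritten assert matters because StringRef::operator[] asserts on an empty string, so the emptiness test has to short-circuit first. The same shape with plain std::string, as a minimal sketch:

#include <cassert>
#include <string>

static void checkArgStr(const std::string &S) {
  // Safe even for "": the emptiness test runs before S[0] is touched.
  assert((S.empty() || S[0] != '-') && "Option can't start with '-'");
}

int main() {
  checkArgStr("");      // fine
  checkArgStr("help");  // fine
  return 0;
}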
@@ -571,6 +571,16 @@ void native(SmallVectorImpl<char> &Path) {
#endif
}

std::string convert_to_slash(StringRef path) {
#ifdef LLVM_ON_WIN32
  std::string s = path.str();
  std::replace(s.begin(), s.end(), '\\', '/');
  return s;
#else
  return path;
#endif
}

StringRef filename(StringRef path) {
  return *rbegin(path);
}

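A hedged usage note, not from the diff: the new helper only rewrites separators when built for Windows; elsewhere the input comes back unchanged. A minimal sketch of a call site:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include <string>

// On Windows, "dir\\file.txt" becomes "dir/file.txt"; on other hosts the
// string is returned as-is.
std::string ToSlash(llvm::StringRef P) {
  return llvm::sys::path::convert_to_slash(P);
}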
@@ -26,6 +26,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"

using namespace llvm;

@@ -109,27 +110,44 @@ static void writePaxHeader(raw_fd_ostream &OS, StringRef Path) {
  pad(OS);
}

// In the Ustar header, a path can be split at any '/' to store
// a path into UstarHeader::Name and UstarHeader::Prefix. This
// function splits a given path for that purpose.
static std::pair<StringRef, StringRef> splitPath(StringRef Path) {
  if (Path.size() <= sizeof(UstarHeader::Name))
    return {"", Path};
  size_t Sep = Path.rfind('/', sizeof(UstarHeader::Name) + 1);
  if (Sep == StringRef::npos)
    return {"", Path};
  return {Path.substr(0, Sep), Path.substr(Sep + 1)};
}

// Returns true if a given path can be stored to a Ustar header
// without the PAX extension.
static bool fitsInUstar(StringRef Path) {
  StringRef Prefix;
  StringRef Name;
  std::tie(Prefix, Name) = splitPath(Path);
  return Name.size() <= sizeof(UstarHeader::Name);
}

// The PAX header is an extended format, so a PAX header needs
// to be followed by a "real" header.
static void writeUstarHeader(raw_fd_ostream &OS, StringRef Path, size_t Size) {
  StringRef Prefix;
  StringRef Name;
  std::tie(Prefix, Name) = splitPath(Path);

  UstarHeader Hdr = {};
  memcpy(Hdr.Name, Path.data(), Path.size());
  memcpy(Hdr.Name, Name.data(), Name.size());
  memcpy(Hdr.Mode, "0000664", 8);
  snprintf(Hdr.Size, sizeof(Hdr.Size), "%011zo", Size);
  memcpy(Hdr.Magic, "ustar", 6);
  memcpy(Hdr.Prefix, Prefix.data(), Prefix.size());
  computeChecksum(Hdr);
  OS << StringRef(reinterpret_cast<char *>(&Hdr), sizeof(Hdr));
}

// We want to use '/' as a path separator even on Windows.
// This function canonicalizes a given path.
static std::string canonicalize(std::string S) {
#ifdef LLVM_ON_WIN32
  std::replace(S.begin(), S.end(), '\\', '/');
#endif
  return S;
}

// Creates a TarWriter instance and returns it.
Expected<std::unique_ptr<TarWriter>> TarWriter::create(StringRef OutputPath,
                                                       StringRef BaseDir) {
@@ -145,8 +163,8 @@ TarWriter::TarWriter(int FD, StringRef BaseDir)
// Append a given file to an archive.
void TarWriter::append(StringRef Path, StringRef Data) {
  // Write Path and Data.
  std::string S = BaseDir + "/" + canonicalize(Path) + "\0";
  if (S.size() <= sizeof(UstarHeader::Name)) {
  std::string S = BaseDir + "/" + sys::path::convert_to_slash(Path) + "\0";
  if (fitsInUstar(S)) {
    writeUstarHeader(OS, S, Data.size());
  } else {
    writePaxHeader(OS, S);

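A side note, not from the commit: the Name/Prefix split above can be modelled on plain strings, assuming the 100-byte Ustar name field; leading directories are pushed into Prefix only when the whole path does not fit in Name. A minimal sketch:

#include <cstddef>
#include <string>
#include <utility>

// Hypothetical stand-in for sizeof(UstarHeader::Name).
constexpr size_t NameFieldSize = 100;

static std::pair<std::string, std::string> splitUstarPath(const std::string &Path) {
  if (Path.size() <= NameFieldSize)
    return {"", Path};                          // short path: Name alone holds it
  size_t Sep = Path.rfind('/', NameFieldSize + 1);
  if (Sep == std::string::npos)
    return {"", Path};                          // no usable separator: caller falls back to PAX
  return {Path.substr(0, Sep), Path.substr(Sep + 1)};
}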
@@ -608,6 +608,10 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);

@@ -172,16 +172,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::STORE, MVT::v2f64, Promote);
  AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);

  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);

  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);

  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);

  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);

@@ -822,6 +822,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
@@ -1630,31 +1631,44 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  return Match_Success;
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  // What asm variants we should check
  std::vector<unsigned> MatchedVariants;
  if (getForcedEncodingSize() == 32) {
    MatchedVariants = {AMDGPUAsmVariants::DEFAULT};
  } else if (isForcedVOP3()) {
    MatchedVariants = {AMDGPUAsmVariants::VOP3};
  } else if (isForcedSDWA()) {
    MatchedVariants = {AMDGPUAsmVariants::SDWA};
  } else if (isForcedDPP()) {
    MatchedVariants = {AMDGPUAsmVariants::DPP};
  } else {
    MatchedVariants = {AMDGPUAsmVariants::DEFAULT,
                       AMDGPUAsmVariants::VOP3,
                       AMDGPUAsmVariants::SDWA,
                       AMDGPUAsmVariants::DPP};
  }

  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : MatchedVariants) {
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
@@ -3486,7 +3500,7 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if ((BasicInstType == SIInstrFlags::VOPC ||
    if ((BasicInstType == SIInstrFlags::VOPC ||
         BasicInstType == SIInstrFlags::VOP2)&&
        Op.isReg() &&
        Op.Reg.RegNo == AMDGPU::VCC) {

@@ -99,6 +99,18 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
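A side note, not from the patch: the read-modify-write (RMW) handling mentioned in the comment above amounts to loading the containing dword, clearing the target byte or halfword, merging in the shifted value, and storing the dword back. A scalar model of that sequence, as a sketch only:

#include <cstdint>

// Hypothetical scalar model of a truncating i8/i16 store into a 32-bit word.
static uint32_t storeSubDword(uint32_t Dword, uint32_t Value, unsigned ByteOffset,
                              uint32_t Mask /* 0xff or 0xffff */) {
  unsigned Shift = ByteOffset * 8;              // byte offset -> bit shift
  uint32_t Cleared = Dword & ~(Mask << Shift);  // clear the destination bits
  return Cleared | ((Value & Mask) << Shift);   // merge in the new bits
}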
@@ -1087,79 +1099,114 @@ void R600TargetLowering::getStackAddress(unsigned StackWidth,
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  unsigned Mask = 0;
  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    Mask = 0xff;
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    Mask = 0xffff;
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);;
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue Chain = Store->getChain();
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
                            DAG.getConstant(2, DL, MVT::i32));
  SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
                            Chain, Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32));
  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the functiom,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                DAG.getConstant(Mask, DL, MVT::i32),
                                ShiftAmt);
  DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
                        DAG.getConstant(0xffffffff, DL, MVT::i32));
  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
  return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                     Chain, Value, Ptr,
                     DAG.getTargetConstant(0, DL, MVT::i32));

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();
  SDValue Value = StoreNode->getValue();
  EVT ValueVT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  unsigned Align = StoreNode->getAlignment();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
      ValueVT.isVector()) {
    return SplitVectorStore(Op, DAG);
      VT.isVector()) {
    return scalarizeVectorStore(StoreNode, DAG);
  }

  // Private AS needs special fixes
  if (Align < MemVT.getStoreSize() && (AS != AMDGPUAS::PRIVATE_ADDRESS) &&
  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDLoc DL(Op);
  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
@@ -1169,15 +1216,19 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the mask in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
@@ -1191,12 +1242,9 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               ValueVT.bitsGE(MVT::i32)) {
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
@@ -1207,49 +1255,22 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value, Ptr,
                        DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  return Chain;
  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// return (512 + (kc_bank << 12)
@@ -1299,51 +1320,50 @@ SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  // <SI && AS=PRIVATE && EXTLOAD && size < 32bit,
  // register (2-)byte extract.
  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  // Get Register holding the target.
  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
                            DAG.getConstant(2, DL, MVT::i32));
  // Load the Register.
  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
                            Load->getChain(),
                            Ptr,
                            DAG.getTargetConstant(0, DL, MVT::i32),
                            Op.getOperand(2));
  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                Load->getBasePtr(),
                                DAG.getConstant(0x3, DL, MVT::i32));
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  // ... ones.
  if (ExtType == ISD::SEXTLOAD) {
  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);

    SDValue Ops[] = {
      DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
      Load->getChain()
    };

    return DAG.getMergeValues(Ops, DL);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  // ... or zeros.
  SDValue Ops[] = {
    DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
    Load->getChain()
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
@@ -1365,12 +1385,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      scalarizeVectorLoad(LoadNode, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
@@ -1421,8 +1439,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    return DAG.getMergeValues(MergedValues, DL);
  }

  SDValue LoweredLoad;

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
@@ -1447,47 +1463,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT <= 4);
    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElemVT);
    LoweredLoad = DAG.getBuildVector(TargetVT, DL, makeArrayRef(Loads, NumElemVT));
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {

@@ -1268,6 +1268,17 @@ let Predicates = [isR600] in {

defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;

// Hardcode channel to 0
// NOTE: LSHR is not available here. LSHR is per family instruction
def : Pat <
  (i32 (load_private ADDRIndirect:$addr) ),
  (R600_RegisterLoad FRAMEri:$addr, (i32 0))
>;
def : Pat <
  (store_private i32:$val, ADDRIndirect:$addr),
  (R600_RegisterStore i32:$val, FRAMEri:$addr, (i32 0))
>;


//===----------------------------------------------------------------------===//
// Pseudo instructions

@@ -99,6 +99,18 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::STORE, MVT::v16i32, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Expand);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Expand);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Expand);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);


  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
@@ -699,7 +711,8 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,

SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                         const SDLoc &SL, SDValue Chain,
                                         unsigned Offset, bool Signed) const {
                                         unsigned Offset, bool Signed,
                                         const ISD::InputArg *Arg) const {
  const DataLayout &DL = DAG.getDataLayout();
  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
  PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
@@ -713,20 +726,21 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                     MachineMemOperand::MODereferenceable |
                     MachineMemOperand::MOInvariant);

  SDValue Val;
  SDValue Val = Load;
  if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
      VT.bitsLT(MemVT)) {
    unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
    Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT));
  }

  if (MemVT.isFloatingPoint())
    Val = getFPExtOrFPTrunc(DAG, Load, SL, VT);
    Val = getFPExtOrFPTrunc(DAG, Val, SL, VT);
  else if (Signed)
    Val = DAG.getSExtOrTrunc(Load, SL, VT);
    Val = DAG.getSExtOrTrunc(Val, SL, VT);
  else
    Val = DAG.getZExtOrTrunc(Load, SL, VT);
    Val = DAG.getZExtOrTrunc(Val, SL, VT);

  SDValue Ops[] = {
    Val,
    Load.getValue(1)
  };

  return DAG.getMergeValues(Ops, SL);
  return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}

SDValue SITargetLowering::LowerFormalArguments(
@@ -899,7 +913,8 @@ SDValue SITargetLowering::LowerFormalArguments(
      // The first 36 bytes of the input buffer contains information about
      // thread group and global sizes.
      SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
                                   Offset, Ins[i].Flags.isSExt());
                                   Offset, Ins[i].Flags.isSExt(),
                                   &Ins[i]);
      Chains.push_back(Arg.getValue(1));

      auto *ParamTy =

@@ -24,7 +24,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
  SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
                            unsigned Offset) const;
  SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
                         SDValue Chain, unsigned Offset, bool Signed) const;
                         SDValue Chain, unsigned Offset, bool Signed,
                         const ISD::InputArg *Arg = nullptr) const;
  SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                             SelectionDAG &DAG) const override;
  SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,

@@ -203,8 +203,8 @@ unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
                                                   unsigned ConstraintCode,
                                                   std::vector<SDValue> &OutOps) {
  assert(ConstraintCode == InlineAsm::Constraint_m ||
         ConstraintCode == InlineAsm::Constraint_Q &&
  assert((ConstraintCode == InlineAsm::Constraint_m ||
          ConstraintCode == InlineAsm::Constraint_Q) &&
         "Unexpected asm memory constraint");

  MachineRegisterInfo &RI = MF->getRegInfo();

@@ -14,6 +14,7 @@

#include "AVRISelLowering.h"

#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -1933,5 +1934,45 @@ void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

unsigned AVRTargetLowering::getRegisterByName(const char *RegName,
                                              EVT VT,
                                              SelectionDAG &DAG) const {
  unsigned Reg;

  if (VT == MVT::i8) {
    Reg = StringSwitch<unsigned>(RegName)
      .Case("r0", AVR::R0).Case("r1", AVR::R1).Case("r2", AVR::R2)
      .Case("r3", AVR::R3).Case("r4", AVR::R4).Case("r5", AVR::R5)
      .Case("r6", AVR::R6).Case("r7", AVR::R7).Case("r8", AVR::R8)
      .Case("r9", AVR::R9).Case("r10", AVR::R10).Case("r11", AVR::R11)
      .Case("r12", AVR::R12).Case("r13", AVR::R13).Case("r14", AVR::R14)
      .Case("r15", AVR::R15).Case("r16", AVR::R16).Case("r17", AVR::R17)
      .Case("r18", AVR::R18).Case("r19", AVR::R19).Case("r20", AVR::R20)
      .Case("r21", AVR::R21).Case("r22", AVR::R22).Case("r23", AVR::R23)
      .Case("r24", AVR::R24).Case("r25", AVR::R25).Case("r26", AVR::R26)
      .Case("r27", AVR::R27).Case("r28", AVR::R28).Case("r29", AVR::R29)
      .Case("r30", AVR::R30).Case("r31", AVR::R31)
      .Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30)
      .Default(0);
  } else {
    Reg = StringSwitch<unsigned>(RegName)
      .Case("r0", AVR::R1R0).Case("r2", AVR::R3R2)
      .Case("r4", AVR::R5R4).Case("r6", AVR::R7R6)
      .Case("r8", AVR::R9R8).Case("r10", AVR::R11R10)
      .Case("r12", AVR::R13R12).Case("r14", AVR::R15R14)
      .Case("r16", AVR::R17R16).Case("r18", AVR::R19R18)
      .Case("r20", AVR::R21R20).Case("r22", AVR::R23R22)
      .Case("r24", AVR::R25R24).Case("r26", AVR::R27R26)
      .Case("r28", AVR::R29R28).Case("r30", AVR::R31R30)
      .Case("X", AVR::R27R26).Case("Y", AVR::R29R28).Case("Z", AVR::R31R30)
      .Default(0);
  }

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}

} // end of namespace llvm


@@ -116,6 +116,9 @@ class AVRTargetLowering : public TargetLowering {
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;

  unsigned getRegisterByName(const char* RegName, EVT VT,
                             SelectionDAG &DAG) const override;

private:
  SDValue getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AVRcc,
                    SelectionDAG &DAG, SDLoc dl) const;

@@ -13,15 +13,13 @@

#include "BPF.h"
#include "BPFInstrInfo.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <iterator>

#define GET_INSTRINFO_CTOR_DTOR
#include "BPFGenInstrInfo.inc"
@@ -109,11 +107,11 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
      while (std::next(I) != MBB.end())
        std::next(I)->eraseFromParent();
      Cond.clear();
      FBB = 0;
      FBB = nullptr;

      // Delete the J if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        TBB = nullptr;
        I->eraseFromParent();
        I = MBB.end();
        continue;

@@ -12,16 +12,15 @@
//===----------------------------------------------------------------------===//

#include "BPF.h"
#include "BPFRegisterInfo.h"
#include "BPFSubtarget.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include <cstdint>

using namespace llvm;

@@ -36,14 +35,15 @@ class BPFDisassembler : public MCDisassembler {
public:
  BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
      : MCDisassembler(STI, Ctx) {}
  virtual ~BPFDisassembler() {}
  ~BPFDisassembler() override = default;

  DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
                              ArrayRef<uint8_t> Bytes, uint64_t Address,
                              raw_ostream &VStream,
                              raw_ostream &CStream) const override;
};
}

} // end anonymous namespace

static MCDisassembler *createBPFDisassembler(const Target &T,
                                             const MCSubtargetInfo &STI,

@@ -8,28 +8,24 @@
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

namespace {

class BPFAsmBackend : public MCAsmBackend {
public:
  bool IsLittleEndian;

  BPFAsmBackend(bool IsLittleEndian)
    : MCAsmBackend(), IsLittleEndian(IsLittleEndian) {}
  ~BPFAsmBackend() override {}
  ~BPFAsmBackend() override = default;

  void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                  uint64_t Value, bool IsPCRel) const override;
@@ -53,6 +49,8 @@ class BPFAsmBackend : public MCAsmBackend {
  bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
};

} // end anonymous namespace

bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
  if ((Count % 8) != 0)
    return false;
@@ -66,7 +64,6 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                               unsigned DataSize, uint64_t Value,
                               bool IsPCRel) const {

  if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
    assert(Value == 0);
  } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
@@ -92,7 +89,6 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
  return createBPFELFObjectWriter(OS, 0, IsLittleEndian);
}
}

MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
                                        const MCRegisterInfo &MRI,

@@ -10,29 +10,30 @@
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>

using namespace llvm;

namespace {

class BPFELFObjectWriter : public MCELFObjectTargetWriter {
public:
  BPFELFObjectWriter(uint8_t OSABI);

  ~BPFELFObjectWriter() override;
  ~BPFELFObjectWriter() override = default;

protected:
  unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
                        const MCFixup &Fixup, bool IsPCRel) const override;
};
}

} // end anonymous namespace

BPFELFObjectWriter::BPFELFObjectWriter(uint8_t OSABI)
    : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_BPF,
                              /*HasRelocationAddend*/ false) {}

BPFELFObjectWriter::~BPFELFObjectWriter() {}

unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
                                          const MCFixup &Fixup,
                                          bool IsPCRel) const {

@@ -12,24 +12,25 @@
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>

using namespace llvm;

#define DEBUG_TYPE "mccodeemitter"

namespace {

class BPFMCCodeEmitter : public MCCodeEmitter {
  BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
  void operator=(const BPFMCCodeEmitter &) = delete;
  const MCInstrInfo &MCII;
  const MCRegisterInfo &MRI;
  bool IsLittleEndian;
@@ -38,8 +39,9 @@ class BPFMCCodeEmitter : public MCCodeEmitter {
  BPFMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                   bool IsLittleEndian)
      : MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {}

  ~BPFMCCodeEmitter() {}
  BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
  void operator=(const BPFMCCodeEmitter &) = delete;
  ~BPFMCCodeEmitter() override = default;

  // getBinaryCodeForInstr - TableGen'erated function for getting the
  // binary encoding for an instruction.
@@ -66,7 +68,8 @@ class BPFMCCodeEmitter : public MCCodeEmitter {
  void verifyInstructionPredicates(const MCInst &MI,
                                   uint64_t AvailableFeatures) const;
};
}

} // end anonymous namespace

MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
                                            const MCRegisterInfo &MRI,

@@ -12,14 +12,13 @@
//===----------------------------------------------------------------------===//

#include "BPF.h"
#include "BPFMCTargetDesc.h"
#include "BPFMCAsmInfo.h"
#include "InstPrinter/BPFInstPrinter.h"
#include "MCTargetDesc/BPFMCTargetDesc.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"

#define GET_INSTRINFO_MC_DESC
@@ -64,7 +63,7 @@ static MCInstPrinter *createBPFMCInstPrinter(const Triple &T,
                                             const MCRegisterInfo &MRI) {
  if (SyntaxVariant == 0)
    return new BPFInstPrinter(MAI, MII, MRI);
  return 0;
  return nullptr;
}

extern "C" void LLVMInitializeBPFTargetMC() {

@@ -101,7 +101,7 @@ LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
}

LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,
        const char* Triple, const char* CPU, const char* Features,
        const char *Triple, const char *CPU, const char *Features,
        LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
        LLVMCodeModel CodeModel) {
  Optional<Reloc::Model> RM;
@@ -139,7 +139,7 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T,

  TargetOptions opt;
  return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM,
      CM, OL));
                                             CM, OL));
}

void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); }

@@ -28,6 +28,7 @@ class FunctionPass;
// LLVM IR passes.
ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj);
void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &);
ModulePass *createWebAssemblyFixFunctionBitcasts();
FunctionPass *createWebAssemblyOptimizeReturned();

// ISel and immediate followup passes.

@@ -0,0 +1,159 @@
//===-- WebAssemblyFixFunctionBitcasts.cpp - Fix function bitcasts --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Fix bitcasted functions.
///
/// WebAssembly requires caller and callee signatures to match, however in LLVM,
/// some amount of slop is vaguely permitted. Detect mismatch by looking for
/// bitcasts of functions and rewrite them to use wrapper functions instead.
///
/// This doesn't catch all cases, such as when a function's address is taken in
/// one place and casted in another, but it works for many common cases.
///
/// Note that LLVM already optimizes away function bitcasts in common cases by
/// dropping arguments as needed, so this pass only ends up getting used in less
/// common cases.
///
//===----------------------------------------------------------------------===//

#include "WebAssembly.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-fix-function-bitcasts"

namespace {
class FixFunctionBitcasts final : public ModulePass {
  StringRef getPassName() const override {
    return "WebAssembly Fix Function Bitcasts";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    ModulePass::getAnalysisUsage(AU);
  }

  bool runOnModule(Module &M) override;

public:
  static char ID;
  FixFunctionBitcasts() : ModulePass(ID) {}
};
} // End anonymous namespace

char FixFunctionBitcasts::ID = 0;
ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
  return new FixFunctionBitcasts();
}

// Recursively descend the def-use lists from V to find non-bitcast users of
// bitcasts of V.
static void FindUses(Value *V, Function &F,
                     SmallVectorImpl<std::pair<Use *, Function *>> &Uses) {
  for (Use &U : V->uses()) {
    if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser()))
      FindUses(BC, F, Uses);
    else if (U.get()->getType() != F.getType())
      Uses.push_back(std::make_pair(&U, &F));
  }
}

// Create a wrapper function with type Ty that calls F (which may have a
// different type). Attempt to support common bitcasted function idioms:
//  - Call with more arguments than needed: arguments are dropped
//  - Call with fewer arguments than needed: arguments are filled in with undef
//  - Return value is not needed: drop it
//  - Return value needed but not present: supply an undef
//
// For now, return nullptr without creating a wrapper if the wrapper cannot
// be generated due to incompatible types.
static Function *CreateWrapper(Function *F, FunctionType *Ty) {
  Module *M = F->getParent();

  Function *Wrapper =
      Function::Create(Ty, Function::PrivateLinkage, "bitcast", M);
  BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);

  // Determine what arguments to pass.
  SmallVector<Value *, 4> Args;
  Function::arg_iterator AI = Wrapper->arg_begin();
  FunctionType::param_iterator PI = F->getFunctionType()->param_begin();
  FunctionType::param_iterator PE = F->getFunctionType()->param_end();
  for (; AI != Wrapper->arg_end() && PI != PE; ++AI, ++PI) {
    if (AI->getType() != *PI) {
      Wrapper->eraseFromParent();
      return nullptr;
    }
    Args.push_back(&*AI);
  }
  for (; PI != PE; ++PI)
    Args.push_back(UndefValue::get(*PI));

  CallInst *Call = CallInst::Create(F, Args, "", BB);

  // Determine what value to return.
  if (Ty->getReturnType()->isVoidTy())
    ReturnInst::Create(M->getContext(), BB);
  else if (F->getFunctionType()->getReturnType()->isVoidTy())
    ReturnInst::Create(M->getContext(), UndefValue::get(Ty->getReturnType()),
                       BB);
  else if (F->getFunctionType()->getReturnType() == Ty->getReturnType())
    ReturnInst::Create(M->getContext(), Call, BB);
  else {
    Wrapper->eraseFromParent();
    return nullptr;
  }

  return Wrapper;
}

bool FixFunctionBitcasts::runOnModule(Module &M) {
  SmallVector<std::pair<Use *, Function *>, 0> Uses;

  // Collect all the places that need wrappers.
  for (Function &F : M)
    FindUses(&F, F, Uses);

  DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers;

  for (auto &UseFunc : Uses) {
    Use *U = UseFunc.first;
    Function *F = UseFunc.second;
    PointerType *PTy = cast<PointerType>(U->get()->getType());
    FunctionType *Ty = dyn_cast<FunctionType>(PTy->getElementType());

    // If the function is casted to something like i8* as a "generic pointer"
    // to be later casted to something else, we can't generate a wrapper for it.
    // Just ignore such casts for now.
    if (!Ty)
      continue;

    auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
    if (Pair.second)
      Pair.first->second = CreateWrapper(F, Ty);

    Function *Wrapper = Pair.first->second;
    if (!Wrapper)
      continue;

    if (isa<Constant>(U->get()))
      U->get()->replaceAllUsesWith(Wrapper);
    else
      U->set(Wrapper);
  }

  return true;
}
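A side note, not part of the new pass: the mismatch it papers over is the C/C++ idiom of reaching a callee through a pointer of a different function type, which WebAssembly rejects at call time unless a wrapper with the expected signature is interposed. A minimal illustration (the cast alone is shown; calling through it would be undefined behaviour in ISO C++):

#include <cstdio>

static int callee(int X) { return X + 1; }

int main() {
  using NoArgFn = int (*)();
  // Mismatched function-pointer cast: the shape of the problem the pass fixes
  // by synthesizing a wrapper with the caller's signature.
  NoArgFn F = reinterpret_cast<NoArgFn>(&callee);
  (void)F;                        // intentionally not called here
  std::printf("%d\n", callee(1)); // direct, signature-correct call
  return 0;
}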
@@ -40,8 +40,8 @@ defm ROTL : BinaryInt<rotl, "rotl", 0x77, 0x89>;
defm ROTR : BinaryInt<rotr, "rotr", 0x78, 0x8a>;

let isCommutable = 1 in {
defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x68>;
defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x69>;
defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x51>;
defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x52>;
} // isCommutable = 1
defm LT_S : ComparisonInt<SETLT, "lt_s", 0x48, 0x53>;
defm LT_U : ComparisonInt<SETULT, "lt_u", 0x49, 0x54>;

@@ -163,6 +163,10 @@ void WebAssemblyPassConfig::addIRPasses() {
    // control specifically what gets lowered.
    addPass(createAtomicExpandPass(TM));

  // Fix function bitcasts, as WebAssembly requires caller and callee signatures
  // to match.
  addPass(createWebAssemblyFixFunctionBitcasts());

  // Optimize "returned" function attributes.
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createWebAssemblyOptimizeReturned());

@@ -6962,23 +6962,24 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
}

/// Try to fold a build_vector that performs an 'addsub' to an X86ISD::ADDSUB
/// node.
static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
                             const X86Subtarget &Subtarget, SelectionDAG &DAG) {
/// Returns true iff \p BV builds a vector with the result equivalent to
/// the result of ADDSUB operation.
/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
/// are written to the parameters \p Opnd0 and \p Opnd1.
static bool isAddSub(const BuildVectorSDNode *BV,
                     const X86Subtarget &Subtarget, SelectionDAG &DAG,
                     SDValue &Opnd0, SDValue &Opnd1) {

  MVT VT = BV->getSimpleValueType(0);
  if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
    return SDValue();
      (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
      (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
    return false;

  SDLoc DL(BV);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue InVec0 = DAG.getUNDEF(VT);
  SDValue InVec1 = DAG.getUNDEF(VT);

  assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
          VT == MVT::v2f64) && "build_vector with an invalid type found!");

  // Odd-numbered elements in the input build vector are obtained from
  // adding two integer/float elements.
  // Even-numbered elements in the input build vector are obtained from
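A side note, not from the diff: the ADDSUB shape being matched here can be written as a scalar loop over lanes, with the same pair of inputs feeding every lane — even lanes subtract, odd lanes add. A minimal sketch:

#include <cstddef>

// Hypothetical scalar model of the lane pattern X86ISD::ADDSUB implements.
static void addsub(const float *A, const float *B, float *Out, size_t N) {
  for (size_t I = 0; I < N; ++I)
    Out[I] = (I & 1) ? A[I] + B[I]   // odd lanes: add
                     : A[I] - B[I];  // even lanes: subtract
}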
@@ -7000,7 +7001,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,

    // Early exit if we found an unexpected opcode.
    if (Opcode != ExpectedOpcode)
      return SDValue();
      return false;

    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
@@ -7013,11 +7014,11 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
        !isa<ConstantSDNode>(Op0.getOperand(1)) ||
        !isa<ConstantSDNode>(Op1.getOperand(1)) ||
        Op0.getOperand(1) != Op1.getOperand(1))
      return SDValue();
      return false;

    unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
    if (I0 != i)
      return SDValue();
      return false;

    // We found a valid add/sub node. Update the information accordingly.
    if (i & 1)
@@ -7029,39 +7030,118 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV,
    if (InVec0.isUndef()) {
      InVec0 = Op0.getOperand(0);
      if (InVec0.getSimpleValueType() != VT)
        return SDValue();
        return false;
    }
    if (InVec1.isUndef()) {
      InVec1 = Op1.getOperand(0);
      if (InVec1.getSimpleValueType() != VT)
        return SDValue();
        return false;
    }

    // Make sure that operands in input to each add/sub node always
    // come from a same pair of vectors.
    if (InVec0 != Op0.getOperand(0)) {
      if (ExpectedOpcode == ISD::FSUB)
        return SDValue();
        return false;

      // FADD is commutable. Try to commute the operands
      // and then test again.
      std::swap(Op0, Op1);
      if (InVec0 != Op0.getOperand(0))
        return SDValue();
        return false;
    }

    if (InVec1 != Op1.getOperand(0))
      return SDValue();
      return false;

    // Update the pair of expected opcodes.
    std::swap(ExpectedOpcode, NextExpectedOpcode);
  }

  // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
  if (AddFound && SubFound && !InVec0.isUndef() && !InVec1.isUndef())
    return DAG.getNode(X86ISD::ADDSUB, DL, VT, InVec0, InVec1);
  if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
    return false;

  return SDValue();
  Opnd0 = InVec0;
  Opnd1 = InVec1;
  return true;
}

/// Returns true if is possible to fold MUL and an idiom that has already been
/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
/// If (and only if) true is returned, the operands of FMADDSUB are written to
/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
/// Prior to calling this function it should be known that there is some
/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
/// before replacement of such SDNode with ADDSUB operation. Thus the number
/// of \p Opnd0 uses is expected to be equal to 2.
/// For example, this function may be called for the following IR:
///   %AB = fmul fast <2 x double> %A, %B
///   %Sub = fsub fast <2 x double> %AB, %C
///   %Add = fadd fast <2 x double> %AB, %C
///   %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
///             <2 x i32> <i32 0, i32 3>
/// There is a def for %Addsub here, which potentially can be replaced by
/// X86ISD::ADDSUB operation:
///   %Addsub = X86ISD::ADDSUB %AB, %C
/// and such ADDSUB can further be replaced with FMADDSUB:
///   %Addsub = FMADDSUB %A, %B, %C.
///
/// The main reason why this method is called before the replacement of the
/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
/// FMADDSUB is.
static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                       SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
  if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
      !Subtarget.hasAnyFMA())
    return false;

  // FIXME: These checks must match the similar ones in
  // DAGCombiner::visitFADDForFMACombine. It would be good to have one
  // function that would answer if it is Ok to fuse MUL + ADD to FMADD
  // or MUL + ADDSUB to FMADDSUB.
  const TargetOptions &Options = DAG.getTarget().Options;
  bool AllowFusion =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
  if (!AllowFusion)
    return false;

  Opnd2 = Opnd1;
  Opnd1 = Opnd0.getOperand(1);
  Opnd0 = Opnd0.getOperand(0);
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' operation
|
||||
/// accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB node.
|
||||
static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDValue Opnd0, Opnd1;
|
||||
if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
|
||||
return SDValue();
|
||||
|
||||
MVT VT = BV->getSimpleValueType(0);
|
||||
SDLoc DL(BV);
|
||||
|
||||
// Try to generate X86ISD::FMADDSUB node here.
|
||||
SDValue Opnd2;
|
||||
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
|
||||
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
|
||||
|
||||
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
|
||||
// the ADDSUB idiom has been successfully recognized. There are no known
|
||||
// X86 targets with 512-bit ADDSUB instructions!
|
||||
// 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
|
||||
// recognition.
|
||||
if (VT.is512BitVector())
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
|
||||
}
|
||||
|
||||
/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
|
||||
@ -7290,7 +7370,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
return VectorConstant;
|
||||
|
||||
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
|
||||
if (SDValue AddSub = LowerToAddSub(BV, Subtarget, DAG))
|
||||
if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
|
||||
return AddSub;
|
||||
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
|
||||
return HorizontalOp;
|
||||
@ -12965,6 +13045,12 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
|
||||
if (Subtarget.hasVBMI())
|
||||
return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
|
||||
|
||||
// Try to create an in-lane repeating shuffle mask and then shuffle the
|
||||
// the results into the target lanes.
|
||||
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
|
||||
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
|
||||
return V;
|
||||
|
||||
// FIXME: Implement direct support for this type!
|
||||
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
|
||||
}
|
||||
@ -16985,9 +17071,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, newSelect, zeroConst);
|
||||
}
|
||||
|
||||
if (Cond.getOpcode() == ISD::SETCC)
|
||||
if (SDValue NewCond = LowerSETCC(Cond, DAG))
|
||||
if (Cond.getOpcode() == ISD::SETCC) {
|
||||
if (SDValue NewCond = LowerSETCC(Cond, DAG)) {
|
||||
Cond = NewCond;
|
||||
// If the condition was updated, it's possible that the operands of the
|
||||
// select were also updated (for example, EmitTest has a RAUW). Refresh
|
||||
// the local references to the select operands in case they got stale.
|
||||
Op1 = Op.getOperand(1);
|
||||
Op2 = Op.getOperand(2);
|
||||
}
|
||||
}
|
||||
|
||||
// (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y
|
||||
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
|
||||
@ -17193,22 +17286,26 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
|
||||
if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
|
||||
return SDValue();
|
||||
|
||||
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
|
||||
if (VT.is512BitVector() && InVTElt != MVT::i1) {
|
||||
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
|
||||
return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
|
||||
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
|
||||
}
|
||||
|
||||
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
|
||||
assert (InVTElt == MVT::i1 && "Unexpected vector type");
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
|
||||
SDValue NegOne = DAG.getConstant(
|
||||
APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
|
||||
SDValue Zero = DAG.getConstant(
|
||||
APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
|
||||
SDValue V;
|
||||
if (Subtarget.hasDQI()) {
|
||||
V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
|
||||
assert(!VT.is512BitVector() && "Unexpected vector type");
|
||||
} else {
|
||||
SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
|
||||
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
|
||||
V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
|
||||
if (VT.is512BitVector())
|
||||
return V;
|
||||
}
|
||||
|
||||
SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
|
||||
if (VT.is512BitVector())
|
||||
return V;
|
||||
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
|
||||
}
|
||||
|
||||
@ -21528,6 +21625,23 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
|
||||
return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
|
||||
}
|
||||
|
||||
// It's worth extending once and using the vXi16/vXi32 shifts for smaller
|
||||
// types, but without AVX512 the extra overheads to get from vXi8 to vXi32
|
||||
// make the existing SSE solution better.
|
||||
if ((Subtarget.hasInt256() && VT == MVT::v8i16) ||
|
||||
(Subtarget.hasAVX512() && VT == MVT::v16i16) ||
|
||||
(Subtarget.hasAVX512() && VT == MVT::v16i8) ||
|
||||
(Subtarget.hasBWI() && VT == MVT::v32i8)) {
|
||||
MVT EvtSVT = (VT == MVT::v32i8 ? MVT::i16 : MVT::i32);
|
||||
MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements());
|
||||
unsigned ExtOpc =
|
||||
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
|
||||
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
|
||||
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT,
|
||||
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
|
||||
}
|
||||
|
||||
if (VT == MVT::v16i8 ||
|
||||
(VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP())) {
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
|
||||
@ -21636,19 +21750,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
|
||||
}
|
||||
}
|
||||
|
||||
// It's worth extending once and using the v8i32 shifts for 16-bit types, but
|
||||
// the extra overheads to get from v16i8 to v8i32 make the existing SSE
|
||||
// solution better.
|
||||
if (Subtarget.hasInt256() && VT == MVT::v8i16) {
|
||||
MVT ExtVT = MVT::v8i32;
|
||||
unsigned ExtOpc =
|
||||
Op.getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
|
||||
R = DAG.getNode(ExtOpc, dl, ExtVT, R);
|
||||
Amt = DAG.getNode(ISD::ANY_EXTEND, dl, ExtVT, Amt);
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, VT,
|
||||
DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
|
||||
}
|
||||
|
||||
if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) {
|
||||
MVT ExtVT = MVT::v8i32;
|
||||
SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
|
||||
@ -27763,29 +27864,32 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// \brief Try to combine a shuffle into a target-specific add-sub node.
|
||||
/// Returns true iff the shuffle node \p N can be replaced with ADDSUB
|
||||
/// operation. If true is returned then the operands of ADDSUB operation
|
||||
/// are written to the parameters \p Opnd0 and \p Opnd1.
|
||||
///
|
||||
/// We combine this directly on the abstract vector shuffle nodes so it is
|
||||
/// easier to generically match. We also insert dummy vector shuffle nodes for
|
||||
/// the operands which explicitly discard the lanes which are unused by this
|
||||
/// operation to try to flow through the rest of the combiner the fact that
|
||||
/// they're unused.
|
||||
static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDLoc DL(N);
|
||||
/// We combine shuffle to ADDSUB directly on the abstract vector shuffle nodes
|
||||
/// so it is easier to generically match. We also insert dummy vector shuffle
|
||||
/// nodes for the operands which explicitly discard the lanes which are unused
|
||||
/// by this operation to try to flow through the rest of the combiner
|
||||
/// the fact that they're unused.
|
||||
static bool isAddSub(SDNode *N, const X86Subtarget &Subtarget,
|
||||
SDValue &Opnd0, SDValue &Opnd1) {
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
|
||||
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
|
||||
return SDValue();
|
||||
(!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
|
||||
(!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
|
||||
return false;
|
||||
|
||||
// We only handle target-independent shuffles.
|
||||
// FIXME: It would be easy and harmless to use the target shuffle mask
|
||||
// extraction tool to support more.
|
||||
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
|
||||
SmallVector<int, 8> Mask(OrigMask.begin(), OrigMask.end());
|
||||
SmallVector<int, 16> Mask(OrigMask.begin(), OrigMask.end());
|
||||
|
||||
SDValue V1 = N->getOperand(0);
|
||||
SDValue V2 = N->getOperand(1);
|
||||
@ -27796,27 +27900,57 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
|
||||
ShuffleVectorSDNode::commuteMask(Mask);
|
||||
std::swap(V1, V2);
|
||||
} else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
// If there are other uses of these operations we can't fold them.
|
||||
if (!V1->hasOneUse() || !V2->hasOneUse())
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
// Ensure that both operations have the same operands. Note that we can
|
||||
// commute the FADD operands.
|
||||
SDValue LHS = V1->getOperand(0), RHS = V1->getOperand(1);
|
||||
if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
|
||||
(V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
|
||||
return SDValue();
|
||||
return false;
|
||||
|
||||
// We're looking for blends between FADD and FSUB nodes. We insist on these
|
||||
// nodes being lined up in a specific expected pattern.
|
||||
if (!(isShuffleEquivalent(V1, V2, Mask, {0, 3}) ||
|
||||
isShuffleEquivalent(V1, V2, Mask, {0, 5, 2, 7}) ||
|
||||
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
|
||||
isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15}) ||
|
||||
isShuffleEquivalent(V1, V2, Mask, {0, 17, 2, 19, 4, 21, 6, 23,
|
||||
8, 25, 10, 27, 12, 29, 14, 31})))
|
||||
return false;
|
||||
|
||||
Opnd0 = LHS;
|
||||
Opnd1 = RHS;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// \brief Try to combine a shuffle into a target-specific add-sub or
|
||||
/// mul-add-sub node.
|
||||
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
|
||||
const X86Subtarget &Subtarget,
|
||||
SelectionDAG &DAG) {
|
||||
SDValue Opnd0, Opnd1;
|
||||
if (!isAddSub(N, Subtarget, Opnd0, Opnd1))
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc DL(N);
|
||||
|
||||
// Try to generate X86ISD::FMADDSUB node here.
|
||||
SDValue Opnd2;
|
||||
if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
|
||||
return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
|
||||
|
||||
// Do not generate X86ISD::ADDSUB node for 512-bit types even though
|
||||
// the ADDSUB idiom has been successfully recognized. There are no known
|
||||
// X86 targets with 512-bit ADDSUB instructions!
|
||||
if (VT.is512BitVector())
|
||||
return SDValue();
|
||||
|
||||
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
|
||||
}
|
||||
|
||||
// We are looking for a shuffle where both sources are concatenated with undef
|
||||
@ -27878,7 +28012,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
|
||||
// If we have legalized the vector types, look for blends of FADD and FSUB
|
||||
// nodes that we can fuse into an ADDSUB node.
|
||||
if (TLI.isTypeLegal(VT))
|
||||
if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
|
||||
if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
|
||||
return AddSub;
|
||||
|
||||
// During Type Legalization, when promoting illegal vector types,
|
||||
|
@ -443,6 +443,22 @@ def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
|
||||
[(set VR512:$dst, (v16i32 immAllOnesV))]>;
|
||||
}
|
||||
|
||||
// Alias instructions that allow VPTERNLOG to be used with a mask to create
|
||||
// a mix of all ones and all zeros elements. This is done this way to force
|
||||
// the same register to be used as input for all three sources.
|
||||
let isPseudo = 1, Predicates = [HasAVX512] in {
|
||||
def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
|
||||
(ins VK16WM:$mask), "",
|
||||
[(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
|
||||
(v16i32 immAllOnesV),
|
||||
(v16i32 immAllZerosV)))]>;
|
||||
def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
|
||||
(ins VK8WM:$mask), "",
|
||||
[(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
|
||||
(bc_v8i64 (v16i32 immAllOnesV)),
|
||||
(bc_v8i64 (v16i32 immAllZerosV))))]>;
|
||||
}
|
||||
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
|
||||
isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
|
||||
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
|
||||
@ -1064,10 +1080,10 @@ def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
|
||||
(v8f32 VR256X:$src), 1)>;
|
||||
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
|
||||
(VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
|
||||
(v4f64 VR256X:$src), 1)>;
|
||||
(v4f64 VR256X:$src), 1)>;
|
||||
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
|
||||
(VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
|
||||
(v4i64 VR256X:$src), 1)>;
|
||||
(v4i64 VR256X:$src), 1)>;
|
||||
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
|
||||
(VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
|
||||
(v8i32 VR256X:$src), 1)>;
|
||||
@ -1485,8 +1501,7 @@ defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
|
||||
// AVX-512 - BLEND using mask
|
||||
//
|
||||
multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
let ExeDomain = _.ExeDomain in {
|
||||
let hasSideEffects = 0 in
|
||||
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
|
||||
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
@ -1496,16 +1511,13 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
|
||||
[(set _.RC:$dst, (vselect _.KRCWM:$mask,
|
||||
(_.VT _.RC:$src2),
|
||||
(_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K;
|
||||
let hasSideEffects = 0 in
|
||||
[]>, EVEX_4V, EVEX_K;
|
||||
def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
|
||||
[]>, EVEX_4V, EVEX_KZ;
|
||||
let mayLoad = 1, hasSideEffects = 0 in
|
||||
let mayLoad = 1 in {
|
||||
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.MemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
@ -1515,38 +1527,32 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
|
||||
[(set _.RC:$dst, (vselect _.KRCWM:$mask,
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src2))),
|
||||
(_.VT _.RC:$src1)))]>,
|
||||
EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
let mayLoad = 1, hasSideEffects = 0 in
|
||||
[]>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
|
||||
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
}
|
||||
}
|
||||
}
|
||||
multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
|
||||
let mayLoad = 1, hasSideEffects = 0 in {
|
||||
def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
|
||||
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
|
||||
[(set _.RC:$dst,(vselect _.KRCWM:$mask,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
|
||||
(_.VT _.RC:$src1)))]>,
|
||||
EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
[]>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
|
||||
let mayLoad = 1, hasSideEffects = 0 in
|
||||
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
|
||||
(ins _.RC:$src1, _.ScalarMemOp:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
|
||||
"$dst, $src1, ${src2}", _.BroadcastStr, "}"),
|
||||
[]>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
multiclass blendmask_dq <bits<8> opc, string OpcodeStr,
|
||||
@ -1582,21 +1588,6 @@ defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
|
||||
defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
|
||||
|
||||
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
|
||||
(v8f32 VR256X:$src2))),
|
||||
(EXTRACT_SUBREG
|
||||
(v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
|
||||
|
||||
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
|
||||
(v8i32 VR256X:$src2))),
|
||||
(EXTRACT_SUBREG
|
||||
(v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
|
||||
}
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Compare Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2735,7 +2726,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
|
||||
(ins _.KRCWM:$mask, _.RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
|
||||
"${dst} {${mask}} {z}, $src}"),
|
||||
[(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
|
||||
[(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
|
||||
(_.VT _.RC:$src),
|
||||
_.ImmAllZerosV)))], _.ExeDomain>,
|
||||
EVEX, EVEX_KZ;
|
||||
@ -2972,6 +2963,30 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
|
||||
(v16i32 VR512:$src))),
|
||||
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
|
||||
|
||||
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
|
||||
// available. Use a 512-bit operation and extract.
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
|
||||
(v8f32 VR256X:$src0))),
|
||||
(EXTRACT_SUBREG
|
||||
(v16f32
|
||||
(VMOVAPSZrrk
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
|
||||
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
|
||||
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
|
||||
sub_ymm)>;
|
||||
|
||||
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
|
||||
(v8i32 VR256X:$src0))),
|
||||
(EXTRACT_SUBREG
|
||||
(v16i32
|
||||
(VMOVDQA32Zrrk
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
|
||||
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
|
||||
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
|
||||
sub_ymm)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasVLX, NoBWI] in {
|
||||
// 128-bit load/store without BWI.
|
||||
def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
|
||||
@ -3116,13 +3131,13 @@ let Predicates = [HasVLX] in {
|
||||
(VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
|
||||
}
|
||||
|
||||
|
||||
// Move Int Doubleword to Packed Double Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128X:$dst,
|
||||
|
||||
// Move Int Doubleword to Packed Double Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128X:$dst,
|
||||
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
|
||||
EVEX;
|
||||
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
|
||||
@ -3152,47 +3167,47 @@ def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src
|
||||
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
|
||||
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Int Doubleword to Single Scalar
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
|
||||
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32X:$dst, (bitconvert GR32:$src))],
|
||||
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
|
||||
EVEX_CD8<64, CD8VT1>;
|
||||
}
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Int Doubleword to Single Scalar
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
|
||||
def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32X:$dst, (bitconvert GR32:$src))],
|
||||
IIC_SSE_MOVDQ>, EVEX;
|
||||
|
||||
def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
|
||||
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
|
||||
|
||||
// Move doubleword from xmm register to r/m32
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
|
||||
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
|
||||
|
||||
// Move doubleword from xmm register to r/m32
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
|
||||
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
|
||||
EVEX;
|
||||
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
|
||||
(ins i32mem:$dst, VR128X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(store (i32 (extractelt (v4i32 VR128X:$src),
|
||||
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
|
||||
EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move quadword from xmm1 register to r/m64
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
|
||||
[(store (i32 (extractelt (v4i32 VR128X:$src),
|
||||
(iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
|
||||
EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move quadword from xmm1 register to r/m64
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
|
||||
(iPTR 0)))],
|
||||
IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
|
||||
Requires<[HasAVX512, In64BitMode]>;
|
||||
@ -3213,39 +3228,39 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
|
||||
|
||||
let hasSideEffects = 0 in
|
||||
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
|
||||
(ins VR128X:$src),
|
||||
"vmovq.s\t{$src, $dst|$dst, $src}",[]>,
|
||||
EVEX, VEX_W;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Scalar Single to Double Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
|
||||
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
|
||||
(ins FR32X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
(ins VR128X:$src),
|
||||
"vmovq.s\t{$src, $dst|$dst, $src}",[]>,
|
||||
EVEX, VEX_W;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Move Scalar Single to Double Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
|
||||
def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
|
||||
(ins FR32X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32:$dst, (bitconvert FR32X:$src))],
|
||||
IIC_SSE_MOVD_ToGP>, EVEX;
|
||||
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
|
||||
(ins i32mem:$dst, FR32X:$src),
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
|
||||
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
|
||||
|
||||
// Move Quadword Int to Packed Quadword Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
|
||||
(ins i64mem:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128X:$dst,
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
|
||||
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 MOVSS, MOVSD
|
||||
"vmovd\t{$src, $dst|$dst, $src}",
|
||||
[(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
|
||||
IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
|
||||
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
|
||||
|
||||
// Move Quadword Int to Packed Quadword Int
|
||||
//
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
|
||||
(ins i64mem:$src),
|
||||
"vmovq\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128X:$dst,
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
|
||||
EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 MOVSS, MOVSD
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass avx512_move_scalar<string asm, SDNode OpNode,
|
||||
@ -8646,6 +8661,28 @@ def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(VMOVDDUPZ128rm addr:$src)>;
|
||||
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
|
||||
(VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
|
||||
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
|
||||
(COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
|
||||
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(v2f64 VR128X:$src0)),
|
||||
(VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
|
||||
def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(bitconvert (v4i32 immAllZerosV))),
|
||||
(VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -543,7 +543,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::MOV8rr, X86::MOV8rm, 0 },
|
||||
{ X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
|
||||
{ X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
|
||||
{ X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
|
||||
{ X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
|
||||
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
|
||||
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
|
||||
{ X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
|
||||
@ -661,7 +661,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
|
||||
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
|
||||
{ X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
|
||||
{ X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
|
||||
{ X86::VMOVDDUPrr, X86::VMOVDDUPrm, 0 },
|
||||
{ X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
|
||||
{ X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
|
||||
{ X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
|
||||
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
|
||||
@ -6864,6 +6864,21 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
||||
.addReg(Reg, RegState::Undef).addImm(0xff);
|
||||
return true;
|
||||
}
|
||||
case X86::AVX512_512_SEXT_MASK_32:
|
||||
case X86::AVX512_512_SEXT_MASK_64: {
|
||||
unsigned Reg = MIB->getOperand(0).getReg();
|
||||
unsigned MaskReg = MIB->getOperand(1).getReg();
|
||||
unsigned MaskState = getRegState(MIB->getOperand(1));
|
||||
unsigned Opc = (MI.getOpcode() == X86::AVX512_512_SEXT_MASK_64) ?
|
||||
X86::VPTERNLOGQZrrikz : X86::VPTERNLOGDZrrikz;
|
||||
MI.RemoveOperand(1);
|
||||
MIB->setDesc(get(Opc));
|
||||
// VPTERNLOG needs 3 register inputs and an immediate.
|
||||
// 0xff will return 1s for any input.
|
||||
MIB.addReg(Reg, RegState::Undef).addReg(MaskReg, MaskState)
|
||||
.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef).addImm(0xff);
|
||||
return true;
|
||||
}
|
||||
case X86::VMOVAPSZ128rm_NOVLX:
|
||||
return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
|
||||
get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
|
||||
|
@ -6397,7 +6397,7 @@ let Predicates = [HasAVX] in {
|
||||
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround",
|
||||
int_x86_sse41_round_ss,
|
||||
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
|
||||
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
|
||||
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
|
||||
}
|
||||
|
||||
let Predicates = [UseAVX] in {
|
||||
|
@ -144,6 +144,10 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX512BWUniformConstCostTable[] = {
|
||||
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
|
||||
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
|
||||
{ ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
|
||||
{ ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
|
||||
{ ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
|
||||
};
|
||||
@ -168,6 +172,10 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2UniformConstCostTable[] = {
|
||||
{ ISD::SHL, MVT::v32i8, 2 }, // psllw + pand.
|
||||
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw + pand.
|
||||
{ ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
|
||||
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
|
||||
|
||||
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
|
||||
@ -184,6 +192,14 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
}
|
||||
|
||||
static const CostTblEntry SSE2UniformConstCostTable[] = {
|
||||
{ ISD::SHL, MVT::v16i8, 2 }, // psllw + pand.
|
||||
{ ISD::SRL, MVT::v16i8, 2 }, // psrlw + pand.
|
||||
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
|
||||
{ ISD::SHL, MVT::v32i8, 4 }, // 2*(psllw + pand).
|
||||
{ ISD::SRL, MVT::v32i8, 4 }, // 2*(psrlw + pand).
|
||||
{ ISD::SRA, MVT::v32i8, 8 }, // 2*(psrlw, pand, pxor, psubb).
|
||||
|
||||
{ ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence
|
||||
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
|
||||
{ ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence
|
||||
@ -207,6 +223,43 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v16i16, 1 }, // psllw.
|
||||
{ ISD::SRL, MVT::v16i16, 1 }, // psrlw.
|
||||
{ ISD::SRA, MVT::v16i16, 1 }, // psraw.
|
||||
};
|
||||
|
||||
if (ST->hasAVX2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry SSE2UniformCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
|
||||
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
|
||||
|
||||
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
|
||||
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
|
||||
|
||||
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
|
||||
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
|
||||
};
|
||||
|
||||
if (ST->hasSSE2() &&
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX512DQCostTable[] = {
|
||||
{ ISD::MUL, MVT::v2i64, 1 },
|
||||
{ ISD::MUL, MVT::v4i64, 1 },
|
||||
@ -219,6 +272,10 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX512BWCostTable[] = {
|
||||
{ ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
|
||||
{ ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
|
||||
{ ISD::SRA, MVT::v32i16, 1 }, // vpsravw
|
||||
|
||||
{ ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v16i8, 4 }, // extend/pmullw/trunc sequence.
|
||||
@ -259,7 +316,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX2CostTable[] = {
|
||||
static const CostTblEntry AVX2ShiftCostTable[] = {
|
||||
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
|
||||
// customize them to detect the cases where shift amount is a scalar one.
|
||||
{ ISD::SHL, MVT::v4i32, 1 },
|
||||
@ -283,11 +340,11 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
// is lowered into a vector multiply (vpmullw).
|
||||
return LT.first;
|
||||
|
||||
if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
|
||||
if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
static const CostTblEntry XOPCostTable[] = {
|
||||
static const CostTblEntry XOPShiftCostTable[] = {
|
||||
// 128bit shifts take 1cy, but right shifts require negation beforehand.
|
||||
{ ISD::SHL, MVT::v16i8, 1 },
|
||||
{ ISD::SRL, MVT::v16i8, 2 },
|
||||
@ -318,93 +375,20 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
|
||||
// Look for XOP lowering tricks.
|
||||
if (ST->hasXOP())
|
||||
if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second))
|
||||
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX2CustomCostTable[] = {
|
||||
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
|
||||
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
|
||||
|
||||
{ ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
|
||||
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
|
||||
|
||||
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
|
||||
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
|
||||
{ ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
|
||||
{ ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
|
||||
|
||||
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v8i32, 1 }, // pmulld
|
||||
{ ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
|
||||
|
||||
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
|
||||
};
|
||||
|
||||
// Look for AVX2 lowering tricks for custom cases.
|
||||
if (ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
|
||||
LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVXCustomCostTable[] = {
|
||||
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
|
||||
|
||||
{ ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/
|
||||
|
||||
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
|
||||
{ ISD::SDIV, MVT::v32i8, 32*20 },
|
||||
{ ISD::SDIV, MVT::v16i16, 16*20 },
|
||||
{ ISD::SDIV, MVT::v8i32, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i64, 4*20 },
|
||||
{ ISD::UDIV, MVT::v32i8, 32*20 },
|
||||
{ ISD::UDIV, MVT::v16i16, 16*20 },
|
||||
{ ISD::UDIV, MVT::v8i32, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i64, 4*20 },
|
||||
};
|
||||
|
||||
// Look for AVX2 lowering tricks for custom cases.
|
||||
if (ST->hasAVX())
|
||||
if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD,
|
||||
LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry
|
||||
SSE2UniformCostTable[] = {
|
||||
static const CostTblEntry SSE2UniformShiftCostTable[] = {
|
||||
// Uniform splats are cheaper for the following instructions.
|
||||
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
|
||||
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
|
||||
{ ISD::SHL, MVT::v16i16, 2 }, // psllw.
|
||||
{ ISD::SHL, MVT::v4i32, 1 }, // pslld
|
||||
{ ISD::SHL, MVT::v8i32, 2 }, // pslld
|
||||
{ ISD::SHL, MVT::v2i64, 1 }, // psllq.
|
||||
{ ISD::SHL, MVT::v4i64, 2 }, // psllq.
|
||||
|
||||
{ ISD::SRL, MVT::v16i8, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v32i8, 2 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v8i16, 1 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v16i16, 2 }, // psrlw.
|
||||
{ ISD::SRL, MVT::v4i32, 1 }, // psrld.
|
||||
{ ISD::SRL, MVT::v8i32, 2 }, // psrld.
|
||||
{ ISD::SRL, MVT::v2i64, 1 }, // psrlq.
|
||||
{ ISD::SRL, MVT::v4i64, 2 }, // psrlq.
|
||||
|
||||
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
|
||||
{ ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb.
|
||||
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
|
||||
{ ISD::SRA, MVT::v16i16, 2 }, // psraw.
|
||||
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
|
||||
{ ISD::SRA, MVT::v8i32, 2 }, // psrad.
|
||||
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
|
||||
{ ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
|
||||
@ -414,7 +398,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
|
||||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
|
||||
if (const auto *Entry =
|
||||
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
|
||||
CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
}
|
||||
|
||||
@ -422,24 +406,98 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
|
||||
MVT VT = LT.second;
|
||||
// Vector shift left by non uniform constant can be lowered
|
||||
// into vector multiply (pmullw/pmulld).
|
||||
if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
|
||||
(VT == MVT::v4i32 && ST->hasSSE41()))
|
||||
return LT.first;
|
||||
|
||||
// v16i16 and v8i32 shifts by non-uniform constants are lowered into a
|
||||
// sequence of extract + two vector multiply + insert.
|
||||
if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
|
||||
(ST->hasAVX() && !ST->hasAVX2()))
|
||||
ISD = ISD::MUL;
|
||||
|
||||
// A vector shift left by non uniform constant is converted
|
||||
// into a vector multiply; the new multiply is eventually
|
||||
// lowered into a sequence of shuffles and 2 x pmuludq.
|
||||
if (VT == MVT::v4i32 && ST->hasSSE2())
|
||||
// into vector multiply.
|
||||
if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) ||
|
||||
((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX()))
|
||||
ISD = ISD::MUL;
|
||||
}
|
||||
|
||||
static const CostTblEntry AVX2CostTable[] = {
|
||||
{ ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
|
||||
{ ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
|
||||
|
||||
{ ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
|
||||
{ ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
|
||||
|
||||
{ ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
|
||||
{ ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
|
||||
{ ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
|
||||
{ ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
|
||||
|
||||
{ ISD::SUB, MVT::v32i8, 1 }, // psubb
|
||||
{ ISD::ADD, MVT::v32i8, 1 }, // paddb
|
||||
{ ISD::SUB, MVT::v16i16, 1 }, // psubw
|
||||
{ ISD::ADD, MVT::v16i16, 1 }, // paddw
|
||||
{ ISD::SUB, MVT::v8i32, 1 }, // psubd
|
||||
{ ISD::ADD, MVT::v8i32, 1 }, // paddd
|
||||
{ ISD::SUB, MVT::v4i64, 1 }, // psubq
|
||||
{ ISD::ADD, MVT::v4i64, 1 }, // paddq
|
||||
|
||||
{ ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v16i16, 1 }, // pmullw
|
||||
{ ISD::MUL, MVT::v8i32, 1 }, // pmulld
|
||||
{ ISD::MUL, MVT::v4i64, 8 }, // 3*pmuludq/3*shift/2*add
|
||||
|
||||
{ ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
|
||||
};
|
||||
|
||||
// Look for AVX2 lowering tricks for custom cases.
|
||||
if (ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX1CostTable[] = {
|
||||
// We don't have to scalarize unsupported ops. We can issue two half-sized
|
||||
// operations and we only need to extract the upper YMM half.
|
||||
// Two ops + 1 extract + 1 insert = 4.
|
||||
{ ISD::MUL, MVT::v16i16, 4 },
|
||||
{ ISD::MUL, MVT::v8i32, 4 },
|
||||
{ ISD::SUB, MVT::v32i8, 4 },
|
||||
{ ISD::ADD, MVT::v32i8, 4 },
|
||||
{ ISD::SUB, MVT::v16i16, 4 },
|
||||
{ ISD::ADD, MVT::v16i16, 4 },
|
||||
{ ISD::SUB, MVT::v8i32, 4 },
|
||||
{ ISD::ADD, MVT::v8i32, 4 },
|
||||
{ ISD::SUB, MVT::v4i64, 4 },
|
||||
{ ISD::ADD, MVT::v4i64, 4 },
|
||||
|
||||
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
|
||||
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
|
||||
// Because we believe v4i64 to be a legal type, we must also include the
|
||||
// extract+insert in the cost table. Therefore, the cost here is 18
|
||||
// instead of 8.
|
||||
{ ISD::MUL, MVT::v4i64, 18 },
|
||||
|
||||
{ ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
|
||||
|
||||
{ ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/
|
||||
|
||||
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
|
||||
{ ISD::SDIV, MVT::v32i8, 32*20 },
|
||||
{ ISD::SDIV, MVT::v16i16, 16*20 },
|
||||
{ ISD::SDIV, MVT::v8i32, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i64, 4*20 },
|
||||
{ ISD::UDIV, MVT::v32i8, 32*20 },
|
||||
{ ISD::UDIV, MVT::v16i16, 16*20 },
|
||||
{ ISD::UDIV, MVT::v8i32, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i64, 4*20 },
|
||||
};
|
||||
|
||||
if (ST->hasAVX())
|
||||
if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry SSE42CostTable[] = {
|
||||
{ ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
|
||||
@ -456,6 +514,8 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
{ ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
|
||||
{ ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
|
||||
{ ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence.
|
||||
{ ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld
|
||||
{ ISD::SHL, MVT::v8i32, 2*4 }, // pslld/paddd/cvttps2dq/pmulld
|
||||
|
||||
{ ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
|
||||
{ ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence.
|
||||
@ -501,6 +561,7 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
|
||||
|
||||
{ ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
|
||||
{ ISD::MUL, MVT::v8i16, 1 }, // pmullw
|
||||
{ ISD::MUL, MVT::v4i32, 6 }, // 3*pmuludq/4*shuffle
|
||||
{ ISD::MUL, MVT::v2i64, 8 }, // 3*pmuludq/3*shift/2*add
|
||||
|
||||
@ -516,46 +577,19 @@ int X86TTIImpl::getArithmeticInstrCost(
|
||||
// generally a bad idea. Assume somewhat arbitrarily that we have to be able
|
||||
// to hide "20 cycles" for each lane.
|
||||
{ ISD::SDIV, MVT::v16i8, 16*20 },
|
||||
{ ISD::SDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::SDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::SDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::SDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::SDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::UDIV, MVT::v16i8, 16*20 },
|
||||
{ ISD::UDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::UDIV, MVT::v2i64, 2*20 },
|
||||
{ ISD::UDIV, MVT::v8i16, 8*20 },
|
||||
{ ISD::UDIV, MVT::v4i32, 4*20 },
|
||||
{ ISD::UDIV, MVT::v2i64, 2*20 },
|
||||
};
|
||||
|
||||
if (ST->hasSSE2())
|
||||
if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry AVX1CostTable[] = {
|
||||
// We don't have to scalarize unsupported ops. We can issue two half-sized
|
||||
// operations and we only need to extract the upper YMM half.
|
||||
// Two ops + 1 extract + 1 insert = 4.
|
||||
{ ISD::MUL, MVT::v16i16, 4 },
|
||||
{ ISD::MUL, MVT::v8i32, 4 },
|
||||
{ ISD::SUB, MVT::v32i8, 4 },
|
||||
{ ISD::ADD, MVT::v32i8, 4 },
|
||||
{ ISD::SUB, MVT::v16i16, 4 },
|
||||
{ ISD::ADD, MVT::v16i16, 4 },
|
||||
{ ISD::SUB, MVT::v8i32, 4 },
|
||||
{ ISD::ADD, MVT::v8i32, 4 },
|
||||
{ ISD::SUB, MVT::v4i64, 4 },
|
||||
{ ISD::ADD, MVT::v4i64, 4 },
|
||||
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
|
||||
// are lowered as a series of long multiplies(3), shifts(3) and adds(2)
|
||||
// Because we believe v4i64 to be a legal type, we must also include the
|
||||
// extract+insert in the cost table. Therefore, the cost here is 18
|
||||
// instead of 8.
|
||||
{ ISD::MUL, MVT::v4i64, 18 },
|
||||
};
|
||||
|
||||
// Look for AVX1 lowering tricks.
|
||||
if (ST->hasAVX() && !ST->hasAVX2())
|
||||
if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second))
|
||||
return LT.first * Entry->Cost;
|
||||
|
||||
static const CostTblEntry SSE1CostTable[] = {
|
||||
{ ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
|
||||
{ ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
|
||||
@ -639,8 +673,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
|
||||
|
||||
{ TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
|
||||
{ TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
|
||||
{ TTI::SK_Reverse, MVT::v64i8, 6 }, // vextracti64x4 + 2*vperm2i128
|
||||
// + 2*pshufb + vinserti64x4
|
||||
{ TTI::SK_Reverse, MVT::v64i8, 2 }, // pshufb + vshufi64x2
|
||||
|
||||
{ TTI::SK_PermuteSingleSrc, MVT::v32i16, 1 }, // vpermw
|
||||
{ TTI::SK_PermuteSingleSrc, MVT::v16i16, 1 }, // vpermw
|
||||
|
@ -42,6 +42,8 @@
using namespace llvm;
using namespace lowertypetests;

using SummaryAction = LowerTypeTestsSummaryAction;

#define DEBUG_TYPE "lowertypetests"

STATISTIC(ByteArraySizeBits, "Byte array size in bits");
@ -55,9 +57,15 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));

static cl::opt<std::string> ClSummaryAction(
static cl::opt<SummaryAction> ClSummaryAction(
"lowertypetests-summary-action",
cl::desc("What to do with the summary when running this pass"), cl::Hidden);
cl::desc("What to do with the summary when running this pass"),
cl::values(clEnumValN(SummaryAction::None, "none", "Do nothing"),
clEnumValN(SummaryAction::Import, "import",
"Import typeid resolutions from summary and globals"),
clEnumValN(SummaryAction::Export, "export",
"Export typeid resolutions to summary and globals")),
cl::Hidden);

static cl::opt<std::string> ClReadSummary(
"lowertypetests-read-summary",
@ -226,8 +234,8 @@ class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
class LowerTypeTestsModule {
Module &M;

// This is for testing purposes only.
std::unique_ptr<ModuleSummaryIndex> OwnedSummary;
SummaryAction Action;
ModuleSummaryIndex *Summary;

bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
@ -319,21 +327,38 @@ class LowerTypeTestsModule {
void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);

public:
LowerTypeTestsModule(Module &M);
~LowerTypeTestsModule();
LowerTypeTestsModule(Module &M, SummaryAction Action,
ModuleSummaryIndex *Summary);
bool lower();

// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
static bool runForTesting(Module &M);
};

struct LowerTypeTests : public ModulePass {
static char ID;
LowerTypeTests() : ModulePass(ID) {

bool UseCommandLine = false;

SummaryAction Action;
ModuleSummaryIndex *Summary;

LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}

LowerTypeTests(SummaryAction Action, ModuleSummaryIndex *Summary)
: ModulePass(ID), Action(Action), Summary(Summary) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}

bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
return LowerTypeTestsModule(M).lower();
if (UseCommandLine)
return LowerTypeTestsModule::runForTesting(M);
return LowerTypeTestsModule(M, Action, Summary).lower();
}
};

@ -343,7 +368,10 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
false)
char LowerTypeTests::ID = 0;

ModulePass *llvm::createLowerTypeTestsPass() { return new LowerTypeTests; }
ModulePass *llvm::createLowerTypeTestsPass(SummaryAction Action,
ModuleSummaryIndex *Summary) {
return new LowerTypeTests(Action, Summary);
}

/// Build a bit set for TypeId using the object layouts in
/// GlobalLayout.
@ -1145,22 +1173,12 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
}

/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
if (!ClSummaryAction.empty()) {
OwnedSummary = make_unique<ModuleSummaryIndex>();
if (!ClReadSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
": ");
auto ReadSummaryFile =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));

yaml::Input In(ReadSummaryFile->getBuffer());
In >> *OwnedSummary;
ExitOnErr(errorCodeToError(In.error()));
}
}
LowerTypeTestsModule::LowerTypeTestsModule(Module &M, SummaryAction Action,
ModuleSummaryIndex *Summary)
: M(M), Action(Action), Summary(Summary) {
// FIXME: Use these fields.
(void)this->Action;
(void)this->Summary;

Triple TargetTriple(M.getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
@ -1169,18 +1187,36 @@ LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
ObjectFormat = TargetTriple.getObjectFormat();
}

LowerTypeTestsModule::~LowerTypeTestsModule() {
if (ClSummaryAction.empty() || ClWriteSummary.empty())
return;
bool LowerTypeTestsModule::runForTesting(Module &M) {
ModuleSummaryIndex Summary;

ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
": ");
std::error_code EC;
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
ExitOnErr(errorCodeToError(EC));
// Handle the command-line summary arguments. This code is for testing
// purposes only, so we handle errors directly.
if (!ClReadSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
": ");
auto ReadSummaryFile =
ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));

yaml::Output Out(OS);
Out << *OwnedSummary;
yaml::Input In(ReadSummaryFile->getBuffer());
In >> Summary;
ExitOnErr(errorCodeToError(In.error()));
}

bool Changed = LowerTypeTestsModule(M, ClSummaryAction, &Summary).lower();

if (!ClWriteSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
": ");
std::error_code EC;
raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
ExitOnErr(errorCodeToError(EC));

yaml::Output Out(OS);
Out << Summary;
}

return Changed;
}

bool LowerTypeTestsModule::lower() {
@ -1313,7 +1349,8 @@ bool LowerTypeTestsModule::lower() {

PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
bool Changed = LowerTypeTestsModule(M).lower();
bool Changed =
LowerTypeTestsModule(M, SummaryAction::None, /*Summary=*/nullptr).lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();

@ -857,7 +857,8 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
// Lower type metadata and the type.test intrinsic. This pass supports Clang's
// control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
PM.add(createLowerTypeTestsPass());
PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
/*Summary=*/nullptr));

if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);

@ -1903,7 +1903,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
return foldICmpShlOne(Cmp, Shl, C);

// Check that the shift amount is in range. If not, don't perform undefined
// shifts. When the shift is visited it will be simplified.
// shifts. When the shift is visited, it will be simplified.
unsigned TypeBits = C->getBitWidth();
if (ShiftAmt->uge(TypeBits))
return nullptr;
@ -1923,7 +1923,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
return new ICmpInst(Pred, X, LShrC);

if (Shl->hasOneUse()) {
// Otherwise strength reduce the shift into an and.
// Otherwise, strength reduce the shift into an and.
Constant *Mask = ConstantInt::get(Shl->getType(),
APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));

@ -1951,7 +1951,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
}

// When the shift is nuw and pred is >u or <=u, comparison only really happens
// in the pre-shifted bits. Since InstSimplify canoncalizes <=u into <u, the
// in the pre-shifted bits. Since InstSimplify canonicalizes <=u into <u, the
// <=u case can be further converted to match <u (see below).
if (Shl->hasNoUnsignedWrap() &&
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT)) {
@ -1970,9 +1970,9 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
// Transform (icmp pred iM (shl iM %v, N), C)
// -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
// Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N.
// This enables us to get rid of the shift in favor of a trunc which can be
// This enables us to get rid of the shift in favor of a trunc that may be
// free on the target. It has the additional benefit of comparing to a
// smaller constant, which will be target friendly.
// smaller constant that may be more target-friendly.
unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
if (Shl->hasOneUse() && Amt != 0 && C->countTrailingZeros() >= Amt &&
DL.isLegalInteger(TypeBits - Amt)) {

@ -1818,6 +1818,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
RegisteredFlag = new GlobalVariable(
M, IntptrTy, false, GlobalVariable::CommonLinkage,
ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility);

// Update llvm.compiler.used, adding the new liveness globals. This is
// needed so that during LTO these variables stay alive. The alternative

@ -1423,7 +1423,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
if (widenLoopCompare(DU))
return nullptr;

// This user does not evaluate to a recurence after widening, so don't
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
truncateIVUse(DU, DT, LI);

@ -415,7 +415,9 @@ class LoadEliminationForLoop {
Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
PH->getTerminator());
Value *Initial =
new LoadInst(InitialPtr, "load_initial", PH->getTerminator());
new LoadInst(InitialPtr, "load_initial", /* isVolatile */ false,
Cand.Load->getAlignment(), PH->getTerminator());

PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
&L->getHeader()->front());
PHI->addIncoming(Initial, PH);

@ -1382,8 +1382,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(),
Succ->begin(), Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
BI->eraseFromParent();

// Remove Succ from the loop tree.
LI->removeBlock(Succ);

@ -79,7 +79,8 @@ STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
STATISTIC(NumGVNPhisAllSame, "Number of PHIs whose arguments are all the same");
STATISTIC(NumGVNMaxIterations, "Maximum Number of iterations it took to converge GVN");
STATISTIC(NumGVNMaxIterations,
"Maximum Number of iterations it took to converge GVN");

//===----------------------------------------------------------------------===//
// GVN Pass
@ -327,7 +328,7 @@ class NewGVN : public FunctionPass {
// Elimination.
struct ValueDFS;
void convertDenseToDFSOrdered(CongruenceClass::MemberSet &,
std::vector<ValueDFS> &);
SmallVectorImpl<ValueDFS> &);

bool eliminateInstructions(Function &);
void replaceInstruction(Instruction *, Value *);
@ -336,8 +337,11 @@ class NewGVN : public FunctionPass {

// New instruction creation.
void handleNewInstruction(Instruction *){};

// Various instruction touch utilities
void markUsersTouched(Value *);
void markMemoryUsersTouched(MemoryAccess *);
void markLeaderChangeTouched(CongruenceClass *CC);

// Utilities.
void cleanupTables();
@ -390,10 +394,10 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false)

PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
BasicBlock *PhiBlock = I->getParent();
BasicBlock *PHIBlock = I->getParent();
auto *PN = cast<PHINode>(I);
auto *E = new (ExpressionAllocator)
PHIExpression(PN->getNumOperands(), I->getParent());
auto *E =
new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);

E->allocateOperands(ArgRecycler, ExpressionAllocator);
E->setType(I->getType());
@ -408,10 +412,10 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) {

std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use &U) -> Value * {
// Don't try to transform self-defined phis
// Don't try to transform self-defined phis.
if (U == PN)
return PN;
const BasicBlockEdge BBE(PN->getIncomingBlock(U), PhiBlock);
const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock);
return lookupOperandLeader(U, I, BBE);
});
return E;

@ -710,6 +714,15 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
return E;
}

// Utility function to check whether the congruence class has a member other
// than the given instruction.
bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) {
// Either it has more than one member, in which case it must contain something
// other than us (because it's indexed by value), or if it only has one member
// right now, that member should not be us.
return CC->Members.size() > 1 || CC->Members.count(I) == 0;
}

const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
const BasicBlock *B) {
// Unlike loads, we never try to eliminate stores, so we do not check if they
@ -725,8 +738,12 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
cast<MemoryDef>(StoreAccess)->getDefiningAccess());
const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
// Basically, check if the congruence class the store is in is defined by a
// store that isn't us, and has the same value. MemorySSA takes care of
// ensuring the store has the same memory state as us already.
if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) &&
hasMemberOtherThanUs(CC, I))
return createStoreExpression(SI, StoreRHS, B);
}

@ -810,36 +827,50 @@ bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) {
const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
const BasicBlock *B) {
auto *E = cast<PHIExpression>(createPHIExpression(I));
if (E->op_empty()) {
// We match the semantics of SimplifyPhiNode from InstructionSimplify here.

// See if all arguments are the same.
// We track if any were undef because they need special handling.
bool HasUndef = false;
auto Filtered = make_filter_range(E->operands(), [&](const Value *Arg) {
if (Arg == I)
return false;
if (isa<UndefValue>(Arg)) {
HasUndef = true;
return false;
}
return true;
});
// If we are left with no operands, it's undef
if (Filtered.begin() == Filtered.end()) {
DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
<< "\n");
E->deallocateOperands(ArgRecycler);
ExpressionAllocator.Deallocate(E);
return createConstantExpression(UndefValue::get(I->getType()));
}

Value *AllSameValue = E->getOperand(0);

// See if all arguments are the same, ignoring undef arguments, because we can
// choose a value that is the same for them.
for (const Value *Arg : E->operands())
if (Arg != AllSameValue && !isa<UndefValue>(Arg)) {
AllSameValue = nullptr;
break;
Value *AllSameValue = *(Filtered.begin());
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
if (llvm::all_of(Filtered, [AllSameValue](const Value *V) {
return V == AllSameValue;
})) {
// In LLVM's non-standard representation of phi nodes, it's possible to have
// phi nodes with cycles (IE dependent on other phis that are .... dependent
// on the original phi node), especially in weird CFG's where some arguments
// are unreachable, or uninitialized along certain paths. This can cause
// infinite loops during evaluation. We work around this by not trying to
// really evaluate them independently, but instead using a variable
// expression to say if one is equivalent to the other.
// We also special case undef, so that if we have an undef, we can't use the
// common value unless it dominates the phi block.
if (HasUndef) {
// Only have to check for instructions
if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
if (!DT->dominates(AllSameInst, I))
return E;
}

if (AllSameValue) {
// It's possible to have phi nodes with cycles (IE dependent on
// other phis that are .... dependent on the original phi node),
// especially in weird CFG's where some arguments are unreachable, or
// uninitialized along certain paths.
// This can cause infinite loops during evaluation (even if you disable
// the recursion below, you will simply ping-pong between congruence
// classes). If a phi node symbolically evaluates to another phi node,
// just leave it alone. If they are really the same, we will still
// eliminate them in favor of each other.
if (isa<PHINode>(AllSameValue))
return E;
NumGVNPhisAllSame++;
DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
<< "\n");
@ -1007,12 +1038,22 @@ void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) {
}
}

// Touch the instructions that need to be updated after a congruence class has a
// leader change, and mark changed values.
void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
for (auto M : CC->Members) {
if (auto *I = dyn_cast<Instruction>(M))
TouchedInstructions.set(InstrDFS[I]);
ChangedValues.insert(M);
}
}

// Perform congruence finding on a given value numbering expression.
void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {

ValueToExpression[V] = E;
// This is guaranteed to return something, since it will at least find
// INITIAL.

CongruenceClass *VClass = ValueToClass[V];
assert(VClass && "Should have found a vclass");
// Dead classes should have been eliminated from the mapping.
@ -1031,14 +1072,17 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
place->second = NewClass;

// Constants and variables should always be made the leader.
if (const auto *CE = dyn_cast<ConstantExpression>(E))
if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
NewClass->RepLeader = CE->getConstantValue();
else if (const auto *VE = dyn_cast<VariableExpression>(E))
NewClass->RepLeader = VE->getVariableValue();
else if (const auto *SE = dyn_cast<StoreExpression>(E))
NewClass->RepLeader = SE->getStoreInst()->getValueOperand();
else
} else if (const auto *SE = dyn_cast<StoreExpression>(E)) {
StoreInst *SI = SE->getStoreInst();
NewClass->RepLeader =
lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
} else {
NewClass->RepLeader = V;
}
assert(!isa<VariableExpression>(E) &&
"VariableExpression should have been handled already");

EClass = NewClass;
DEBUG(dbgs() << "Created new congruence class for " << *V
@ -1077,14 +1121,11 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
ExpressionToClass.erase(VClass->DefiningExpr);
}
} else if (VClass->RepLeader == V) {
// FIXME: When the leader changes, the value numbering of
// everything may change, so we need to reprocess.
// When the leader changes, the value numbering of
// everything may change due to symbolization changes, so we need to
// reprocess.
VClass->RepLeader = *(VClass->Members.begin());
for (auto M : VClass->Members) {
if (auto *I = dyn_cast<Instruction>(M))
TouchedInstructions.set(InstrDFS[I]);
ChangedValues.insert(M);
}
markLeaderChangeTouched(VClass);
}
}

@ -1106,6 +1147,27 @@ void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
markMemoryUsersTouched(MA);
}
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(V)) {
// There is, sadly, one complicating thing for stores. Stores do not
// produce values, only consume them. However, in order to make loads and
// stores value number the same, we ignore the value operand of the store.
// But the value operand will still be the leader of our class, and thus, it
// may change. Because the store is a use, the store will get reprocessed,
// but nothing will change about it, and so nothing above will catch it
// (since the class will not change). In order to make sure everything ends
// up okay, we need to recheck the leader of the class. Since stores of
// different values value number differently due to different memorydefs, we
// are guaranteed the leader is always the same between stores in the same
// class.
DEBUG(dbgs() << "Checking store leader\n");
auto ProperLeader =
lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
if (EClass->RepLeader != ProperLeader) {
DEBUG(dbgs() << "Store leader changed, fixing\n");
EClass->RepLeader = ProperLeader;
markLeaderChangeTouched(EClass);
markMemoryUsersTouched(MSSA->getMemoryAccess(SI));
}
}
}

@ -1708,8 +1770,9 @@ struct NewGVN::ValueDFS {
}
};

void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
std::vector<ValueDFS> &DFSOrderedSet) {
void NewGVN::convertDenseToDFSOrdered(
CongruenceClass::MemberSet &Dense,
SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
for (auto D : Dense) {
// First add the value.
BasicBlock *BB = getBlockForValue(D);
@ -1972,21 +2035,25 @@ bool NewGVN::eliminateInstructions(Function &F) {
ValueDFSStack EliminationStack;

// Convert the members to DFS ordered sets and then merge them.
std::vector<ValueDFS> DFSOrderedSet;
SmallVector<ValueDFS, 8> DFSOrderedSet;
convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);

// Sort the whole thing.
sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());

for (auto &C : DFSOrderedSet) {
int MemberDFSIn = C.DFSIn;
int MemberDFSOut = C.DFSOut;
Value *Member = C.Val;
Use *MemberUse = C.U;
for (auto &VD : DFSOrderedSet) {
int MemberDFSIn = VD.DFSIn;
int MemberDFSOut = VD.DFSOut;
Value *Member = VD.Val;
Use *MemberUse = VD.U;

// We ignore void things because we can't get a value from them.
if (Member && Member->getType()->isVoidTy())
continue;
if (Member) {
// We ignore void things because we can't get a value from them.
// FIXME: We could actually use this to kill dead stores that are
// dominated by equivalent earlier stores.
if (Member->getType()->isVoidTy())
continue;
}

if (EliminationStack.empty()) {
DEBUG(dbgs() << "Elimination Stack is empty\n");
@ -1995,8 +2062,6 @@ bool NewGVN::eliminateInstructions(Function &F) {
<< EliminationStack.dfs_back().first << ","
<< EliminationStack.dfs_back().second << ")\n");
}
if (Member && isa<Constant>(Member))
assert(isa<Constant>(CC->RepLeader));

DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << ","
<< MemberDFSOut << ")\n");
@ -2037,11 +2102,8 @@ bool NewGVN::eliminateInstructions(Function &F) {
continue;
Value *Result = EliminationStack.back();

// Don't replace our existing users with ourselves, and don't replace
// phi node arguments with the result of the same phi node.
// IE tmp = phi(tmp11, undef); tmp11 = foo -> tmp = phi(tmp, undef)
if (MemberUse->get() == Result ||
(isa<PHINode>(Result) && MemberUse->getUser() == Result))
// Don't replace our existing users with ourselves.
if (MemberUse->get() == Result)
continue;

DEBUG(dbgs() << "Found replacement " << *Result << " for "

@ -511,9 +511,6 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
void visitSelectInst(SelectInst &I);
void visitBinaryOperator(Instruction &I);
void visitCmpInst(CmpInst &I);
void visitExtractElementInst(ExtractElementInst &I);
void visitInsertElementInst(InsertElementInst &I);
void visitShuffleVectorInst(ShuffleVectorInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
@ -970,21 +967,6 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
markOverdefined(&I);
}

void SCCPSolver::visitExtractElementInst(ExtractElementInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}

void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}

void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
// TODO : SCCP does not handle vectors properly.
return markOverdefined(&I);
}

// Handle getelementptr instructions. If all operands are constants then we
// can turn this into a getelementptr ConstantExpr.
//

@ -67,12 +67,15 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
return true;
}

// When exporting, consult the index.
auto Summaries = ImportIndex.findGlobalValueSummaryList(SGV->getGUID());
assert(Summaries != ImportIndex.end() &&
"Missing summary for global value when exporting");
assert(Summaries->second.size() == 1 && "Local has more than one summary");
auto Linkage = Summaries->second.front()->linkage();
// When exporting, consult the index. We can have more than one local
// with the same GUID, in the case of same-named locals in different but
// same-named source files that were compiled in their respective directories
// (so the source file name and resulting GUID is the same). Find the one
// in this module.
auto Summary = ImportIndex.findSummaryInModule(
SGV->getGUID(), SGV->getParent()->getModuleIdentifier());
assert(Summary && "Missing summary for global value when exporting");
auto Linkage = Summary->linkage();
if (!GlobalValue::isLocalLinkage(Linkage)) {
assert(!isNonRenamableLocal(*SGV) &&
"Attempting to promote non-renamable local");

@ -1189,19 +1189,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {

Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
StringRef Name = Callee->getName();
if (Name == "fabs" && hasFloatVersion(Name))
Ret = optimizeUnaryDoubleFP(CI, B, false);
return optimizeUnaryDoubleFP(CI, B, false);

Value *Op = CI->getArgOperand(0);
if (Instruction *I = dyn_cast<Instruction>(Op)) {
// Fold fabs(x * x) -> x * x; any squared FP value must already be positive.
if (I->getOpcode() == Instruction::FMul)
if (I->getOperand(0) == I->getOperand(1))
return Op;
}
return Ret;
return nullptr;
}

Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {

@ -783,6 +783,10 @@ class InnerLoopVectorizer {
// Similarly, we create a new latch condition when setting up the structure
// of the new loop, so the old one can become dead.
SmallPtrSet<Instruction *, 4> DeadInstructions;

// Holds the end values for each induction variable. We save the end values
// so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues;
};

class InnerLoopUnroller : public InnerLoopVectorizer {
@ -1879,13 +1883,6 @@ class LoopVectorizationCostModel {
unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
unsigned LoopCost);

/// \return The most profitable unroll factor.
/// This method finds the best unroll-factor based on register pressure and
/// other parameters. VF and LoopCost are the selected vectorization factor
/// and the cost of the selected VF.
unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
unsigned LoopCost);

/// \brief A struct that represents some properties of the register usage
/// of a loop.
struct RegisterUsage {
@ -3424,7 +3421,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Create phi nodes to merge from the backedge-taken check block.
PHINode *BCResumeVal = PHINode::Create(
OrigPhi->getType(), 3, "bc.resume.val", ScalarPH->getTerminator());
Value *EndValue;
Value *&EndValue = IVEndValues[OrigPhi];
if (OrigPhi == OldInduction) {
// We know what the end value is.
EndValue = CountRoundDown;
@ -3443,9 +3440,6 @@ void InnerLoopVectorizer::createEmptyLoop() {
// or the value at the end of the vectorized loop.
BCResumeVal->addIncoming(EndValue, MiddleBlock);

// Fix up external users of the induction variable.
fixupIVUsers(OrigPhi, II, CountRoundDown, EndValue, MiddleBlock);

// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);

@ -4116,11 +4110,23 @@ void InnerLoopVectorizer::vectorizeLoop() {
Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
} // end of for each Phi in PHIsToFix.

fixLCSSAPHIs();

// Make sure DomTree is updated.
// Update the dominator tree.
//
// FIXME: After creating the structure of the new loop, the dominator tree is
// no longer up-to-date, and it remains that way until we update it
// here. An out-of-date dominator tree is problematic for SCEV,
// because SCEVExpander uses it to guide code generation. The
// vectorizer use SCEVExpanders in several places. Instead, we should
// keep the dominator tree up-to-date as we go.
updateAnalysis();

// Fix-up external users of the induction variables.
for (auto &Entry : *Legal->getInductionVars())
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(LI->getLoopFor(LoopVectorBody)),
IVEndValues[Entry.first], LoopMiddleBlock);

fixLCSSAPHIs();
predicateInstructions();

// Remove redundant induction instructions.

@ -651,7 +651,8 @@ class Expr : public Stmt {
/// constant.
bool EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
const FunctionDecl *Callee,
ArrayRef<const Expr*> Args) const;
ArrayRef<const Expr*> Args,
const Expr *This = nullptr) const;

/// \brief If the current Expr is a pointer, this will try to statically
/// determine the number of bytes available where the pointer is pointing.

@ -140,12 +140,15 @@ class Argument<string name, bit optional, bit fake = 0> {
bit Fake = fake;
}

class BoolArgument<string name, bit opt = 0> : Argument<name, opt>;
class BoolArgument<string name, bit opt = 0, bit fake = 0> : Argument<name, opt,
fake>;
class IdentifierArgument<string name, bit opt = 0> : Argument<name, opt>;
class IntArgument<string name, bit opt = 0> : Argument<name, opt>;
class StringArgument<string name, bit opt = 0> : Argument<name, opt>;
class ExprArgument<string name, bit opt = 0> : Argument<name, opt>;
class FunctionArgument<string name, bit opt = 0> : Argument<name, opt>;
class FunctionArgument<string name, bit opt = 0, bit fake = 0> : Argument<name,
opt,
fake>;
class TypeArgument<string name, bit opt = 0> : Argument<name, opt>;
class UnsignedArgument<string name, bit opt = 0> : Argument<name, opt>;
class VariadicUnsignedArgument<string name> : Argument<name, 1>;
@ -1591,6 +1594,26 @@ def Unavailable : InheritableAttr {
let Documentation = [Undocumented];
}

def DiagnoseIf : InheritableAttr {
let Spellings = [GNU<"diagnose_if">];
let Subjects = SubjectList<[Function]>;
let Args = [ExprArgument<"Cond">, StringArgument<"Message">,
EnumArgument<"DiagnosticType",
"DiagnosticType",
["error", "warning"],
["DT_Error", "DT_Warning"]>,
BoolArgument<"ArgDependent", 0, /*fake*/ 1>,
FunctionArgument<"Parent", 0, /*fake*/ 1>];
let DuplicatesAllowedWhileMerging = 1;
let LateParsed = 1;
let AdditionalMembers = [{
bool isError() const { return diagnosticType == DT_Error; }
bool isWarning() const { return diagnosticType == DT_Warning; }
}];
let TemplateDependent = 1;
let Documentation = [DiagnoseIfDocs];
}

def ArcWeakrefUnavailable : InheritableAttr {
let Spellings = [GNU<"objc_arc_weak_reference_unavailable">];
let Subjects = SubjectList<[ObjCInterface], ErrorDiag>;

@ -378,6 +378,65 @@ template instantiation, so the value for ``T::number`` is known.
}];
}

def DiagnoseIfDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
The ``diagnose_if`` attribute can be placed on function declarations to emit
warnings or errors at compile-time if calls to the attributed function meet
certain user-defined criteria. For example:

.. code-block:: c
void abs(int a)
__attribute__((diagnose_if(a >= 0, "Redundant abs call", "warning")));
void must_abs(int a)
__attribute__((diagnose_if(a >= 0, "Redundant abs call", "error")));

int val = abs(1); // warning: Redundant abs call
int val2 = must_abs(1); // error: Redundant abs call
int val3 = abs(val);
int val4 = must_abs(val); // Because run-time checks are not emitted for
// diagnose_if attributes, this executes without
// issue.


``diagnose_if`` is closely related to ``enable_if``, with a few key differences:

* Overload resolution is not aware of ``diagnose_if`` attributes: they're
considered only after we select the best candidate from a given candidate set.
* Function declarations that differ only in their ``diagnose_if`` attributes are
considered to be redeclarations of the same function (not overloads).
* If the condition provided to ``diagnose_if`` cannot be evaluated, no
diagnostic will be emitted.

Otherwise, ``diagnose_if`` is essentially the logical negation of ``enable_if``.

As a result of bullet number two, ``diagnose_if`` attributes will stack on the
same function. For example:

.. code-block:: c

int foo() __attribute__((diagnose_if(1, "diag1", "warning")));
int foo() __attribute__((diagnose_if(1, "diag2", "warning")));

int bar = foo(); // warning: diag1
// warning: diag2
int (*fooptr)(void) = foo; // warning: diag1
// warning: diag2

constexpr int supportsAPILevel(int N) { return N < 5; }
int baz(int a)
__attribute__((diagnose_if(!supportsAPILevel(10),
"Upgrade to API level 10 to use baz", "error")));
int baz(int a)
__attribute__((diagnose_if(!a, "0 is not recommended.", "warning")));

int (*bazptr)(int) = baz; // error: Upgrade to API level 10 to use baz
int v = baz(0); // error: Upgrade to API level 10 to use baz

Query for this feature with ``__has_attribute(diagnose_if)``.
}];
}

def PassObjectSizeDocs : Documentation {
let Category = DocCatVariable; // Technically it's a parameter doc, but eh.
let Content = [{

@ -161,6 +161,8 @@ def ext_old_implicitly_unsigned_long_cxx : ExtWarn<
InGroup<CXX11Compat>;
def ext_clang_enable_if : Extension<"'enable_if' is a clang extension">,
InGroup<GccCompat>;
def ext_clang_diagnose_if : Extension<"'diagnose_if' is a clang extension">,
InGroup<GccCompat>;

// SEH
def err_seh_expected_handler : Error<

@ -495,6 +495,7 @@ def UnusedPropertyIvar : DiagGroup<"unused-property-ivar">;
def UnusedGetterReturnValue : DiagGroup<"unused-getter-return-value">;
def UsedButMarkedUnused : DiagGroup<"used-but-marked-unused">;
def UserDefinedLiterals : DiagGroup<"user-defined-literals">;
def UserDefinedWarnings : DiagGroup<"user-defined-warnings">;
def Reorder : DiagGroup<"reorder">;
def UndeclaredSelector : DiagGroup<"undeclared-selector">;
def ImplicitAtomic : DiagGroup<"implicit-atomic-properties">;
@ -683,7 +684,8 @@ def Most : DiagGroup<"most", [
OverloadedVirtual,
PrivateExtern,
SelTypeCast,
ExternCCompat
ExternCCompat,
UserDefinedWarnings
]>;

// Thread Safety warnings

@ -2141,8 +2141,11 @@ def err_constexpr_local_var_no_init : Error<
def ext_constexpr_function_never_constant_expr : ExtWarn<
"constexpr %select{function|constructor}0 never produces a "
"constant expression">, InGroup<DiagGroup<"invalid-constexpr">>, DefaultError;
def err_enable_if_never_constant_expr : Error<
"'enable_if' attribute expression never produces a constant expression">;
def err_attr_cond_never_constant_expr : Error<
"%0 attribute expression never produces a constant expression">;
def err_diagnose_if_invalid_diagnostic_type : Error<
"invalid diagnostic type for 'diagnose_if'; use \"error\" or \"warning\" "
"instead">;
def err_constexpr_body_no_return : Error<
"no return statement in constexpr function">;
def err_constexpr_return_missing_expr : Error<
@ -3333,6 +3336,9 @@ def note_ovl_candidate : Note<"candidate "

def note_ovl_candidate_inherited_constructor : Note<
"constructor from base class %0 inherited here">;
def note_ovl_candidate_inherited_constructor_slice : Note<
"constructor inherited from base class cannot be used to initialize from "
"an argument of the derived class type">;
def note_ovl_candidate_illegal_constructor : Note<
"candidate %select{constructor|template}0 ignored: "
"instantiation %select{takes|would take}0 its own class type by value">;
@ -3366,7 +3372,9 @@ def note_ovl_candidate_disabled_by_enable_if : Note<
def note_ovl_candidate_has_pass_object_size_params: Note<
"candidate address cannot be taken because parameter %0 has "
"pass_object_size attribute">;
def note_ovl_candidate_disabled_by_enable_if_attr : Note<
def err_diagnose_if_succeeded : Error<"%0">;
def warn_diagnose_if_succeeded : Warning<"%0">, InGroup<UserDefinedWarnings>;
def note_ovl_candidate_disabled_by_function_cond_attr : Note<
"candidate disabled: %0">;
def note_ovl_candidate_disabled_by_extension : Note<
"candidate disabled due to OpenCL extension">;
@ -4395,6 +4403,7 @@ def note_not_found_by_two_phase_lookup : Note<"%0 should be declared prior to th
def err_undeclared_use : Error<"use of undeclared %0">;
def warn_deprecated : Warning<"%0 is deprecated">,
InGroup<DeprecatedDeclarations>;
def note_from_diagnose_if : Note<"from 'diagnose_if' attribute on %0:">;
def warn_property_method_deprecated :
Warning<"property access is using %0 method which is deprecated">,
InGroup<DeprecatedDeclarations>;

@ -146,6 +146,7 @@ LANGOPT(Modules , 1, 0, "modules extension to C")
COMPATIBLE_LANGOPT(ModulesTS , 1, 0, "C++ Modules TS")
BENIGN_ENUM_LANGOPT(CompilingModule, CompilingModuleKind, 2, CMK_None,
"compiling a module interface")
BENIGN_LANGOPT(CompilingPCH, 1, 0, "building a pch")
COMPATIBLE_LANGOPT(ModulesDeclUse , 1, 0, "require declaration of module uses")
BENIGN_LANGOPT(ModulesSearchAll , 1, 1, "searching even non-imported modules to find unresolved references")
COMPATIBLE_LANGOPT(ModulesStrictDeclUse, 1, 0, "requiring declaration of module uses and all headers to be in modules")

@ -167,6 +167,9 @@ def disable_llvm_passes : Flag<["-"], "disable-llvm-passes">,
"frontend by not running any LLVM passes at all">;
def disable_llvm_optzns : Flag<["-"], "disable-llvm-optzns">,
Alias<disable_llvm_passes>;
def disable_lifetimemarkers : Flag<["-"], "disable-lifetime-markers">,
HelpText<"Disable lifetime-markers emission even when optimizations are "
"enabled">;
def disable_red_zone : Flag<["-"], "disable-red-zone">,
HelpText<"Do not emit code that uses the red zone.">;
def dwarf_column_info : Flag<["-"], "dwarf-column-info">,

@ -52,6 +52,7 @@ CODEGENOPT(DisableGCov , 1, 0) ///< Don't run the GCov pass, for testing.
CODEGENOPT(DisableLLVMPasses , 1, 0) ///< Don't run any LLVM IR passes to get
///< the pristine IR generated by the
///< frontend.
CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers
CODEGENOPT(ExperimentalNewPassManager, 1, 0) ///< Enables the new, experimental
///< pass manager.
CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled.

@ -88,6 +88,8 @@ class GeneratePCHAction : public ASTFrontendAction {
static std::unique_ptr<raw_pwrite_stream>
ComputeASTConsumerArguments(CompilerInstance &CI, StringRef InFile,
std::string &Sysroot, std::string &OutputFile);

bool BeginSourceFileAction(CompilerInstance &CI, StringRef Filename) override;
};

class GenerateModuleAction : public ASTFrontendAction {

@ -59,6 +59,13 @@ enum class SymbolLanguage {
CXX,
};

/// Language specific sub-kinds.
enum class SymbolSubKind {
None,
CXXCopyConstructor,
CXXMoveConstructor,
};

/// Set of properties that provide additional info about a symbol.
enum class SymbolProperty : uint8_t {
Generic = 1 << 0,
@ -107,6 +114,7 @@ struct SymbolRelation {

struct SymbolInfo {
SymbolKind Kind;
SymbolSubKind SubKind;
SymbolPropertySet Properties;
SymbolLanguage Lang;
};
@ -121,6 +129,7 @@ void printSymbolRoles(SymbolRoleSet Roles, raw_ostream &OS);
bool printSymbolName(const Decl *D, const LangOptions &LO, raw_ostream &OS);

StringRef getSymbolKindString(SymbolKind K);
StringRef getSymbolSubKindString(SymbolSubKind K);
StringRef getSymbolLanguageString(SymbolLanguage K);

void applyForEachSymbolProperty(SymbolPropertySet Props,

@ -215,14 +215,14 @@ class InitializedEntity {

/// \brief Create the initialization entity for a parameter.
static InitializedEntity InitializeParameter(ASTContext &Context,
ParmVarDecl *Parm) {
const ParmVarDecl *Parm) {
return InitializeParameter(Context, Parm, Parm->getType());
}

/// \brief Create the initialization entity for a parameter, but use
/// another type.
static InitializedEntity InitializeParameter(ASTContext &Context,
ParmVarDecl *Parm,
const ParmVarDecl *Parm,
QualType Type) {
bool Consumed = (Context.getLangOpts().ObjCAutoRefCount &&
Parm->hasAttr<NSConsumedAttr>());

@ -531,6 +531,13 @@ namespace clang {
Ambiguous.construct();
}

void setAsIdentityConversion(QualType T) {
setStandard();
Standard.setAsIdentityConversion();
Standard.setFromType(T);
Standard.setAllToTypes(T);
}

/// \brief Whether the target is really a std::initializer_list, and the
/// sequence only represents the worst element conversion.
bool isStdInitializerListElement() const {
@ -601,8 +608,17 @@ namespace clang {

/// This candidate was not viable because its OpenCL extension is disabled.
ovl_fail_ext_disabled,

/// This inherited constructor is not viable because it would slice the
/// argument.
ovl_fail_inhctor_slice,
};

/// A list of implicit conversion sequences for the arguments of an
/// OverloadCandidate.
typedef llvm::MutableArrayRef<ImplicitConversionSequence>
ConversionSequenceList;

/// OverloadCandidate - A single candidate in an overload set (C++ 13.3).
struct OverloadCandidate {
/// Function - The actual function that this candidate
@ -627,18 +643,13 @@ namespace clang {
/// is a surrogate, but only if IsSurrogate is true.
CXXConversionDecl *Surrogate;

/// Conversions - The conversion sequences used to convert the
/// function arguments to the function parameters, the pointer points to a
/// fixed size array with NumConversions elements. The memory is owned by
/// the OverloadCandidateSet.
ImplicitConversionSequence *Conversions;
/// The conversion sequences used to convert the function arguments
/// to the function parameters.
ConversionSequenceList Conversions;

/// The FixIt hints which can be used to fix the Bad candidate.
ConversionFixItGenerator Fix;

/// NumConversions - The number of elements in the Conversions array.
unsigned NumConversions;

/// Viable - True to indicate that this overload candidate is viable.
bool Viable;

@ -664,6 +675,26 @@ namespace clang {
/// to be used while performing partial ordering of function templates.
unsigned ExplicitCallArguments;

/// The number of diagnose_if attributes that this overload triggered.
/// If any of the triggered attributes are errors, this won't count
/// diagnose_if warnings.
unsigned NumTriggeredDiagnoseIfs = 0;

/// Basically a TinyPtrVector<DiagnoseIfAttr *> that doesn't own the vector:
/// If NumTriggeredDiagnoseIfs is 0 or 1, this is a DiagnoseIfAttr *,
/// otherwise it's a pointer to an array of `NumTriggeredDiagnoseIfs`
/// DiagnoseIfAttr *s.
llvm::PointerUnion<DiagnoseIfAttr *, DiagnoseIfAttr **> DiagnoseIfInfo;

/// Gets an ArrayRef for the data at DiagnoseIfInfo. Note that this may give
/// you a pointer into DiagnoseIfInfo.
ArrayRef<DiagnoseIfAttr *> getDiagnoseIfInfo() const {
auto *Ptr = NumTriggeredDiagnoseIfs <= 1
? DiagnoseIfInfo.getAddrOfPtr1()
: DiagnoseIfInfo.get<DiagnoseIfAttr **>();
return {Ptr, NumTriggeredDiagnoseIfs};
}

union {
DeductionFailureInfo DeductionFailure;

@ -677,9 +708,9 @@ namespace clang {
/// hasAmbiguousConversion - Returns whether this overload
/// candidate requires an ambiguous conversion or not.
bool hasAmbiguousConversion() const {
for (unsigned i = 0, e = NumConversions; i != e; ++i) {
if (!Conversions[i].isInitialized()) return false;
if (Conversions[i].isAmbiguous()) return true;
for (auto &C : Conversions) {
if (!C.isInitialized()) return false;
if (C.isAmbiguous()) return true;
}
return false;
}
@ -728,17 +759,42 @@ namespace clang {
SmallVector<OverloadCandidate, 16> Candidates;
llvm::SmallPtrSet<Decl *, 16> Functions;

// Allocator for OverloadCandidate::Conversions. We store the first few
// elements inline to avoid allocation for small sets.
llvm::BumpPtrAllocator ConversionSequenceAllocator;
// Allocator for ConversionSequenceLists and DiagnoseIfAttr* arrays.
// We store the first few of each of these inline to avoid allocation for
// small sets.
llvm::BumpPtrAllocator SlabAllocator;

SourceLocation Loc;
CandidateSetKind Kind;

unsigned NumInlineSequences;
llvm::AlignedCharArray<alignof(ImplicitConversionSequence),
16 * sizeof(ImplicitConversionSequence)>
InlineSpace;
constexpr static unsigned NumInlineBytes =
24 * sizeof(ImplicitConversionSequence);
unsigned NumInlineBytesUsed;
llvm::AlignedCharArray<alignof(void *), NumInlineBytes> InlineSpace;

/// If we have space, allocates from inline storage. Otherwise, allocates
/// from the slab allocator.
/// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
/// instead.
template <typename T>
T *slabAllocate(unsigned N) {
// It's simpler if this doesn't need to consider alignment.
static_assert(alignof(T) == alignof(void *),
"Only works for pointer-aligned types.");
static_assert(std::is_trivial<T>::value ||
std::is_same<ImplicitConversionSequence, T>::value,
"Add destruction logic to OverloadCandidateSet::clear().");

unsigned NBytes = sizeof(T) * N;
if (NBytes > NumInlineBytes - NumInlineBytesUsed)
return SlabAllocator.Allocate<T>(N);
char *FreeSpaceStart = InlineSpace.buffer + NumInlineBytesUsed;
assert(uintptr_t(FreeSpaceStart) % alignof(void *) == 0 &&
"Misaligned storage!");

NumInlineBytesUsed += NBytes;
return reinterpret_cast<T *>(FreeSpaceStart);
}

OverloadCandidateSet(const OverloadCandidateSet &) = delete;
void operator=(const OverloadCandidateSet &) = delete;
@ -747,12 +803,17 @@ namespace clang {

public:
OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK)
: Loc(Loc), Kind(CSK), NumInlineSequences(0) {}
: Loc(Loc), Kind(CSK), NumInlineBytesUsed(0) {}
~OverloadCandidateSet() { destroyCandidates(); }

SourceLocation getLocation() const { return Loc; }
CandidateSetKind getKind() const { return Kind; }

/// Make a DiagnoseIfAttr* array in a block of memory that will live for
/// as long as this OverloadCandidateSet. Returns a pointer to the start
/// of that array.
DiagnoseIfAttr **addDiagnoseIfComplaints(ArrayRef<DiagnoseIfAttr *> CA);

/// \brief Determine when this overload candidate will be new to the
/// overload set.
bool isNewCandidate(Decl *F) {
@ -769,30 +830,32 @@ namespace clang {
size_t size() const { return Candidates.size(); }
bool empty() const { return Candidates.empty(); }

/// \brief Add a new candidate with NumConversions conversion sequence slots
/// to the overload set.
OverloadCandidate &addCandidate(unsigned NumConversions = 0) {
Candidates.push_back(OverloadCandidate());
OverloadCandidate &C = Candidates.back();

// Assign space from the inline array if there are enough free slots
// available.
if (NumConversions + NumInlineSequences <= 16) {
ImplicitConversionSequence *I =
(ImplicitConversionSequence *)InlineSpace.buffer;
C.Conversions = &I[NumInlineSequences];
NumInlineSequences += NumConversions;
} else {
// Otherwise get memory from the allocator.
C.Conversions = ConversionSequenceAllocator
.Allocate<ImplicitConversionSequence>(NumConversions);
}
/// \brief Allocate storage for conversion sequences for NumConversions
/// conversions.
ConversionSequenceList
allocateConversionSequences(unsigned NumConversions) {
ImplicitConversionSequence *Conversions =
slabAllocate<ImplicitConversionSequence>(NumConversions);

// Construct the new objects.
for (unsigned i = 0; i != NumConversions; ++i)
new (&C.Conversions[i]) ImplicitConversionSequence();
for (unsigned I = 0; I != NumConversions; ++I)
new (&Conversions[I]) ImplicitConversionSequence();

C.NumConversions = NumConversions;
return ConversionSequenceList(Conversions, NumConversions);
}

/// \brief Add a new candidate with NumConversions conversion sequence slots
/// to the overload set.
OverloadCandidate &addCandidate(unsigned NumConversions = 0,
ConversionSequenceList Conversions = None) {
assert((Conversions.empty() || Conversions.size() == NumConversions) &&
"preallocated conversion sequence has wrong length");

Candidates.push_back(OverloadCandidate());
OverloadCandidate &C = Candidates.back();
C.Conversions = Conversions.empty()
? allocateConversionSequences(NumConversions)
: Conversions;
return C;
}

@ -27,6 +27,7 @@
|
||||
#include "clang/AST/NSAPI.h"
|
||||
#include "clang/AST/PrettyPrinter.h"
|
||||
#include "clang/AST/TypeLoc.h"
|
||||
#include "clang/AST/TypeOrdering.h"
|
||||
#include "clang/Basic/ExpressionTraits.h"
|
||||
#include "clang/Basic/LangOptions.h"
|
||||
#include "clang/Basic/Module.h"
|
||||
@ -119,6 +120,7 @@ namespace clang {
|
||||
class FunctionProtoType;
|
||||
class FunctionTemplateDecl;
|
||||
class ImplicitConversionSequence;
|
||||
typedef MutableArrayRef<ImplicitConversionSequence> ConversionSequenceList;
|
||||
class InitListExpr;
|
||||
class InitializationKind;
|
||||
class InitializationSequence;
|
||||
@ -806,6 +808,12 @@ class Sema {
|
||||
/// run time.
|
||||
Unevaluated,
|
||||
|
||||
/// \brief The current expression occurs within a braced-init-list within
|
||||
/// an unevaluated operand. This is mostly like a regular unevaluated
|
||||
/// context, except that we still instantiate constexpr functions that are
|
||||
/// referenced here so that we can perform narrowing checks correctly.
|
||||
UnevaluatedList,
|
||||
|
||||
/// \brief The current expression occurs within a discarded statement.
|
||||
/// This behaves largely similarly to an unevaluated operand in preventing
|
||||
/// definitions from being required, but not in other ways.
|
||||
@ -898,7 +906,8 @@ class Sema {
|
||||
MangleNumberingContext &getMangleNumberingContext(ASTContext &Ctx);
|
||||
|
||||
bool isUnevaluated() const {
|
||||
return Context == Unevaluated || Context == UnevaluatedAbstract;
|
||||
return Context == Unevaluated || Context == UnevaluatedAbstract ||
|
||||
Context == UnevaluatedList;
|
||||
}
|
||||
};
|
||||
|
||||
@ -2510,10 +2519,11 @@ class Sema {
|
||||
void AddOverloadCandidate(FunctionDecl *Function,
|
||||
DeclAccessPair FoundDecl,
|
||||
ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
OverloadCandidateSet &CandidateSet,
|
||||
bool SuppressUserConversions = false,
|
||||
bool PartialOverloading = false,
|
||||
bool AllowExplicit = false);
|
||||
bool AllowExplicit = false,
|
||||
ConversionSequenceList EarlyConversions = None);
|
||||
void AddFunctionCandidates(const UnresolvedSetImpl &Functions,
|
||||
ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet &CandidateSet,
|
||||
@ -2523,23 +2533,25 @@ class Sema {
|
||||
void AddMethodCandidate(DeclAccessPair FoundDecl,
|
||||
QualType ObjectType,
|
||||
Expr::Classification ObjectClassification,
|
||||
ArrayRef<Expr *> Args,
|
||||
Expr *ThisArg, ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
bool SuppressUserConversion = false);
|
||||
void AddMethodCandidate(CXXMethodDecl *Method,
|
||||
DeclAccessPair FoundDecl,
|
||||
CXXRecordDecl *ActingContext, QualType ObjectType,
|
||||
Expr::Classification ObjectClassification,
|
||||
ArrayRef<Expr *> Args,
|
||||
Expr *ThisArg, ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
bool SuppressUserConversions = false,
|
||||
bool PartialOverloading = false);
|
||||
bool PartialOverloading = false,
|
||||
ConversionSequenceList EarlyConversions = None);
|
||||
void AddMethodTemplateCandidate(FunctionTemplateDecl *MethodTmpl,
|
||||
DeclAccessPair FoundDecl,
|
||||
CXXRecordDecl *ActingContext,
|
||||
TemplateArgumentListInfo *ExplicitTemplateArgs,
|
||||
QualType ObjectType,
|
||||
Expr::Classification ObjectClassification,
|
||||
Expr *ThisArg,
|
||||
ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
bool SuppressUserConversions = false,
|
||||
@ -2551,6 +2563,16 @@ class Sema {
|
||||
OverloadCandidateSet& CandidateSet,
|
||||
bool SuppressUserConversions = false,
|
||||
bool PartialOverloading = false);
|
||||
bool CheckNonDependentConversions(FunctionTemplateDecl *FunctionTemplate,
|
||||
ArrayRef<QualType> ParamTypes,
|
||||
ArrayRef<Expr *> Args,
|
||||
OverloadCandidateSet &CandidateSet,
|
||||
ConversionSequenceList &Conversions,
|
||||
bool SuppressUserConversions,
|
||||
CXXRecordDecl *ActingContext = nullptr,
|
||||
QualType ObjectType = QualType(),
|
||||
Expr::Classification
|
||||
ObjectClassification = {});
|
||||
void AddConversionCandidate(CXXConversionDecl *Conversion,
|
||||
DeclAccessPair FoundDecl,
|
||||
CXXRecordDecl *ActingContext,
|
||||
@ -2603,6 +2625,38 @@ class Sema {
EnableIfAttr *CheckEnableIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
bool MissingImplicitThis = false);

/// Check the diagnose_if attributes on the given function. Returns the
/// first successful fatal attribute, or null if calling Function(Args) isn't
/// an error.
///
/// This only considers ArgDependent DiagnoseIfAttrs.
///
/// This will populate Nonfatal with all non-error DiagnoseIfAttrs that
/// succeed. If this function returns non-null, the contents of Nonfatal are
/// unspecified.
DiagnoseIfAttr *
checkArgDependentDiagnoseIf(FunctionDecl *Function, ArrayRef<Expr *> Args,
SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal,
bool MissingImplicitThis = false,
Expr *ThisArg = nullptr);

/// Check the diagnose_if expressions on the given function. Returns the
/// first successful fatal attribute, or null if using Function isn't
/// an error.
///
/// This ignores all ArgDependent DiagnoseIfAttrs.
///
/// This will populate Nonfatal with all non-error DiagnoseIfAttrs that
/// succeed. If this function returns non-null, the contents of Nonfatal are
/// unspecified.
DiagnoseIfAttr *
checkArgIndependentDiagnoseIf(FunctionDecl *Function,
SmallVectorImpl<DiagnoseIfAttr *> &Nonfatal);

/// Emits the diagnostic contained in the given DiagnoseIfAttr at Loc. Also
/// emits a note about the location of said attribute.
void emitDiagnoseIfDiagnostic(SourceLocation Loc, const DiagnoseIfAttr *DIA);

/// Returns whether the given function's address can be taken or not,
/// optionally emitting a diagnostic if the address can't be taken.
///
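A rough user-level sketch of what these helpers evaluate (illustrative only, not part of the diff; names are hypothetical, attribute spelling per clang's diagnose_if support of this period):

// An ArgDependent diagnose_if condition refers to the call arguments;
// checkArgDependentDiagnoseIf evaluates it once the argument values are known.
int safe_div(int a, int b)
    __attribute__((diagnose_if(b == 0, "division by zero", "error")))
    __attribute__((diagnose_if(b == 1, "dividing by one is a no-op", "warning")));

int use() {
  return safe_div(10, 0); // the "error" attribute fires and the call is rejected
}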
@ -3801,6 +3855,9 @@ class Sema {
/// variable will have in the given scope.
QualType getCapturedDeclRefType(VarDecl *Var, SourceLocation Loc);

/// Mark all of the declarations referenced within a particular AST node as
/// referenced. Used when template instantiation instantiates a non-dependent
/// type -- entities referenced by the type are now referenced.
void MarkDeclarationsReferencedInType(SourceLocation Loc, QualType T);
void MarkDeclarationsReferencedInExpr(Expr *E,
bool SkipLocalVariables = false);
@ -6580,6 +6637,8 @@ class Sema {
/// \brief The explicitly-specified template arguments were not valid
/// template arguments for the given template.
TDK_InvalidExplicitArguments,
/// \brief Checking non-dependent argument conversions failed.
TDK_NonDependentConversionFailure,
/// \brief Deduction failed; that's all we know.
TDK_MiscellaneousDeductionFailure,
/// \brief CUDA Target attributes do not match.
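A minimal sketch (illustrative, not from the diff; types are hypothetical) of a call this new deduction result describes: the template parameter deduces fine, but a non-dependent parameter cannot be initialized from its argument.

struct NoConv {};                       // no conversion from int
template <typename T> void f(T, NoConv);

void g() {
  f(42, 1); // T deduces to int, but 1 -> NoConv fails: a non-dependent conversion failure
}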
@ -6618,22 +6677,21 @@ class Sema {
QualType OriginalArgType;
};

TemplateDeductionResult
FinishTemplateArgumentDeduction(FunctionTemplateDecl *FunctionTemplate,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
unsigned NumExplicitlySpecified,
FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
SmallVectorImpl<OriginalCallArg> const *OriginalCallArgs = nullptr,
bool PartialOverloading = false);
TemplateDeductionResult FinishTemplateArgumentDeduction(
FunctionTemplateDecl *FunctionTemplate,
SmallVectorImpl<DeducedTemplateArgument> &Deduced,
unsigned NumExplicitlySpecified, FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
SmallVectorImpl<OriginalCallArg> const *OriginalCallArgs = nullptr,
bool PartialOverloading = false,
llvm::function_ref<bool()> CheckNonDependent = []{ return false; });

TemplateDeductionResult
DeduceTemplateArguments(FunctionTemplateDecl *FunctionTemplate,
TemplateArgumentListInfo *ExplicitTemplateArgs,
ArrayRef<Expr *> Args,
FunctionDecl *&Specialization,
sema::TemplateDeductionInfo &Info,
bool PartialOverloading = false);
TemplateDeductionResult DeduceTemplateArguments(
FunctionTemplateDecl *FunctionTemplate,
TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef<Expr *> Args,
FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info,
bool PartialOverloading,
llvm::function_ref<bool(ArrayRef<QualType>)> CheckNonDependent);

TemplateDeductionResult
DeduceTemplateArguments(FunctionTemplateDecl *FunctionTemplate,
@ -6877,6 +6935,10 @@ class Sema {
/// Specializations whose definitions are currently being instantiated.
llvm::DenseSet<std::pair<Decl *, unsigned>> InstantiatingSpecializations;

/// Non-dependent types used in templates that have already been instantiated
/// by some template instantiation.
llvm::DenseSet<QualType> InstantiatedNonDependentTypes;

/// \brief Extra modules inspected when performing a lookup during a template
/// instantiation. Computed lazily.
SmallVector<Module*, 16> ActiveTemplateInstantiationLookupModules;
@ -10186,6 +10248,22 @@ class EnterExpressionEvaluationContext {
IsDecltype);
}

enum InitListTag { InitList };
EnterExpressionEvaluationContext(Sema &Actions, InitListTag,
bool ShouldEnter = true)
: Actions(Actions), Entered(false) {
// In C++11 onwards, narrowing checks are performed on the contents of
// braced-init-lists, even when they occur within unevaluated operands.
// Therefore we still need to instantiate constexpr functions used in such
// a context.
if (ShouldEnter && Actions.isUnevaluatedContext() &&
Actions.getLangOpts().CPlusPlus11) {
Actions.PushExpressionEvaluationContext(Sema::UnevaluatedList, nullptr,
false);
Entered = true;
}
}

~EnterExpressionEvaluationContext() {
if (Entered)
Actions.PopExpressionEvaluationContext();
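A minimal sketch (illustrative, not from the diff; the helper is hypothetical) of the language behavior this UnevaluatedList context models: narrowing checks still apply to braced-init-lists inside unevaluated operands, so constexpr functions referenced there must be instantiated.

template <typename T> constexpr T zero() { return T(); }

// OK: zero<int>() must be instantiated to see that it is a constant expression
// whose value (0) fits in char, so the braced init does not narrow.
using A = decltype(char{zero<int>()});

// Ill-formed even though the operand of decltype is unevaluated:
// using B = decltype(int{3.5});   // narrowing from double to int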
@ -278,6 +278,14 @@ def VirtualCallChecker : Checker<"VirtualCall">,

} // end: "optin.cplusplus"

let ParentPackage = CplusplusAlpha in {

def IteratorPastEndChecker : Checker<"IteratorPastEnd">,
HelpText<"Check iterators used past end">,
DescFile<"IteratorPastEndChecker.cpp">;

} // end: "alpha.cplusplus"

//===----------------------------------------------------------------------===//
// Valist checkers.

@ -4543,6 +4543,12 @@ class ExprEvaluatorBase
Call.getLValueBase().dyn_cast<const ValueDecl*>());
if (!FD)
return Error(Callee);
// Don't call function pointers which have been cast to some other type.
// Per DR (no number yet), the caller and callee can differ in noexcept.
if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec(
CalleeType->getPointeeType(), FD->getType())) {
return Error(E);
}

// Overloaded operator calls to member functions are represented as normal
// calls with '*this' as the first argument.
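A minimal sketch (illustrative, not from the diff) of a call this check still permits: the function pointer and the callee may differ only in their exception specification.

constexpr int one() noexcept { return 1; }
constexpr int (*p)() = one;       // noexcept dropped from the pointer's type
static_assert(p() == 1, "");      // still evaluable: types match ignoring noexcept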
@ -4558,14 +4564,42 @@ class ExprEvaluatorBase
return false;
This = &ThisVal;
Args = Args.slice(1);
} else if (MD && MD->isLambdaStaticInvoker()) {
// Map the static invoker for the lambda back to the call operator.
// Conveniently, we don't have to slice out the 'this' argument (as is
// being done for the non-static case), since a static member function
// doesn't have an implicit argument passed in.
const CXXRecordDecl *ClosureClass = MD->getParent();
assert(
ClosureClass->captures_begin() == ClosureClass->captures_end() &&
"Number of captures must be zero for conversion to function-ptr");

const CXXMethodDecl *LambdaCallOp =
ClosureClass->getLambdaCallOperator();

// Set 'FD', the function that will be called below, to the call
// operator. If the closure object represents a generic lambda, find
// the corresponding specialization of the call operator.

if (ClosureClass->isGenericLambda()) {
assert(MD->isFunctionTemplateSpecialization() &&
"A generic lambda's static-invoker function must be a "
"template specialization");
const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
FunctionTemplateDecl *CallOpTemplate =
LambdaCallOp->getDescribedFunctionTemplate();
void *InsertPos = nullptr;
FunctionDecl *CorrespondingCallOpSpecialization =
CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
assert(CorrespondingCallOpSpecialization &&
"We must always have a function call operator specialization "
"that corresponds to our static invoker specialization");
FD = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
} else
FD = LambdaCallOp;
}

// Don't call function pointers which have been cast to some other type.
// Per DR (no number yet), the caller and callee can differ in noexcept.
if (!Info.Ctx.hasSameFunctionTypeIgnoringExceptionSpec(
CalleeType->getPointeeType(), FD->getType())) {
return Error(E);
}

} else
return Error(E);
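A minimal sketch (illustrative, not from the diff) of the constant-expression calls this mapping enables, assuming the in-progress C++1z constexpr lambda support:

constexpr auto add = [](int a, int b) { return a + b; };
constexpr int (*padd)(int, int) = add;   // captureless lambda -> function pointer
static_assert(padd(2, 3) == 5, "");      // evaluated via the lambda's call operator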
@ -5834,6 +5868,7 @@ namespace {
bool VisitCXXConstructExpr(const CXXConstructExpr *E) {
return VisitCXXConstructExpr(E, E->getType());
}
bool VisitLambdaExpr(const LambdaExpr *E);
bool VisitCXXInheritedCtorInitExpr(const CXXInheritedCtorInitExpr *E);
bool VisitCXXConstructExpr(const CXXConstructExpr *E, QualType T);
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E);
@ -6168,6 +6203,21 @@ bool RecordExprEvaluator::VisitCXXStdInitializerListExpr(
return true;
}

bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
const CXXRecordDecl *ClosureClass = E->getLambdaClass();
if (ClosureClass->isInvalidDecl()) return false;

if (Info.checkingPotentialConstantExpression()) return true;
if (E->capture_size()) {
Info.FFDiag(E, diag::note_unimplemented_constexpr_lambda_feature_ast)
<< "can not evaluate lambda expressions with captures";
return false;
}
// FIXME: Implement captures.
Result = APValue(APValue::UninitStruct(), /*NumBases*/0, /*NumFields*/0);
return true;
}

static bool EvaluateRecord(const Expr *E, const LValue &This,
APValue &Result, EvalInfo &Info) {
assert(E->isRValue() && E->getType()->isRecordType() &&
@ -6217,6 +6267,9 @@ class TemporaryExprEvaluator
bool VisitCXXStdInitializerListExpr(const CXXStdInitializerListExpr *E) {
return VisitConstructExpr(E);
}
bool VisitLambdaExpr(const LambdaExpr *E) {
return VisitConstructExpr(E);
}
};
} // end anonymous namespace
@ -10357,10 +10410,25 @@ bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result,

bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
const FunctionDecl *Callee,
ArrayRef<const Expr*> Args) const {
ArrayRef<const Expr*> Args,
const Expr *This) const {
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpressionUnevaluated);

LValue ThisVal;
const LValue *ThisPtr = nullptr;
if (This) {
#ifndef NDEBUG
auto *MD = dyn_cast<CXXMethodDecl>(Callee);
assert(MD && "Don't provide `this` for non-methods.");
assert(!MD->isStatic() && "Don't provide `this` for static methods.");
#endif
if (EvaluateObjectArgument(Info, This, ThisVal))
ThisPtr = &ThisVal;
if (Info.EvalStatus.HasSideEffects)
return false;
}

ArgVector ArgValues(Args.size());
for (ArrayRef<const Expr*>::iterator I = Args.begin(), E = Args.end();
I != E; ++I) {
@ -10373,7 +10441,7 @@ bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx,
}

// Build fake call to Callee.
CallStackFrame Frame(Info, Callee->getLocation(), Callee, /*This*/nullptr,
CallStackFrame Frame(Info, Callee->getLocation(), Callee, ThisPtr,
ArgValues.data());
return Evaluate(Value, Info, this) && !Info.EvalStatus.HasSideEffects;
}
@ -109,13 +109,13 @@ static const DeclContext *getEffectiveParentContext(const DeclContext *DC) {

static const FunctionDecl *getStructor(const NamedDecl *ND) {
if (const auto *FTD = dyn_cast<FunctionTemplateDecl>(ND))
return FTD->getTemplatedDecl();
return FTD->getTemplatedDecl()->getCanonicalDecl();

const auto *FD = cast<FunctionDecl>(ND);
if (const auto *FTD = FD->getPrimaryTemplate())
return FTD->getTemplatedDecl();
return FTD->getTemplatedDecl()->getCanonicalDecl();

return FD;
return FD->getCanonicalDecl();
}

/// MicrosoftMangleContextImpl - Overrides the default MangleContext for the
@ -312,6 +312,10 @@ class MicrosoftCXXNameMangler {
void mangleNestedName(const NamedDecl *ND);

private:
bool isStructorDecl(const NamedDecl *ND) const {
return ND == Structor || getStructor(ND) == Structor;
}

void mangleUnqualifiedName(const NamedDecl *ND) {
mangleUnqualifiedName(ND, ND->getDeclName());
}
@ -898,7 +902,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
llvm_unreachable("Can't mangle Objective-C selector names here!");

case DeclarationName::CXXConstructorName:
if (Structor == getStructor(ND)) {
if (isStructorDecl(ND)) {
if (StructorType == Ctor_CopyingClosure) {
Out << "?_O";
return;
@ -912,7 +916,7 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
return;

case DeclarationName::CXXDestructorName:
if (ND == Structor)
if (isStructorDecl(ND))
// If the named decl is the C++ destructor we're mangling,
// use the type we were given.
mangleCXXDtorType(static_cast<CXXDtorType>(StructorType));
@ -1862,7 +1866,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
IsStructor = true;
IsCtorClosure = (StructorType == Ctor_CopyingClosure ||
StructorType == Ctor_DefaultClosure) &&
getStructor(MD) == Structor;
isStructorDecl(MD);
if (IsCtorClosure)
CC = getASTContext().getDefaultCallingConvention(
/*IsVariadic=*/false, /*IsCXXMethod=*/true);
@ -1883,7 +1887,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
// <return-type> ::= <type>
// ::= @ # structors (they have no declared return type)
if (IsStructor) {
if (isa<CXXDestructorDecl>(D) && D == Structor &&
if (isa<CXXDestructorDecl>(D) && isStructorDecl(D) &&
StructorType == Dtor_Deleting) {
// The scalar deleting destructor takes an extra int argument.
// However, the FunctionType generated has 0 arguments.
@ -312,7 +312,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
// At O0 and O1 we only run the always inliner which is more efficient. At
// higher optimization levels we run the normal inliner.
if (CodeGenOpts.OptimizationLevel <= 1) {
bool InsertLifetimeIntrinsics = CodeGenOpts.OptimizationLevel != 0;
bool InsertLifetimeIntrinsics = (CodeGenOpts.OptimizationLevel != 0 &&
!CodeGenOpts.DisableLifetimeMarkers);
PMBuilder.Inliner = createAlwaysInlinerLegacyPass(InsertLifetimeIntrinsics);
} else {
PMBuilder.Inliner = createFunctionInliningPass(
@ -519,11 +520,22 @@ void EmitAssemblyHelper::CreateTargetMachine(bool MustCreateTM) {
.Case("dynamic-no-pic", llvm::Reloc::DynamicNoPIC);
assert(RM.hasValue() && "invalid PIC model!");

CodeGenOpt::Level OptLevel = CodeGenOpt::Default;
CodeGenOpt::Level OptLevel;
switch (CodeGenOpts.OptimizationLevel) {
default: break;
case 0: OptLevel = CodeGenOpt::None; break;
case 3: OptLevel = CodeGenOpt::Aggressive; break;
default:
llvm_unreachable("Invalid optimization level!");
case 0:
OptLevel = CodeGenOpt::None;
break;
case 1:
OptLevel = CodeGenOpt::Less;
break;
case 2:
OptLevel = CodeGenOpt::Default;
break; // O2/Os/Oz
case 3:
OptLevel = CodeGenOpt::Aggressive;
break;
}

llvm::TargetOptions Options;
@ -849,21 +861,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
}
}

static void runThinLTOBackend(const CodeGenOptions &CGOpts, Module *M,
static void runThinLTOBackend(ModuleSummaryIndex *CombinedIndex, Module *M,
std::unique_ptr<raw_pwrite_stream> OS) {
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into thinBackend, which will run the function
// importer and invoke LTO passes.
Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile);
if (!IndexOrErr) {
logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
"Error loading index file '" +
CGOpts.ThinLTOIndexFile + "': ");
return;
}
std::unique_ptr<ModuleSummaryIndex> CombinedIndex = std::move(*IndexOrErr);

StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
ModuleToDefinedGVSummaries;
CombinedIndex->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@ -949,8 +948,26 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
BackendAction Action,
std::unique_ptr<raw_pwrite_stream> OS) {
if (!CGOpts.ThinLTOIndexFile.empty()) {
runThinLTOBackend(CGOpts, M, std::move(OS));
return;
// If we are performing a ThinLTO importing compile, load the function index
// into memory and pass it into runThinLTOBackend, which will run the
// function importer and invoke LTO passes.
Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
llvm::getModuleSummaryIndexForFile(CGOpts.ThinLTOIndexFile);
if (!IndexOrErr) {
logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
"Error loading index file '" +
CGOpts.ThinLTOIndexFile + "': ");
return;
}
std::unique_ptr<ModuleSummaryIndex> CombinedIndex = std::move(*IndexOrErr);
// A null CombinedIndex means we should skip ThinLTO compilation
// (LLVM will optionally ignore empty index files, returning null instead
// of an error).
bool DoThinLTOBackend = CombinedIndex != nullptr;
if (DoThinLTOBackend) {
runThinLTOBackend(CombinedIndex.get(), M, std::move(OS));
return;
}
}

EmitAssemblyHelper AsmHelper(Diags, HeaderOpts, CGOpts, TOpts, LOpts, M);
@ -616,6 +616,8 @@ struct EHPersonality {
static const EHPersonality GNU_C_SJLJ;
static const EHPersonality GNU_C_SEH;
static const EHPersonality GNU_ObjC;
static const EHPersonality GNU_ObjC_SJLJ;
static const EHPersonality GNU_ObjC_SEH;
static const EHPersonality GNUstep_ObjC;
static const EHPersonality GNU_ObjCXX;
static const EHPersonality NeXT_ObjC;

@ -97,6 +97,10 @@ EHPersonality::GNU_CPlusPlus_SEH = { "__gxx_personality_seh0", nullptr };
const EHPersonality
EHPersonality::GNU_ObjC = {"__gnu_objc_personality_v0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjC_SJLJ = {"__gnu_objc_personality_sj0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjC_SEH = {"__gnu_objc_personality_seh0", "objc_exception_throw"};
const EHPersonality
EHPersonality::GNU_ObjCXX = { "__gnustep_objcxx_personality_v0", nullptr };
const EHPersonality
EHPersonality::GNUstep_ObjC = { "__gnustep_objc_personality_v0", nullptr };
@ -137,6 +141,10 @@ static const EHPersonality &getObjCPersonality(const llvm::Triple &T,
// fallthrough
case ObjCRuntime::GCC:
case ObjCRuntime::ObjFW:
if (L.SjLjExceptions)
return EHPersonality::GNU_ObjC_SJLJ;
else if (useLibGCCSEHPersonality(T))
return EHPersonality::GNU_ObjC_SEH;
return EHPersonality::GNU_ObjC;
}
llvm_unreachable("bad runtime kind");

@ -42,6 +42,9 @@ using namespace CodeGen;
/// markers.
static bool shouldEmitLifetimeMarkers(const CodeGenOptions &CGOpts,
const LangOptions &LangOpts) {
if (CGOpts.DisableLifetimeMarkers)
return false;

// Asan uses markers for use-after-scope checks.
if (CGOpts.SanitizeAddressUseAfterScope)
return true;

@ -3812,6 +3812,7 @@ ToolChain::CXXStdlibType NetBSD::GetDefaultCXXStdlibType() const {
if (Major >= 7 || Major == 0) {
switch (getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:

@ -9644,6 +9644,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (Major >= 7 || Major == 0) {
switch (getToolChain().getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
@ -1282,9 +1282,7 @@ class AnnotatingParser {
return TT_UnaryOperator;

const FormatToken *NextToken = Tok.getNextNonComment();
if (!NextToken ||
NextToken->isOneOf(tok::arrow, Keywords.kw_final, tok::equal,
Keywords.kw_override) ||
if (!NextToken || NextToken->isOneOf(tok::arrow, tok::equal) ||
(NextToken->is(tok::l_brace) && !NextToken->getNextNonComment()))
return TT_PointerOrReference;

@ -2088,9 +2086,9 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
!Line.IsMultiVariableDeclStmt)))
return true;
if (Left.is(TT_PointerOrReference))
return Right.Tok.isLiteral() ||
Right.isOneOf(TT_BlockComment, Keywords.kw_final,
Keywords.kw_override) ||
return Right.Tok.isLiteral() || Right.is(TT_BlockComment) ||
(Right.isOneOf(Keywords.kw_override, Keywords.kw_final) &&
!Right.is(TT_StartOfName)) ||
(Right.is(tok::l_brace) && Right.BlockKind == BK_Block) ||
(!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
tok::l_paren) &&

@ -737,7 +737,7 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
return;
}
if (Next->is(tok::exclaim) && PreviousMustBeValue)
addUnwrappedLine();
return addUnwrappedLine();
bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
bool NextEndsTemplateExpr =
Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
@ -745,9 +745,10 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
(PreviousMustBeValue ||
Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
tok::minusminus)))
addUnwrappedLine();
if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
addUnwrappedLine();
return addUnwrappedLine();
if ((PreviousMustBeValue || Previous->is(tok::r_brace)) &&
isJSDeclOrStmt(Keywords, Next))
return addUnwrappedLine();
}

void UnwrappedLineParser::parseStructuralElement() {
@ -1974,7 +1975,14 @@ void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
!FormatTok->isStringLiteral())
return;

while (!eof() && FormatTok->isNot(tok::semi)) {
while (!eof()) {
if (FormatTok->is(tok::semi))
return;
if (Line->Tokens.size() == 0) {
// Common issue: Automatic Semicolon Insertion wrapped the line, so the
// import statement should terminate.
return;
}
if (FormatTok->is(tok::l_brace)) {
FormatTok->BlockKind = BK_Block;
parseBracedList();

@ -520,6 +520,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.EmitLLVMUseLists = A->getOption().getID() == OPT_emit_llvm_uselists;

Opts.DisableLLVMPasses = Args.hasArg(OPT_disable_llvm_passes);
Opts.DisableLifetimeMarkers = Args.hasArg(OPT_disable_lifetimemarkers);
Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone);
Opts.ForbidGuardVariables = Args.hasArg(OPT_fforbid_guard_variables);
Opts.UseRegisterSizedBitfieldAccess = Args.hasArg(
@ -127,6 +127,12 @@ GeneratePCHAction::ComputeASTConsumerArguments(CompilerInstance &CI,
return OS;
}

bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI,
StringRef Filename) {
CI.getLangOpts().CompilingPCH = true;
return true;
}

std::unique_ptr<ASTConsumer>
GenerateModuleAction::CreateASTConsumer(CompilerInstance &CI,
StringRef InFile) {

@ -53,6 +53,7 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
assert(D);
SymbolInfo Info;
Info.Kind = SymbolKind::Unknown;
Info.SubKind = SymbolSubKind::None;
Info.Properties = SymbolPropertySet();
Info.Lang = SymbolLanguage::C;

@ -183,10 +184,16 @@ SymbolInfo index::getSymbolInfo(const Decl *D) {
Info.Kind = SymbolKind::NamespaceAlias;
Info.Lang = SymbolLanguage::CXX;
break;
case Decl::CXXConstructor:
case Decl::CXXConstructor: {
Info.Kind = SymbolKind::Constructor;
Info.Lang = SymbolLanguage::CXX;
auto *CD = cast<CXXConstructorDecl>(D);
if (CD->isCopyConstructor())
Info.SubKind = SymbolSubKind::CXXCopyConstructor;
else if (CD->isMoveConstructor())
Info.SubKind = SymbolSubKind::CXXMoveConstructor;
break;
}
case Decl::CXXDestructor:
Info.Kind = SymbolKind::Destructor;
Info.Lang = SymbolLanguage::CXX;
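A minimal sketch (illustrative, not from the diff) of declarations that now receive the distinct constructor sub-kinds when indexed:

struct S {
  S(const S &other);   // reported with sub-kind "cxx-copy-ctor"
  S(S &&other);        // reported with sub-kind "cxx-move-ctor"
};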
@ -363,6 +370,15 @@ StringRef index::getSymbolKindString(SymbolKind K) {
llvm_unreachable("invalid symbol kind");
}

StringRef index::getSymbolSubKindString(SymbolSubKind K) {
switch (K) {
case SymbolSubKind::None: return "<none>";
case SymbolSubKind::CXXCopyConstructor: return "cxx-copy-ctor";
case SymbolSubKind::CXXMoveConstructor: return "cxx-move-ctor";
}
llvm_unreachable("invalid symbol subkind");
}

StringRef index::getSymbolLanguageString(SymbolLanguage K) {
switch (K) {
case SymbolLanguage::C: return "C";

@ -1996,10 +1996,12 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,

// Ask HeaderInfo if we should enter this #include file. If not, #including
// this file will have no effect.
bool SkipHeader = false;
if (ShouldEnter &&
!HeaderInfo.ShouldEnterIncludeFile(*this, File, isImport,
SuggestedModule.getModule())) {
ShouldEnter = false;
SkipHeader = true;
if (Callbacks)
Callbacks->FileSkipped(*File, FilenameTok, FileCharacter);
}
@ -2008,6 +2010,14 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (!ShouldEnter) {
// If this is a module import, make it visible if needed.
if (auto *M = SuggestedModule.getModule()) {
// When building a pch, -fmodule-name tells the compiler to textually
// include headers in the specified module. But it is possible that
// ShouldEnter is false because we are skipping the header. In that
// case, we are not importing the specified module.
if (SkipHeader && getLangOpts().CompilingPCH &&
M->getTopLevelModuleName() == getLangOpts().CurrentModule)
return;

makeModuleVisible(M, HashLoc);

if (IncludeTok.getIdentifierInfo()->getPPKeywordID() !=
@ -2032,6 +2042,13 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,

// Determine if we're switching to building a new submodule, and which one.
if (auto *M = SuggestedModule.getModule()) {
// When building a pch, -fmodule-name tells the compiler to textually
// include headers in the specified module. We are not building the
// specified module.
if (getLangOpts().CompilingPCH &&
M->getTopLevelModuleName() == getLangOpts().CurrentModule)
return;

assert(!CurSubmodule && "should not have marked this as a module yet");
CurSubmodule = M;
@ -306,10 +306,11 @@ unsigned Parser::ParseAttributeArgsCommon(

// Parse the non-empty comma-separated list of expressions.
do {
bool ShouldEnter = attributeParsedArgsUnevaluated(*AttrName);
bool Uneval = attributeParsedArgsUnevaluated(*AttrName);
EnterExpressionEvaluationContext Unevaluated(
Actions, Sema::Unevaluated, /*LambdaContextDecl=*/nullptr,
/*IsDecltype=*/false, ShouldEnter);
Actions, Uneval ? Sema::Unevaluated : Sema::ConstantEvaluated,
/*LambdaContextDecl=*/nullptr,
/*IsDecltype=*/false);

ExprResult ArgExpr(
Actions.CorrectDelayedTyposInExpr(ParseAssignmentExpression()));

@ -404,6 +404,10 @@ ExprResult Parser::ParseBraceInitializer() {
return Actions.ActOnInitList(LBraceLoc, None, ConsumeBrace());
}

// Enter an appropriate expression evaluation context for an initializer list.
EnterExpressionEvaluationContext EnterContext(
Actions, EnterExpressionEvaluationContext::InitList);

bool InitExprsOk = true;

while (1) {

@ -1242,7 +1242,8 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
QualType RHSTy = RHS.get()->getType();

llvm::Triple::ArchType Arch = Context.getTargetInfo().getTriple().getArch();
bool IsPolyUnsigned = Arch == llvm::Triple::aarch64;
bool IsPolyUnsigned = Arch == llvm::Triple::aarch64 ||
Arch == llvm::Triple::aarch64_be;
bool IsInt64Long =
Context.getTargetInfo().getInt64Type() == TargetInfo::SignedLong;
QualType EltTy =
Some files were not shown because too many files have changed in this diff.