Merge llvm, clang, lld, lldb, compiler-rt and libc++ release_60 r321788, update build glue and version numbers.
Dimitry Andric 2018-01-06 23:44:14 +00:00
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/clang600-import/; revision=327657
198 changed files with 4530 additions and 2566 deletions

View File

@ -228,6 +228,7 @@ static void unmap_file() {
* profiling enabled will emit to a different file. Only one file may be
* started at a time.
*/
COMPILER_RT_VISIBILITY
void llvm_gcda_start_file(const char *orig_filename, const char version[4],
uint32_t checksum) {
const char *mode = "r+b";
@ -295,6 +296,7 @@ void llvm_gcda_start_file(const char *orig_filename, const char version[4],
/* Given an array of pointers to counters (counters), increment the n-th one,
* where we're also given a pointer to n (predecessor).
*/
COMPILER_RT_VISIBILITY
void llvm_gcda_increment_indirect_counter(uint32_t *predecessor,
uint64_t **counters) {
uint64_t *counter;
@ -317,6 +319,7 @@ void llvm_gcda_increment_indirect_counter(uint32_t *predecessor,
#endif
}
COMPILER_RT_VISIBILITY
void llvm_gcda_emit_function(uint32_t ident, const char *function_name,
uint32_t func_checksum, uint8_t use_extra_checksum,
uint32_t cfg_checksum) {
@ -343,6 +346,7 @@ void llvm_gcda_emit_function(uint32_t ident, const char *function_name,
write_string(function_name);
}
COMPILER_RT_VISIBILITY
void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters) {
uint32_t i;
uint64_t *old_ctrs = NULL;
@ -394,6 +398,7 @@ void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters) {
#endif
}
COMPILER_RT_VISIBILITY
void llvm_gcda_summary_info() {
const uint32_t obj_summary_len = 9; /* Length for gcov compatibility. */
uint32_t i;
@ -447,6 +452,7 @@ void llvm_gcda_summary_info() {
#endif
}
COMPILER_RT_VISIBILITY
void llvm_gcda_end_file() {
/* Write out EOF record. */
if (output_file) {
@ -459,6 +465,7 @@ void llvm_gcda_end_file() {
unmap_file();
}
fflush(output_file);
lprofUnlockFd(fd);
fclose(output_file);
output_file = NULL;
@ -471,6 +478,7 @@ void llvm_gcda_end_file() {
#endif
}
COMPILER_RT_VISIBILITY
void llvm_register_writeout_function(writeout_fn fn) {
struct writeout_fn_node *new_node = malloc(sizeof(struct writeout_fn_node));
new_node->fn = fn;
@ -484,6 +492,7 @@ void llvm_register_writeout_function(writeout_fn fn) {
}
}
COMPILER_RT_VISIBILITY
void llvm_writeout_files(void) {
struct writeout_fn_node *curr = writeout_fn_head;
@ -493,6 +502,7 @@ void llvm_writeout_files(void) {
}
}
COMPILER_RT_VISIBILITY
void llvm_delete_writeout_function_list(void) {
while (writeout_fn_head) {
struct writeout_fn_node *node = writeout_fn_head;
@ -503,6 +513,7 @@ void llvm_delete_writeout_function_list(void) {
writeout_fn_head = writeout_fn_tail = NULL;
}
COMPILER_RT_VISIBILITY
void llvm_register_flush_function(flush_fn fn) {
struct flush_fn_node *new_node = malloc(sizeof(struct flush_fn_node));
new_node->fn = fn;
@ -516,6 +527,7 @@ void llvm_register_flush_function(flush_fn fn) {
}
}
COMPILER_RT_VISIBILITY
void __gcov_flush() {
struct flush_fn_node *curr = flush_fn_head;
@ -525,6 +537,7 @@ void __gcov_flush() {
}
}
COMPILER_RT_VISIBILITY
void llvm_delete_flush_function_list(void) {
while (flush_fn_head) {
struct flush_fn_node *node = flush_fn_head;
@ -535,6 +548,7 @@ void llvm_delete_flush_function_list(void) {
flush_fn_head = flush_fn_tail = NULL;
}
COMPILER_RT_VISIBILITY
void llvm_gcov_init(writeout_fn wfn, flush_fn ffn) {
static int atexit_ran = 0;
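For reference, a minimal sketch (not part of this diff) of what COMPILER_RT_VISIBILITY typically expands to; in compiler-rt the real definition lives in InstrProfilingPort.h and is target-conditional, so treat the feature macro below as an assumption. On targets that support the attribute, annotating these gcov hooks keeps them out of the exported dynamic symbol table:

/* Sketch only; see InstrProfilingPort.h for the authoritative definition. */
#if COMPILER_RT_HAS_VISIBILITY_ATTRIBUTE   /* assumed config macro */
#define COMPILER_RT_VISIBILITY __attribute__((visibility("hidden")))
#else
#define COMPILER_RT_VISIBILITY
#endif

COMPILER_RT_VISIBILITY
void llvm_gcda_end_file(void);   /* annotated: hidden, not exported from the DSO */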

View File

@ -101,7 +101,7 @@ E: nico.rieck@gmail.com
D: Windows fixes
N: Jon Roelofs
E: jonathan@codesourcery.com
E: jroelofs@jroelofs.com

D: Remote testing, Newlib port, baremetal/single-threaded support.
N: Jonathan Sauer

View File

@ -461,7 +461,11 @@ namespace std {
#endif
#if __has_builtin(__builtin_launder)
#define _LIBCPP_COMPILER_HAS_BUILTIN_LAUNDER
#define _LIBCPP_COMPILER_HAS_BUILTIN_LAUNDER
#endif
#if !__is_identifier(__has_unique_object_representations)
#define _LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS
#endif
#elif defined(_LIBCPP_COMPILER_GCC)
@ -547,7 +551,11 @@ namespace std {
#endif
#if _GNUC_VER >= 700
#define _LIBCPP_COMPILER_HAS_BUILTIN_LAUNDER
#define _LIBCPP_COMPILER_HAS_BUILTIN_LAUNDER
#endif
#if _GNUC_VER >= 700
#define _LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS
#endif
#elif defined(_LIBCPP_COMPILER_MSVC)
@ -980,9 +988,10 @@ template <unsigned> struct __static_assert_check {};
#define _LIBCPP_NODISCARD_AFTER_CXX17
#endif
// FIXME: Remove all usages of this macro once compilers catch up.
#if !defined(__cpp_inline_variables) || (__cpp_inline_variables < 201606L)
# define _LIBCPP_HAS_NO_INLINE_VARIABLES
#if _LIBCPP_STD_VER > 14 && defined(__cpp_inline_variables) && (__cpp_inline_variables >= 201606L)
# define _LIBCPP_INLINE_VAR inline
#else
# define _LIBCPP_INLINE_VAR
#endif
#ifdef _LIBCPP_HAS_NO_RVALUE_REFERENCES
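The header hunks below apply this new _LIBCPP_INLINE_VAR macro to the C++17 variable templates. A small self-contained sketch of the effect (the example variable is illustrative, not taken from the diff):

// When __cpp_inline_variables >= 201606L, _LIBCPP_INLINE_VAR expands to
// 'inline', so the variable template is a single entity across translation
// units; otherwise it expands to nothing and the declaration stays as a
// plain constexpr, matching the previous behaviour.
#if __cpp_inline_variables >= 201606L
template <class T> inline constexpr bool example_v = true;  // one entity program-wide
#else
template <class T> constexpr bool example_v = true;          // pre-C++17 fallback
#endif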

View File

@ -564,7 +564,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_arg_t { };
#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_MEMORY)
extern const allocator_arg_t allocator_arg;
#else
constexpr allocator_arg_t allocator_arg = allocator_arg_t();
/* _LIBCPP_INLINE_VAR */ constexpr allocator_arg_t allocator_arg = allocator_arg_t();
#endif
// uses_allocator
@ -601,7 +601,7 @@ struct _LIBCPP_TEMPLATE_VIS uses_allocator
#if _LIBCPP_STD_VER > 14
template <class _Tp, class _Alloc>
constexpr size_t uses_allocator_v = uses_allocator<_Tp, _Alloc>::value;
_LIBCPP_INLINE_VAR constexpr size_t uses_allocator_v = uses_allocator<_Tp, _Alloc>::value;
#endif
#ifndef _LIBCPP_CXX03_LANG

View File

@ -82,9 +82,9 @@ extern const adopt_lock_t adopt_lock;
#else
constexpr defer_lock_t defer_lock = defer_lock_t();
constexpr try_to_lock_t try_to_lock = try_to_lock_t();
constexpr adopt_lock_t adopt_lock = adopt_lock_t();
/* _LIBCPP_INLINE_VAR */ constexpr defer_lock_t defer_lock = defer_lock_t();
/* _LIBCPP_INLINE_VAR */ constexpr try_to_lock_t try_to_lock = try_to_lock_t();
/* _LIBCPP_INLINE_VAR */ constexpr adopt_lock_t adopt_lock = adopt_lock_t();
#endif

View File

@ -26,7 +26,7 @@ duration_cast(const duration<Rep, Period>& fd);
template <class Rep> struct treat_as_floating_point : is_floating_point<Rep> {};
template <class Rep> constexpr bool treat_as_floating_point_v
template <class Rep> inline constexpr bool treat_as_floating_point_v
= treat_as_floating_point<Rep>::value; // C++17
template <class Rep>
@ -419,7 +419,8 @@ template <class _Rep>
struct _LIBCPP_TEMPLATE_VIS treat_as_floating_point : is_floating_point<_Rep> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Rep> _LIBCPP_CONSTEXPR bool treat_as_floating_point_v
template <class _Rep>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool treat_as_floating_point_v
= treat_as_floating_point<_Rep>::value;
#endif

View File

@ -213,9 +213,9 @@ template<class T> struct is_bind_expression;
template<class T> struct is_placeholder;
// See C++14 20.9.9, Function object binders
template <class T> constexpr bool is_bind_expression_v
template <class T> inline constexpr bool is_bind_expression_v
= is_bind_expression<T>::value; // C++17
template <class T> constexpr int is_placeholder_v
template <class T> inline constexpr int is_placeholder_v
= is_placeholder<T>::value; // C++17
@ -1991,7 +1991,7 @@ template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_bind_expression
#if _LIBCPP_STD_VER > 14
template <class _Tp>
constexpr size_t is_bind_expression_v = is_bind_expression<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr size_t is_bind_expression_v = is_bind_expression<_Tp>::value;
#endif
template<class _Tp> struct __is_placeholder : public integral_constant<int, 0> {};
@ -2000,7 +2000,7 @@ template<class _Tp> struct _LIBCPP_TEMPLATE_VIS is_placeholder
#if _LIBCPP_STD_VER > 14
template <class _Tp>
constexpr size_t is_placeholder_v = is_placeholder<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr size_t is_placeholder_v = is_placeholder<_Tp>::value;
#endif
namespace placeholders
@ -2020,16 +2020,16 @@ _LIBCPP_FUNC_VIS extern const __ph<8> _8;
_LIBCPP_FUNC_VIS extern const __ph<9> _9;
_LIBCPP_FUNC_VIS extern const __ph<10> _10;
#else
constexpr __ph<1> _1{};
constexpr __ph<2> _2{};
constexpr __ph<3> _3{};
constexpr __ph<4> _4{};
constexpr __ph<5> _5{};
constexpr __ph<6> _6{};
constexpr __ph<7> _7{};
constexpr __ph<8> _8{};
constexpr __ph<9> _9{};
constexpr __ph<10> _10{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<1> _1{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<2> _2{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<3> _3{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<4> _4{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<5> _5{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<6> _6{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<7> _7{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<8> _8{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<9> _9{};
/* _LIBCPP_INLINE_VAR */ constexpr __ph<10> _10{};
#endif // defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_BIND)
} // placeholders

View File

@ -18,7 +18,7 @@ namespace std
{
struct allocator_arg_t { };
constexpr allocator_arg_t allocator_arg = allocator_arg_t();
inline constexpr allocator_arg_t allocator_arg = allocator_arg_t();
template <class T, class Alloc> struct uses_allocator;
@ -631,6 +631,9 @@ template <class T> struct hash;
template <class T, class D> struct hash<unique_ptr<T, D> >;
template <class T> struct hash<shared_ptr<T> >;
template <class T, class Alloc>
inline constexpr bool uses_allocator_v = uses_allocator<T, Alloc>::value;
// Pointer safety
enum class pointer_safety { relaxed, preferred, strict };
void declare_reachable(void *p);

View File

@ -91,9 +91,9 @@ struct defer_lock_t {};
struct try_to_lock_t {};
struct adopt_lock_t {};
constexpr defer_lock_t defer_lock{};
constexpr try_to_lock_t try_to_lock{};
constexpr adopt_lock_t adopt_lock{};
inline constexpr defer_lock_t defer_lock{};
inline constexpr try_to_lock_t try_to_lock{};
inline constexpr adopt_lock_t adopt_lock{};
template <class Mutex>
class lock_guard

View File

@ -22,7 +22,7 @@ namespace std {
// 23.6.4, no-value state indicator
struct nullopt_t{see below };
constexpr nullopt_t nullopt(unspecified );
inline constexpr nullopt_t nullopt(unspecified );
// 23.6.5, class bad_optional_access
class bad_optional_access;
@ -195,7 +195,7 @@ struct nullopt_t
_LIBCPP_INLINE_VISIBILITY constexpr explicit nullopt_t(__secret_tag, __secret_tag) noexcept {}
};
/* inline */ constexpr nullopt_t nullopt{nullopt_t::__secret_tag{}, nullopt_t::__secret_tag{}};
_LIBCPP_INLINE_VAR constexpr nullopt_t nullopt{nullopt_t::__secret_tag{}, nullopt_t::__secret_tag{}};
template <class _Tp, bool = is_trivially_destructible<_Tp>::value>
struct __optional_destruct_base;

View File

@ -63,17 +63,17 @@ typedef ratio< 1000000000000000000000, 1> zetta; // not supported
typedef ratio<1000000000000000000000000, 1> yotta; // not supported
// 20.11.5, ratio comparison
template <class R1, class R2> constexpr bool ratio_equal_v
template <class R1, class R2> inline constexpr bool ratio_equal_v
= ratio_equal<R1, R2>::value; // C++17
template <class R1, class R2> constexpr bool ratio_not_equal_v
template <class R1, class R2> inline constexpr bool ratio_not_equal_v
= ratio_not_equal<R1, R2>::value; // C++17
template <class R1, class R2> constexpr bool ratio_less_v
template <class R1, class R2> inline constexpr bool ratio_less_v
= ratio_less<R1, R2>::value; // C++17
template <class R1, class R2> constexpr bool ratio_less_equal_v
template <class R1, class R2> inline constexpr bool ratio_less_equal_v
= ratio_less_equal<R1, R2>::value; // C++17
template <class R1, class R2> constexpr bool ratio_greater_v
template <class R1, class R2> inline constexpr bool ratio_greater_v
= ratio_greater<R1, R2>::value; // C++17
template <class R1, class R2> constexpr bool ratio_greater_equal_v
template <class R1, class R2> inline constexpr bool ratio_greater_equal_v
= ratio_greater_equal<R1, R2>::value; // C++17
}
*/
@ -501,22 +501,28 @@ struct __ratio_gcd
};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _R1, class _R2> _LIBCPP_CONSTEXPR bool ratio_equal_v
template <class _R1, class _R2>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_equal_v
= ratio_equal<_R1, _R2>::value;
template <class _R1, class _R2> _LIBCPP_CONSTEXPR bool ratio_not_equal_v
template <class _R1, class _R2>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_not_equal_v
= ratio_not_equal<_R1, _R2>::value;
template <class _R1, class _R2> _LIBCPP_CONSTEXPR bool ratio_less_v
template <class _R1, class _R2>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_less_v
= ratio_less<_R1, _R2>::value;
template <class _R1, class _R2> _LIBCPP_CONSTEXPR bool ratio_less_equal_v
template <class _R1, class _R2>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_less_equal_v
= ratio_less_equal<_R1, _R2>::value;
template <class _R1, class _R2> _LIBCPP_CONSTEXPR bool ratio_greater_v
template <class _R1, class _R2>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_greater_v
= ratio_greater<_R1, _R2>::value;
template <class _R1, class _R2> _LIBCPP_CONSTEXPR bool ratio_greater_equal_v
template <class _R1, class _R2>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool ratio_greater_equal_v
= ratio_greater_equal<_R1, _R2>::value;
#endif

View File

@ -47,10 +47,10 @@ template <class T> struct is_error_condition_enum
: public false_type {};
template <class _Tp>
constexpr size_t is_error_condition_enum_v = is_error_condition_enum<_Tp>::value; // C++17
inline constexpr size_t is_error_condition_enum_v = is_error_condition_enum<_Tp>::value; // C++17
template <class _Tp>
constexpr size_t is_error_code_enum_v = is_error_code_enum<_Tp>::value; // C++17
inline constexpr size_t is_error_code_enum_v = is_error_code_enum<_Tp>::value; // C++17
class error_code
{
@ -246,7 +246,7 @@ struct _LIBCPP_TEMPLATE_VIS is_error_code_enum
#if _LIBCPP_STD_VER > 14
template <class _Tp>
constexpr size_t is_error_code_enum_v = is_error_code_enum<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr size_t is_error_code_enum_v = is_error_code_enum<_Tp>::value;
#endif
// is_error_condition_enum
@ -257,7 +257,7 @@ struct _LIBCPP_TEMPLATE_VIS is_error_condition_enum
#if _LIBCPP_STD_VER > 14
template <class _Tp>
constexpr size_t is_error_condition_enum_v = is_error_condition_enum<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr size_t is_error_condition_enum_v = is_error_condition_enum<_Tp>::value;
#endif
// Some error codes are not present on all platforms, so we provide equivalents

View File

@ -70,7 +70,7 @@ public:
void swap(tuple&) noexcept(AND(swap(declval<T&>(), declval<T&>())...));
};
const unspecified ignore;
inline constexpr unspecified ignore;
template <class... T> tuple<V...> make_tuple(T&&...); // constexpr in C++14
template <class... T> tuple<ATypes...> forward_as_tuple(T&&...) noexcept; // constexpr in C++14
@ -87,7 +87,7 @@ template <class T, class Tuple>
template <class T> class tuple_size; // undefined
template <class... T> class tuple_size<tuple<T...>>;
template <class T>
constexpr size_t tuple_size_v = tuple_size<T>::value; // C++17
inline constexpr size_t tuple_size_v = tuple_size<T>::value; // C++17
template <size_t I, class T> class tuple_element; // undefined
template <size_t I, class... T> class tuple_element<I, tuple<T...>>;
template <size_t I, class T>
@ -1079,7 +1079,7 @@ struct __ignore_t
};
namespace {
constexpr __ignore_t<unsigned char> ignore = __ignore_t<unsigned char>();
_LIBCPP_INLINE_VAR constexpr __ignore_t<unsigned char> ignore = __ignore_t<unsigned char>();
}
template <class _Tp>
@ -1368,7 +1368,7 @@ pair<_T1, _T2>::pair(piecewise_construct_t,
#if _LIBCPP_STD_VER > 14
template <class _Tp>
constexpr size_t tuple_size_v = tuple_size<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr size_t tuple_size_v = tuple_size<_Tp>::value;
#define _LIBCPP_NOEXCEPT_RETURN(...) noexcept(noexcept(__VA_ARGS__)) { return __VA_ARGS__; }

View File

@ -132,6 +132,8 @@ namespace std
template <class T> struct has_virtual_destructor;
template<class T> struct has_unique_object_representations; // C++17
// Relationships between types:
template <class T, class U> struct is_same;
template <class Base, class Derived> struct is_base_of;
@ -223,173 +225,175 @@ namespace std
using void_t = void; // C++17
// See C++14 20.10.4.1, primary type categories
template <class T> constexpr bool is_void_v
template <class T> inline constexpr bool is_void_v
= is_void<T>::value; // C++17
template <class T> constexpr bool is_null_pointer_v
template <class T> inline constexpr bool is_null_pointer_v
= is_null_pointer<T>::value; // C++17
template <class T> constexpr bool is_integral_v
template <class T> inline constexpr bool is_integral_v
= is_integral<T>::value; // C++17
template <class T> constexpr bool is_floating_point_v
template <class T> inline constexpr bool is_floating_point_v
= is_floating_point<T>::value; // C++17
template <class T> constexpr bool is_array_v
template <class T> inline constexpr bool is_array_v
= is_array<T>::value; // C++17
template <class T> constexpr bool is_pointer_v
template <class T> inline constexpr bool is_pointer_v
= is_pointer<T>::value; // C++17
template <class T> constexpr bool is_lvalue_reference_v
template <class T> inline constexpr bool is_lvalue_reference_v
= is_lvalue_reference<T>::value; // C++17
template <class T> constexpr bool is_rvalue_reference_v
template <class T> inline constexpr bool is_rvalue_reference_v
= is_rvalue_reference<T>::value; // C++17
template <class T> constexpr bool is_member_object_pointer_v
template <class T> inline constexpr bool is_member_object_pointer_v
= is_member_object_pointer<T>::value; // C++17
template <class T> constexpr bool is_member_function_pointer_v
template <class T> inline constexpr bool is_member_function_pointer_v
= is_member_function_pointer<T>::value; // C++17
template <class T> constexpr bool is_enum_v
template <class T> inline constexpr bool is_enum_v
= is_enum<T>::value; // C++17
template <class T> constexpr bool is_union_v
template <class T> inline constexpr bool is_union_v
= is_union<T>::value; // C++17
template <class T> constexpr bool is_class_v
template <class T> inline constexpr bool is_class_v
= is_class<T>::value; // C++17
template <class T> constexpr bool is_function_v
template <class T> inline constexpr bool is_function_v
= is_function<T>::value; // C++17
// See C++14 20.10.4.2, composite type categories
template <class T> constexpr bool is_reference_v
template <class T> inline constexpr bool is_reference_v
= is_reference<T>::value; // C++17
template <class T> constexpr bool is_arithmetic_v
template <class T> inline constexpr bool is_arithmetic_v
= is_arithmetic<T>::value; // C++17
template <class T> constexpr bool is_fundamental_v
template <class T> inline constexpr bool is_fundamental_v
= is_fundamental<T>::value; // C++17
template <class T> constexpr bool is_object_v
template <class T> inline constexpr bool is_object_v
= is_object<T>::value; // C++17
template <class T> constexpr bool is_scalar_v
template <class T> inline constexpr bool is_scalar_v
= is_scalar<T>::value; // C++17
template <class T> constexpr bool is_compound_v
template <class T> inline constexpr bool is_compound_v
= is_compound<T>::value; // C++17
template <class T> constexpr bool is_member_pointer_v
template <class T> inline constexpr bool is_member_pointer_v
= is_member_pointer<T>::value; // C++17
// See C++14 20.10.4.3, type properties
template <class T> constexpr bool is_const_v
template <class T> inline constexpr bool is_const_v
= is_const<T>::value; // C++17
template <class T> constexpr bool is_volatile_v
template <class T> inline constexpr bool is_volatile_v
= is_volatile<T>::value; // C++17
template <class T> constexpr bool is_trivial_v
template <class T> inline constexpr bool is_trivial_v
= is_trivial<T>::value; // C++17
template <class T> constexpr bool is_trivially_copyable_v
template <class T> inline constexpr bool is_trivially_copyable_v
= is_trivially_copyable<T>::value; // C++17
template <class T> constexpr bool is_standard_layout_v
template <class T> inline constexpr bool is_standard_layout_v
= is_standard_layout<T>::value; // C++17
template <class T> constexpr bool is_pod_v
template <class T> inline constexpr bool is_pod_v
= is_pod<T>::value; // C++17
template <class T> constexpr bool is_literal_type_v
template <class T> inline constexpr bool is_literal_type_v
= is_literal_type<T>::value; // C++17
template <class T> constexpr bool is_empty_v
template <class T> inline constexpr bool is_empty_v
= is_empty<T>::value; // C++17
template <class T> constexpr bool is_polymorphic_v
template <class T> inline constexpr bool is_polymorphic_v
= is_polymorphic<T>::value; // C++17
template <class T> constexpr bool is_abstract_v
template <class T> inline constexpr bool is_abstract_v
= is_abstract<T>::value; // C++17
template <class T> constexpr bool is_final_v
template <class T> inline constexpr bool is_final_v
= is_final<T>::value; // C++17
template <class T> constexpr bool is_aggregate_v
template <class T> inline constexpr bool is_aggregate_v
= is_aggregate<T>::value; // C++17
template <class T> constexpr bool is_signed_v
template <class T> inline constexpr bool is_signed_v
= is_signed<T>::value; // C++17
template <class T> constexpr bool is_unsigned_v
template <class T> inline constexpr bool is_unsigned_v
= is_unsigned<T>::value; // C++17
template <class T, class... Args> constexpr bool is_constructible_v
template <class T, class... Args> inline constexpr bool is_constructible_v
= is_constructible<T, Args...>::value; // C++17
template <class T> constexpr bool is_default_constructible_v
template <class T> inline constexpr bool is_default_constructible_v
= is_default_constructible<T>::value; // C++17
template <class T> constexpr bool is_copy_constructible_v
template <class T> inline constexpr bool is_copy_constructible_v
= is_copy_constructible<T>::value; // C++17
template <class T> constexpr bool is_move_constructible_v
template <class T> inline constexpr bool is_move_constructible_v
= is_move_constructible<T>::value; // C++17
template <class T, class U> constexpr bool is_assignable_v
template <class T, class U> inline constexpr bool is_assignable_v
= is_assignable<T, U>::value; // C++17
template <class T> constexpr bool is_copy_assignable_v
template <class T> inline constexpr bool is_copy_assignable_v
= is_copy_assignable<T>::value; // C++17
template <class T> constexpr bool is_move_assignable_v
template <class T> inline constexpr bool is_move_assignable_v
= is_move_assignable<T>::value; // C++17
template <class T, class U> constexpr bool is_swappable_with_v
template <class T, class U> inline constexpr bool is_swappable_with_v
= is_swappable_with<T, U>::value; // C++17
template <class T> constexpr bool is_swappable_v
template <class T> inline constexpr bool is_swappable_v
= is_swappable<T>::value; // C++17
template <class T> constexpr bool is_destructible_v
template <class T> inline constexpr bool is_destructible_v
= is_destructible<T>::value; // C++17
template <class T, class... Args> constexpr bool is_trivially_constructible_v
template <class T, class... Args> inline constexpr bool is_trivially_constructible_v
= is_trivially_constructible<T, Args...>::value; // C++17
template <class T> constexpr bool is_trivially_default_constructible_v
template <class T> inline constexpr bool is_trivially_default_constructible_v
= is_trivially_default_constructible<T>::value; // C++17
template <class T> constexpr bool is_trivially_copy_constructible_v
template <class T> inline constexpr bool is_trivially_copy_constructible_v
= is_trivially_copy_constructible<T>::value; // C++17
template <class T> constexpr bool is_trivially_move_constructible_v
template <class T> inline constexpr bool is_trivially_move_constructible_v
= is_trivially_move_constructible<T>::value; // C++17
template <class T, class U> constexpr bool is_trivially_assignable_v
template <class T, class U> inline constexpr bool is_trivially_assignable_v
= is_trivially_assignable<T, U>::value; // C++17
template <class T> constexpr bool is_trivially_copy_assignable_v
template <class T> inline constexpr bool is_trivially_copy_assignable_v
= is_trivially_copy_assignable<T>::value; // C++17
template <class T> constexpr bool is_trivially_move_assignable_v
template <class T> inline constexpr bool is_trivially_move_assignable_v
= is_trivially_move_assignable<T>::value; // C++17
template <class T> constexpr bool is_trivially_destructible_v
template <class T> inline constexpr bool is_trivially_destructible_v
= is_trivially_destructible<T>::value; // C++17
template <class T, class... Args> constexpr bool is_nothrow_constructible_v
template <class T, class... Args> inline constexpr bool is_nothrow_constructible_v
= is_nothrow_constructible<T, Args...>::value; // C++17
template <class T> constexpr bool is_nothrow_default_constructible_v
template <class T> inline constexpr bool is_nothrow_default_constructible_v
= is_nothrow_default_constructible<T>::value; // C++17
template <class T> constexpr bool is_nothrow_copy_constructible_v
template <class T> inline constexpr bool is_nothrow_copy_constructible_v
= is_nothrow_copy_constructible<T>::value; // C++17
template <class T> constexpr bool is_nothrow_move_constructible_v
template <class T> inline constexpr bool is_nothrow_move_constructible_v
= is_nothrow_move_constructible<T>::value; // C++17
template <class T, class U> constexpr bool is_nothrow_assignable_v
template <class T, class U> inline constexpr bool is_nothrow_assignable_v
= is_nothrow_assignable<T, U>::value; // C++17
template <class T> constexpr bool is_nothrow_copy_assignable_v
template <class T> inline constexpr bool is_nothrow_copy_assignable_v
= is_nothrow_copy_assignable<T>::value; // C++17
template <class T> constexpr bool is_nothrow_move_assignable_v
template <class T> inline constexpr bool is_nothrow_move_assignable_v
= is_nothrow_move_assignable<T>::value; // C++17
template <class T, class U> constexpr bool is_nothrow_swappable_with_v
template <class T, class U> inline constexpr bool is_nothrow_swappable_with_v
= is_nothrow_swappable_with<T, U>::value; // C++17
template <class T> constexpr bool is_nothrow_swappable_v
template <class T> inline constexpr bool is_nothrow_swappable_v
= is_nothrow_swappable<T>::value; // C++17
template <class T> constexpr bool is_nothrow_destructible_v
template <class T> inline constexpr bool is_nothrow_destructible_v
= is_nothrow_destructible<T>::value; // C++17
template <class T> constexpr bool has_virtual_destructor_v
template <class T> inline constexpr bool has_virtual_destructor_v
= has_virtual_destructor<T>::value; // C++17
template<class T> inline constexpr bool has_unique_object_representations_v // C++17
= has_unique_object_representations<T>::value;
// See C++14 20.10.5, type property queries
template <class T> constexpr size_t alignment_of_v
template <class T> inline constexpr size_t alignment_of_v
= alignment_of<T>::value; // C++17
template <class T> constexpr size_t rank_v
template <class T> inline constexpr size_t rank_v
= rank<T>::value; // C++17
template <class T, unsigned I = 0> constexpr size_t extent_v
template <class T, unsigned I = 0> inline constexpr size_t extent_v
= extent<T, I>::value; // C++17
// See C++14 20.10.6, type relations
template <class T, class U> constexpr bool is_same_v
template <class T, class U> inline constexpr bool is_same_v
= is_same<T, U>::value; // C++17
template <class Base, class Derived> constexpr bool is_base_of_v
template <class Base, class Derived> inline constexpr bool is_base_of_v
= is_base_of<Base, Derived>::value; // C++17
template <class From, class To> constexpr bool is_convertible_v
template <class From, class To> inline constexpr bool is_convertible_v
= is_convertible<From, To>::value; // C++17
template <class Fn, class... ArgTypes> constexpr bool is_invocable_v
template <class Fn, class... ArgTypes> inline constexpr bool is_invocable_v
= is_invocable<Fn, ArgTypes...>::value; // C++17
template <class R, class Fn, class... ArgTypes> constexpr bool is_invocable_r_v
template <class R, class Fn, class... ArgTypes> inline constexpr bool is_invocable_r_v
= is_invocable_r<R, Fn, ArgTypes...>::value; // C++17
template <class Fn, class... ArgTypes> constexpr bool is_nothrow_invocable_v
template <class Fn, class... ArgTypes> inline constexpr bool is_nothrow_invocable_v
= is_nothrow_invocable<Fn, ArgTypes...>::value; // C++17
template <class R, class Fn, class... ArgTypes> constexpr bool is_nothrow_invocable_r_v
template <class R, class Fn, class... ArgTypes> inline constexpr bool is_nothrow_invocable_r_v
= is_nothrow_invocable_r<R, Fn, ArgTypes...>::value; // C++17
// [meta.logical], logical operator traits:
template<class... B> struct conjunction; // C++17
template<class... B>
constexpr bool conjunction_v = conjunction<B...>::value; // C++17
inline constexpr bool conjunction_v = conjunction<B...>::value; // C++17
template<class... B> struct disjunction; // C++17
template<class... B>
constexpr bool disjunction_v = disjunction<B...>::value; // C++17
inline constexpr bool disjunction_v = disjunction<B...>::value; // C++17
template<class B> struct negation; // C++17
template<class B>
constexpr bool negation_v = negation<B>::value; // C++17
inline constexpr bool negation_v = negation<B>::value; // C++17
}
@ -619,7 +623,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_const : public fa
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_const<_Tp const> : public true_type {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_const_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_const_v
= is_const<_Tp>::value;
#endif
@ -629,7 +634,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_volatile : pub
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_volatile<_Tp volatile> : public true_type {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_volatile_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_volatile_v
= is_volatile<_Tp>::value;
#endif
@ -666,7 +672,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_void
: public __libcpp_is_void<typename remove_cv<_Tp>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_void_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_void_v
= is_void<_Tp>::value;
#endif
@ -683,7 +690,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_null_pointer
: public __is_nullptr_t_impl<typename remove_cv<_Tp>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_null_pointer_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_null_pointer_v
= is_null_pointer<_Tp>::value;
#endif
#endif
@ -717,7 +725,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_integral
: public __libcpp_is_integral<typename remove_cv<_Tp>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_integral_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_integral_v
= is_integral<_Tp>::value;
#endif
@ -732,7 +741,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_floating_point
: public __libcpp_is_floating_point<typename remove_cv<_Tp>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_floating_point_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_floating_point_v
= is_floating_point<_Tp>::value;
#endif
@ -746,7 +756,8 @@ template <class _Tp, size_t _Np> struct _LIBCPP_TEMPLATE_VIS is_array<_Tp[_Np]>
: public true_type {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_array_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_array_v
= is_array<_Tp>::value;
#endif
@ -759,7 +770,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_pointer
: public __libcpp_is_pointer<typename remove_cv<_Tp>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_pointer_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_pointer_v
= is_pointer<_Tp>::value;
#endif
@ -780,13 +792,16 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_reference<_Tp&&> : public tr
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_reference_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_reference_v
= is_reference<_Tp>::value;
template <class _Tp> _LIBCPP_CONSTEXPR bool is_lvalue_reference_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_lvalue_reference_v
= is_lvalue_reference<_Tp>::value;
template <class _Tp> _LIBCPP_CONSTEXPR bool is_rvalue_reference_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_rvalue_reference_v
= is_rvalue_reference<_Tp>::value;
#endif
// is_union
@ -805,7 +820,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_union
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_union_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_union_v
= is_union<_Tp>::value;
#endif
@ -830,7 +846,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_class
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_class_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_class_v
= is_class<_Tp>::value;
#endif
@ -840,7 +857,8 @@ template <class _Tp, class _Up> struct _LIBCPP_TEMPLATE_VIS is_same :
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_same<_Tp, _Tp> : public true_type {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp, class _Up> _LIBCPP_CONSTEXPR bool is_same_v
template <class _Tp, class _Up>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_same_v
= is_same<_Tp, _Up>::value;
#endif
@ -870,7 +888,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_function
: public __libcpp_is_function<_Tp> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_function_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_function_v
= is_function<_Tp>::value;
#endif
@ -897,7 +916,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer
: public __libcpp_is_member_function_pointer<typename remove_cv<_Tp>::type>::type {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_member_function_pointer_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_function_pointer_v
= is_member_function_pointer<_Tp>::value;
#endif
@ -910,7 +930,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_pointer
: public __libcpp_is_member_pointer<typename remove_cv<_Tp>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_member_pointer_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_pointer_v
= is_member_pointer<_Tp>::value;
#endif
@ -921,7 +942,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer
!is_member_function_pointer<_Tp>::value> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_member_object_pointer_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_member_object_pointer_v
= is_member_object_pointer<_Tp>::value;
#endif
@ -949,7 +971,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_enum
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_enum_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_enum_v
= is_enum<_Tp>::value;
#endif
@ -960,7 +983,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_arithmetic
is_floating_point<_Tp>::value> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_arithmetic_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_arithmetic_v
= is_arithmetic<_Tp>::value;
#endif
@ -972,7 +996,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_fundamental
is_arithmetic<_Tp>::value> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_fundamental_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_fundamental_v
= is_fundamental<_Tp>::value;
#endif
@ -988,7 +1013,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_scalar
template <> struct _LIBCPP_TEMPLATE_VIS is_scalar<nullptr_t> : public true_type {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_scalar_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_scalar_v
= is_scalar<_Tp>::value;
#endif
@ -1001,7 +1027,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_object
is_class<_Tp>::value > {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_object_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_object_v
= is_object<_Tp>::value;
#endif
@ -1011,7 +1038,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_compound
: public integral_constant<bool, !is_fundamental<_Tp>::value> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_compound_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_compound_v
= is_compound<_Tp>::value;
#endif
@ -1210,7 +1238,8 @@ template <class _Tp> struct __libcpp_is_signed<_Tp, false> : public false_type {
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_signed : public __libcpp_is_signed<_Tp> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_signed_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_signed_v
= is_signed<_Tp>::value;
#endif
@ -1230,7 +1259,8 @@ template <class _Tp> struct __libcpp_is_unsigned<_Tp, false> : public false_type
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_unsigned : public __libcpp_is_unsigned<_Tp> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_unsigned_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_unsigned_v
= is_unsigned<_Tp>::value;
#endif
@ -1244,7 +1274,8 @@ template <class _Tp, size_t _Np> struct _LIBCPP_TEMPLATE_VIS rank<_Tp[_Np]>
: public integral_constant<size_t, rank<_Tp>::value + 1> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR size_t rank_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR size_t rank_v
= rank<_Tp>::value;
#endif
@ -1262,7 +1293,8 @@ template <class _Tp, size_t _Np, unsigned _Ip> struct _LIBCPP_TEMPLATE_VIS exten
: public integral_constant<size_t, extent<_Tp, _Ip-1>::value> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp, unsigned _Ip = 0> _LIBCPP_CONSTEXPR size_t extent_v
template <class _Tp, unsigned _Ip = 0>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR size_t extent_v
= extent<_Tp, _Ip>::value;
#endif
@ -1334,7 +1366,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_abstract
: public integral_constant<bool, __is_abstract(_Tp)> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_abstract_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_abstract_v
= is_abstract<_Tp>::value;
#endif
@ -1354,7 +1387,8 @@ is_final : public integral_constant<bool, __is_final(_Tp)> {};
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_final_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_final_v
= is_final<_Tp>::value;
#endif
@ -1366,7 +1400,8 @@ is_aggregate : public integral_constant<bool, __is_aggregate(_Tp)> {};
#if !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp>
constexpr bool is_aggregate_v = is_aggregate<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr bool is_aggregate_v
= is_aggregate<_Tp>::value;
#endif
#endif // _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_IS_AGGREGATE)
@ -1407,7 +1442,8 @@ struct _LIBCPP_TEMPLATE_VIS is_base_of
#endif // _LIBCPP_HAS_IS_BASE_OF
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Bp, class _Dp> _LIBCPP_CONSTEXPR bool is_base_of_v
template <class _Bp, class _Dp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_base_of_v
= is_base_of<_Bp, _Dp>::value;
#endif
@ -1497,7 +1533,8 @@ template <class _T1, class _T2> struct _LIBCPP_TEMPLATE_VIS is_convertible
#endif // __has_feature(is_convertible_to)
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _From, class _To> _LIBCPP_CONSTEXPR bool is_convertible_v
template <class _From, class _To>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_convertible_v
= is_convertible<_From, _To>::value;
#endif
@ -1533,7 +1570,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_empty : public __libcpp_empt
#endif // __has_feature(is_empty)
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_empty_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_empty_v
= is_empty<_Tp>::value;
#endif
@ -1558,7 +1596,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_polymorphic
#endif // __has_feature(is_polymorphic)
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_polymorphic_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_polymorphic_v
= is_polymorphic<_Tp>::value;
#endif
@ -1577,17 +1616,35 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS has_virtual_destructor
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool has_virtual_destructor_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool has_virtual_destructor_v
= has_virtual_destructor<_Tp>::value;
#endif
// has_unique_object_representations
#if _LIBCPP_STD_VER > 14 && defined(_LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS)
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS has_unique_object_representations
: public integral_constant<bool,
__has_unique_object_representations(remove_cv_t<remove_all_extents_t<_Tp>>)> {};
#if !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool has_unique_object_representations_v
= has_unique_object_representations<_Tp>::value;
#endif
#endif
// alignment_of
template <class _Tp> struct _LIBCPP_TEMPLATE_VIS alignment_of
: public integral_constant<size_t, __alignof__(_Tp)> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR size_t alignment_of_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR size_t alignment_of_v
= alignment_of<_Tp>::value;
#endif
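Illustrative use of the new trait (the structs are hypothetical and assume a typical ABI where a char followed by an int introduces padding bytes):

#include <type_traits>
struct NoPadding  { int a; int b; };   // object bytes fully determine the value
struct HasPadding { char c; int i; };  // padding after 'c' on common ABIs
static_assert(std::has_unique_object_representations<NoPadding>::value, "");
static_assert(!std::has_unique_object_representations<HasPadding>::value, "");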
@ -2126,7 +2183,8 @@ struct is_assignable
: public __is_assignable_imp<_Tp, _Arg> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp, class _Arg> _LIBCPP_CONSTEXPR bool is_assignable_v
template <class _Tp, class _Arg>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_assignable_v
= is_assignable<_Tp, _Arg>::value;
#endif
@ -2137,7 +2195,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_copy_assignable
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_copy_assignable_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_copy_assignable_v
= is_copy_assignable<_Tp>::value;
#endif
@ -2152,7 +2211,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_move_assignable
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_move_assignable_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_move_assignable_v
= is_move_assignable<_Tp>::value;
#endif
@ -2215,7 +2275,8 @@ struct is_destructible<void>
: public _VSTD::false_type {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_destructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_destructible_v
= is_destructible<_Tp>::value;
#endif
@ -3260,7 +3321,8 @@ struct __is_constructible2_imp<false, _Ap[], _A0, _A1>
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) && !defined(_LIBCPP_HAS_NO_VARIADICS)
template <class _Tp, class ..._Args> _LIBCPP_CONSTEXPR bool is_constructible_v
template <class _Tp, class ..._Args>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_constructible_v
= is_constructible<_Tp, _Args...>::value;
#endif
@ -3272,7 +3334,8 @@ struct _LIBCPP_TEMPLATE_VIS is_default_constructible
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_default_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_default_constructible_v
= is_default_constructible<_Tp>::value;
#endif
@ -3284,7 +3347,8 @@ struct _LIBCPP_TEMPLATE_VIS is_copy_constructible
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_copy_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_copy_constructible_v
= is_copy_constructible<_Tp>::value;
#endif
@ -3300,7 +3364,8 @@ struct _LIBCPP_TEMPLATE_VIS is_move_constructible
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_move_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_move_constructible_v
= is_move_constructible<_Tp>::value;
#endif
@ -3432,7 +3497,8 @@ struct _LIBCPP_TEMPLATE_VIS is_trivially_constructible<_Tp, _Tp&,
#endif // _LIBCPP_HAS_NO_VARIADICS
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) && !defined(_LIBCPP_HAS_NO_VARIADICS)
template <class _Tp, class... _Args> _LIBCPP_CONSTEXPR bool is_trivially_constructible_v
template <class _Tp, class... _Args>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_constructible_v
= is_trivially_constructible<_Tp, _Args...>::value;
#endif
@ -3443,7 +3509,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_default_constructi
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_trivially_default_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_default_constructible_v
= is_trivially_default_constructible<_Tp>::value;
#endif
@ -3454,7 +3521,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_copy_constructible
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_trivially_copy_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_copy_constructible_v
= is_trivially_copy_constructible<_Tp>::value;
#endif
@ -3469,7 +3537,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_move_constructible
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_trivially_move_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_move_constructible_v
= is_trivially_move_constructible<_Tp>::value;
#endif
@ -3512,7 +3581,8 @@ struct is_trivially_assignable<_Tp&, _Tp&&>
#endif // !__has_feature(is_trivially_assignable)
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp, class _Arg> _LIBCPP_CONSTEXPR bool is_trivially_assignable_v
template <class _Tp, class _Arg>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_assignable_v
= is_trivially_assignable<_Tp, _Arg>::value;
#endif
@ -3523,7 +3593,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_copy_assignable
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_trivially_copy_assignable_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_copy_assignable_v
= is_trivially_copy_assignable<_Tp>::value;
#endif
@ -3539,7 +3610,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_move_assignable
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_trivially_move_assignable_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_move_assignable_v
= is_trivially_move_assignable<_Tp>::value;
#endif
@ -3565,7 +3637,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_destructible<_Tp[]
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_trivially_destructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_destructible_v
= is_trivially_destructible<_Tp>::value;
#endif
@ -3730,7 +3803,8 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_constructible<_Tp, _Tp&,
#endif // __has_feature(is_nothrow_constructible)
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES) && !defined(_LIBCPP_HAS_NO_VARIADICS)
template <class _Tp, class ..._Args> _LIBCPP_CONSTEXPR bool is_nothrow_constructible_v
template <class _Tp, class ..._Args>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_constructible_v
= is_nothrow_constructible<_Tp, _Args...>::value;
#endif
@ -3741,7 +3815,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_default_constructibl
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_nothrow_default_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_default_constructible_v
= is_nothrow_default_constructible<_Tp>::value;
#endif
@ -3752,7 +3827,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_copy_constructible
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_nothrow_copy_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_copy_constructible_v
= is_nothrow_copy_constructible<_Tp>::value;
#endif
@ -3767,7 +3843,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_move_constructible
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_nothrow_move_constructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_move_constructible_v
= is_nothrow_move_constructible<_Tp>::value;
#endif
@ -3840,7 +3917,8 @@ struct is_nothrow_assignable<_Tp&, _Tp&&>
#endif // __has_feature(cxx_noexcept)
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp, class _Arg> _LIBCPP_CONSTEXPR bool is_nothrow_assignable_v
template <class _Tp, class _Arg>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_assignable_v
= is_nothrow_assignable<_Tp, _Arg>::value;
#endif
@ -3851,7 +3929,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_copy_assignable
typename add_lvalue_reference<typename add_const<_Tp>::type>::type> {};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_nothrow_copy_assignable_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_copy_assignable_v
= is_nothrow_copy_assignable<_Tp>::value;
#endif
@ -3867,7 +3946,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_nothrow_move_assignable
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_nothrow_move_assignable_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_move_assignable_v
= is_nothrow_move_assignable<_Tp>::value;
#endif
@ -3933,7 +4013,8 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_destructible<_Tp[]>
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_nothrow_destructible_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_nothrow_destructible_v
= is_nothrow_destructible<_Tp>::value;
#endif
@ -3955,7 +4036,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_pod
#endif
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_pod_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_pod_v
= is_pod<_Tp>::value;
#endif
@ -3971,7 +4053,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_literal_type
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_literal_type_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_literal_type_v
= is_literal_type<_Tp>::value;
#endif
@ -3986,7 +4069,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_standard_layout
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_standard_layout_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_standard_layout_v
= is_standard_layout<_Tp>::value;
#endif
@ -4003,7 +4087,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivially_copyable
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_trivially_copyable_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivially_copyable_v
= is_trivially_copyable<_Tp>::value;
#endif
@ -4019,7 +4104,8 @@ template <class _Tp> struct _LIBCPP_TEMPLATE_VIS is_trivial
{};
#if _LIBCPP_STD_VER > 14 && !defined(_LIBCPP_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp> _LIBCPP_CONSTEXPR bool is_trivial_v
template <class _Tp>
_LIBCPP_INLINE_VAR _LIBCPP_CONSTEXPR bool is_trivial_v
= is_trivial<_Tp>::value;
#endif
@ -4457,12 +4543,14 @@ struct _LIBCPP_TEMPLATE_VIS is_invocable_r
: integral_constant<bool, __invokable_r<_Ret, _Fn, _Args...>::value> {};
template <class _Fn, class ..._Args>
constexpr bool is_invocable_v = is_invocable<_Fn, _Args...>::value;
_LIBCPP_INLINE_VAR constexpr bool is_invocable_v
= is_invocable<_Fn, _Args...>::value;
template <class _Ret, class _Fn, class ..._Args>
constexpr bool is_invocable_r_v = is_invocable_r<_Ret, _Fn, _Args...>::value;
_LIBCPP_INLINE_VAR constexpr bool is_invocable_r_v
= is_invocable_r<_Ret, _Fn, _Args...>::value;
// is_nothrow_callable
// is_nothrow_invocable
template <class _Fn, class ..._Args>
struct _LIBCPP_TEMPLATE_VIS is_nothrow_invocable
@ -4473,10 +4561,12 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_invocable_r
: integral_constant<bool, __nothrow_invokable_r<_Ret, _Fn, _Args...>::value> {};
template <class _Fn, class ..._Args>
constexpr bool is_nothrow_invocable_v = is_nothrow_invocable<_Fn, _Args...>::value;
_LIBCPP_INLINE_VAR constexpr bool is_nothrow_invocable_v
= is_nothrow_invocable<_Fn, _Args...>::value;
template <class _Ret, class _Fn, class ..._Args>
constexpr bool is_nothrow_invocable_r_v = is_nothrow_invocable_r<_Ret, _Fn, _Args...>::value;
_LIBCPP_INLINE_VAR constexpr bool is_nothrow_invocable_r_v
= is_nothrow_invocable_r<_Ret, _Fn, _Args...>::value;
#endif // _LIBCPP_STD_VER > 14
@ -4616,16 +4706,20 @@ struct _LIBCPP_TEMPLATE_VIS is_nothrow_swappable
};
template <class _Tp, class _Up>
constexpr bool is_swappable_with_v = is_swappable_with<_Tp, _Up>::value;
_LIBCPP_INLINE_VAR constexpr bool is_swappable_with_v
= is_swappable_with<_Tp, _Up>::value;
template <class _Tp>
constexpr bool is_swappable_v = is_swappable<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr bool is_swappable_v
= is_swappable<_Tp>::value;
template <class _Tp, class _Up>
constexpr bool is_nothrow_swappable_with_v = is_nothrow_swappable_with<_Tp, _Up>::value;
_LIBCPP_INLINE_VAR constexpr bool is_nothrow_swappable_with_v
= is_nothrow_swappable_with<_Tp, _Up>::value;
template <class _Tp>
constexpr bool is_nothrow_swappable_v = is_nothrow_swappable<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr bool is_nothrow_swappable_v
= is_nothrow_swappable<_Tp>::value;
#endif // _LIBCPP_STD_VER > 14
@ -4742,15 +4836,21 @@ template <class...> using void_t = void;
# ifndef _LIBCPP_HAS_NO_VARIADICS
template <class... _Args>
struct conjunction : __and_<_Args...> {};
template<class... _Args> constexpr bool conjunction_v = conjunction<_Args...>::value;
template<class... _Args>
_LIBCPP_INLINE_VAR constexpr bool conjunction_v
= conjunction<_Args...>::value;
template <class... _Args>
struct disjunction : __or_<_Args...> {};
template<class... _Args> constexpr bool disjunction_v = disjunction<_Args...>::value;
template<class... _Args>
_LIBCPP_INLINE_VAR constexpr bool disjunction_v
= disjunction<_Args...>::value;
template <class _Tp>
struct negation : __not_<_Tp> {};
template<class _Tp> constexpr bool negation_v = negation<_Tp>::value;
template<class _Tp>
_LIBCPP_INLINE_VAR constexpr bool negation_v
= negation<_Tp>::value;
# endif // _LIBCPP_HAS_NO_VARIADICS
#endif // _LIBCPP_STD_VER > 14
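
The change above replaces each `*_v` variable template's plain `constexpr` definition with `_LIBCPP_INLINE_VAR constexpr`; the macro presumably expands to `inline` when C++17 inline variables are available, so every translation unit that includes the header refers to a single definition. A minimal standalone sketch of the same pattern (the trait name below is illustrative, not a libc++ internal):

#include <type_traits>

// An inline constexpr variable template defined in a header: every TU that
// includes it refers to the same variable, so there are no ODR duplicates.
template <class T>
inline constexpr bool is_small_trivial_v =
    std::is_trivially_copyable<T>::value && sizeof(T) <= sizeof(void *);

static_assert(is_small_trivial_v<int>, "int is small and trivially copyable");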

View File

@ -99,7 +99,7 @@ void
swap(pair<T1, T2>& x, pair<T1, T2>& y) noexcept(noexcept(x.swap(y)));
struct piecewise_construct_t { };
constexpr piecewise_construct_t piecewise_construct = piecewise_construct_t();
inline constexpr piecewise_construct_t piecewise_construct = piecewise_construct_t();
template <class T> class tuple_size;
template <size_t I, class T> class tuple_element;
@ -296,7 +296,7 @@ struct _LIBCPP_TEMPLATE_VIS piecewise_construct_t { };
#if defined(_LIBCPP_CXX03_LANG) || defined(_LIBCPP_BUILDING_UTILITY)
extern const piecewise_construct_t piecewise_construct;// = piecewise_construct_t();
#else
constexpr piecewise_construct_t piecewise_construct = piecewise_construct_t();
/* _LIBCPP_INLINE_VAR */ constexpr piecewise_construct_t piecewise_construct = piecewise_construct_t();
#endif
#if defined(_LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR)
@ -904,30 +904,21 @@ _T1 exchange(_T1& __obj, _T2 && __new_value)
struct _LIBCPP_TYPE_VIS in_place_t {
explicit in_place_t() = default;
};
#ifndef _LIBCPP_HAS_NO_INLINE_VARIABLES
inline
#endif
constexpr in_place_t in_place{};
_LIBCPP_INLINE_VAR constexpr in_place_t in_place{};
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS in_place_type_t {
explicit in_place_type_t() = default;
};
template <class _Tp>
#ifndef _LIBCPP_HAS_NO_INLINE_VARIABLES
inline
#endif
constexpr in_place_type_t<_Tp> in_place_type{};
_LIBCPP_INLINE_VAR constexpr in_place_type_t<_Tp> in_place_type{};
template <size_t _Idx>
struct _LIBCPP_TYPE_VIS in_place_index_t {
explicit in_place_index_t() = default;
};
template <size_t _Idx>
#ifndef _LIBCPP_HAS_NO_INLINE_VARIABLES
inline
#endif
constexpr in_place_index_t<_Idx> in_place_index{};
_LIBCPP_INLINE_VAR constexpr in_place_index_t<_Idx> in_place_index{};
template <class _Tp> struct __is_inplace_type_imp : false_type {};
template <class _Tp> struct __is_inplace_type_imp<in_place_type_t<_Tp>> : true_type {};

View File

@ -76,7 +76,7 @@ namespace std {
template <class T> struct variant_size; // undefined
template <class T>
constexpr size_t variant_size_v = variant_size<T>::value;
inline constexpr size_t variant_size_v = variant_size<T>::value;
template <class T> struct variant_size<const T>;
template <class T> struct variant_size<volatile T>;
@ -97,7 +97,7 @@ namespace std {
template <size_t I, class... Types>
struct variant_alternative<I, variant<Types...>>;
constexpr size_t variant_npos = -1;
inline constexpr size_t variant_npos = -1;
// 20.7.4, value access
template <class T, class... Types>
@ -246,7 +246,7 @@ template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS variant_size;
template <class _Tp>
constexpr size_t variant_size_v = variant_size<_Tp>::value;
_LIBCPP_INLINE_VAR constexpr size_t variant_size_v = variant_size<_Tp>::value;
template <class _Tp>
struct _LIBCPP_TEMPLATE_VIS variant_size<const _Tp> : variant_size<_Tp> {};
@ -286,7 +286,7 @@ struct _LIBCPP_TEMPLATE_VIS variant_alternative<_Ip, variant<_Types...>> {
using type = __type_pack_element<_Ip, _Types...>;
};
constexpr size_t variant_npos = static_cast<size_t>(-1);
_LIBCPP_INLINE_VAR constexpr size_t variant_npos = static_cast<size_t>(-1);
constexpr int __choose_index_type(unsigned int __num_elem) {
if (__num_elem < std::numeric_limits<unsigned char>::max())

View File

@ -325,9 +325,9 @@ class TargetPassConfig : public ImmutablePass {
virtual bool isGlobalISelEnabled() const;
/// Check whether or not GlobalISel should abort on error.
/// When this is disable, GlobalISel will fall back on SDISel instead of
/// When this is disabled, GlobalISel will fall back on SDISel instead of
/// erroring out.
virtual bool isGlobalISelAbortEnabled() const;
bool isGlobalISelAbortEnabled() const;
/// Check whether or not a diagnostic should be emitted when GlobalISel
/// uses the fallback path. In other words, it will emit a diagnostic

View File

@ -218,6 +218,7 @@ class Function : public GlobalObject, public ilist_node<Function> {
Attribute::get(getContext(), Kind, Val));
}
/// @brief Add function attributes to this function.
void addFnAttr(Attribute Attr) {
addAttribute(AttributeList::FunctionIndex, Attr);
}
@ -268,6 +269,8 @@ class Function : public GlobalObject, public ilist_node<Function> {
bool hasFnAttribute(Attribute::AttrKind Kind) const {
return AttributeSets.hasFnAttribute(Kind);
}
/// @brief Return true if the function has the attribute.
bool hasFnAttribute(StringRef Kind) const {
return AttributeSets.hasFnAttribute(Kind);
}
@ -276,6 +279,8 @@ class Function : public GlobalObject, public ilist_node<Function> {
Attribute getFnAttribute(Attribute::AttrKind Kind) const {
return getAttribute(AttributeList::FunctionIndex, Kind);
}
/// @brief Return the attribute for the given attribute kind.
Attribute getFnAttribute(StringRef Kind) const {
return getAttribute(AttributeList::FunctionIndex, Kind);
}
@ -342,10 +347,12 @@ class Function : public GlobalObject, public ilist_node<Function> {
return getAttributes().hasParamAttribute(ArgNo, Kind);
}
/// @brief gets the attribute from the list of attributes.
Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
return AttributeSets.getAttribute(i, Kind);
}
/// @brief gets the attribute from the list of attributes.
Attribute getAttribute(unsigned i, StringRef Kind) const {
return AttributeSets.getAttribute(i, Kind);
}
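
The StringRef overloads documented above let string-keyed function attributes be queried without going through AttributeList directly. A short sketch of intended usage, where the attribute name "probe-stack" is only an example key, not something this change introduces:

#include "llvm/IR/Function.h"

using namespace llvm;

// Return the string value of a hypothetical "probe-stack" attribute, or an
// empty StringRef when the function does not carry it.
static StringRef getProbeStackValue(const Function &F) {
  if (!F.hasFnAttribute("probe-stack"))
    return StringRef();
  return F.getFnAttribute("probe-stack").getValueAsString();
}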

View File

@ -304,7 +304,8 @@ class AMDGPUImageLoad<bit NoMem = 0> : Intrinsic <
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
!if(NoMem, [IntrNoMem], [IntrReadMem])>;
!if(NoMem, [IntrNoMem], [IntrReadMem]), "",
!if(NoMem, [], [SDNPMemOperand])>;
def int_amdgcn_image_load : AMDGPUImageLoad;
def int_amdgcn_image_load_mip : AMDGPUImageLoad;
@ -320,7 +321,7 @@ class AMDGPUImageStore : Intrinsic <
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
[]>;
[IntrWriteMem], "", [SDNPMemOperand]>;
def int_amdgcn_image_store : AMDGPUImageStore;
def int_amdgcn_image_store_mip : AMDGPUImageStore;
@ -336,7 +337,8 @@ class AMDGPUImageSample<bit NoMem = 0> : Intrinsic <
llvm_i1_ty, // slc(imm)
llvm_i1_ty, // lwe(imm)
llvm_i1_ty], // da(imm)
!if(NoMem, [IntrNoMem], [IntrReadMem])>;
!if(NoMem, [IntrNoMem], [IntrReadMem]), "",
!if(NoMem, [], [SDNPMemOperand])>;
// Basic sample
def int_amdgcn_image_sample : AMDGPUImageSample;
@ -428,7 +430,7 @@ class AMDGPUImageAtomic : Intrinsic <
llvm_i1_ty, // r128(imm)
llvm_i1_ty, // da(imm)
llvm_i1_ty], // slc(imm)
[]>;
[], "", [SDNPMemOperand]>;
def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic;
def int_amdgcn_image_atomic_add : AMDGPUImageAtomic;
@ -451,7 +453,7 @@ def int_amdgcn_image_atomic_cmpswap : Intrinsic <
llvm_i1_ty, // r128(imm)
llvm_i1_ty, // da(imm)
llvm_i1_ty], // slc(imm)
[]>;
[], "", [SDNPMemOperand]>;
class AMDGPUBufferLoad : Intrinsic <
[llvm_anyfloat_ty],
@ -460,7 +462,7 @@ class AMDGPUBufferLoad : Intrinsic <
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[IntrReadMem]>;
[IntrReadMem], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_load_format : AMDGPUBufferLoad;
def int_amdgcn_buffer_load : AMDGPUBufferLoad;
@ -472,7 +474,7 @@ class AMDGPUBufferStore : Intrinsic <
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[IntrWriteMem]>;
[IntrWriteMem], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
def int_amdgcn_buffer_store : AMDGPUBufferStore;
@ -487,7 +489,7 @@ def int_amdgcn_tbuffer_load : Intrinsic <
llvm_i32_ty, // nfmt(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[]>;
[IntrReadMem], "", [SDNPMemOperand]>;
def int_amdgcn_tbuffer_store : Intrinsic <
[],
@ -501,7 +503,7 @@ def int_amdgcn_tbuffer_store : Intrinsic <
llvm_i32_ty, // nfmt(imm)
llvm_i1_ty, // glc(imm)
llvm_i1_ty], // slc(imm)
[]>;
[IntrWriteMem], "", [SDNPMemOperand]>;
class AMDGPUBufferAtomic : Intrinsic <
[llvm_i32_ty],
@ -510,7 +512,7 @@ class AMDGPUBufferAtomic : Intrinsic <
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm)
[]>;
[], "", [SDNPMemOperand]>;
def int_amdgcn_buffer_atomic_swap : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_add : AMDGPUBufferAtomic;
def int_amdgcn_buffer_atomic_sub : AMDGPUBufferAtomic;
@ -529,7 +531,7 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // offset(SGPR/VGPR/imm)
llvm_i1_ty], // slc(imm)
[]>;
[], "", [SDNPMemOperand]>;
// Uses that do not set the done bit should set IntrWriteMem on the
// call site.

View File

@ -1862,6 +1862,33 @@ using TokenizerCallback = void (*)(StringRef Source, StringSaver &Saver,
SmallVectorImpl<const char *> &NewArgv,
bool MarkEOLs);
/// Tokenizes content of configuration file.
///
/// \param [in] Source The string representing content of config file.
/// \param [in] Saver Delegates back to the caller for saving parsed strings.
/// \param [out] NewArgv All parsed strings are appended to NewArgv.
/// \param [in] MarkEOLs Added for compatibility with TokenizerCallback.
///
/// It works like TokenizeGNUCommandLine with the ability to skip comment lines.
///
void tokenizeConfigFile(StringRef Source, StringSaver &Saver,
SmallVectorImpl<const char *> &NewArgv,
bool MarkEOLs = false);
/// Reads command line options from the given configuration file.
///
/// \param [in] CfgFileName Path to configuration file.
/// \param [in] Saver Object that saves allocated strings.
/// \param [out] Argv Array to which the read options are added.
/// \return true if the file was successfully read.
///
/// It reads content of the specified file, tokenizes it and expands "@file"
/// commands resolving file names in them relative to the directory where
/// CfgFilename resides.
///
bool readConfigFile(StringRef CfgFileName, StringSaver &Saver,
SmallVectorImpl<const char *> &Argv);
/// \brief Expand response files on a command line recursively using the given
/// StringSaver and tokenization strategy. Argv should contain the command line
/// before expansion and will be modified in place. If requested, Argv will
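
Taken together, the two declarations above form a small driver-style config-file API. A sketch of how a tool might call it, where the file name and overall flow are illustrative only:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/StringSaver.h"

using namespace llvm;

int main(int argc, const char **argv) {
  BumpPtrAllocator Alloc;
  StringSaver Saver(Alloc);
  SmallVector<const char *, 16> Argv(argv, argv + argc);
  // Options read from "tool.cfg" (a made-up name) are appended to Argv, and
  // any @file references inside it are expanded relative to its directory.
  if (!cl::readConfigFile("tool.cfg", Saver, Argv))
    return 1;
  cl::ParseCommandLineOptions(static_cast<int>(Argv.size()), Argv.data());
  return 0;
}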

View File

@ -123,8 +123,8 @@ class Target {
using AsmPrinterCtorTy = AsmPrinter *(*)(
TargetMachine &TM, std::unique_ptr<MCStreamer> &&Streamer);
using MCAsmBackendCtorTy = MCAsmBackend *(*)(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
using MCAsmParserCtorTy = MCTargetAsmParser *(*)(
const MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII,
@ -381,15 +381,12 @@ class Target {
}
/// createMCAsmBackend - Create a target specific assembly backend.
///
/// \param TheTriple The target triple string.
MCAsmBackend *createMCAsmBackend(const MCRegisterInfo &MRI,
StringRef TheTriple, StringRef CPU,
const MCTargetOptions &Options)
const {
MCAsmBackend *createMCAsmBackend(const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) const {
if (!MCAsmBackendCtorFn)
return nullptr;
return MCAsmBackendCtorFn(*this, MRI, Triple(TheTriple), CPU, Options);
return MCAsmBackendCtorFn(*this, STI, MRI, Options);
}
/// createMCAsmParser - Create a target specific assembly parser.
@ -1106,10 +1103,10 @@ template <class MCAsmBackendImpl> struct RegisterMCAsmBackend {
}
private:
static MCAsmBackend *Allocator(const Target &T, const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
static MCAsmBackend *Allocator(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options) {
return new MCAsmBackendImpl(T, MRI, TheTriple, CPU);
return new MCAsmBackendImpl(T, STI, MRI);
}
};

View File

@ -264,7 +264,8 @@ template <typename LoopPassT>
class FunctionToLoopPassAdaptor
: public PassInfoMixin<FunctionToLoopPassAdaptor<LoopPassT>> {
public:
explicit FunctionToLoopPassAdaptor(LoopPassT Pass) : Pass(std::move(Pass)) {
explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false)
: Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging) {
LoopCanonicalizationFPM.addPass(LoopSimplifyPass());
LoopCanonicalizationFPM.addPass(LCSSAPass());
}
@ -384,8 +385,8 @@ class FunctionToLoopPassAdaptor
/// adaptor.
template <typename LoopPassT>
FunctionToLoopPassAdaptor<LoopPassT>
createFunctionToLoopPassAdaptor(LoopPassT Pass) {
return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass));
createFunctionToLoopPassAdaptor(LoopPassT Pass, bool DebugLogging = false) {
return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass), DebugLogging);
}
/// \brief Pass for printing a loop's contents as textual IR.
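
The extra DebugLogging parameter above is forwarded into the adaptor's internal canonicalization pass manager, matching the PassBuilder changes later in this commit. A rough sketch of building such a pipeline with logging enabled, where the specific passes are illustrative:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"

using namespace llvm;

static FunctionPassManager buildLoopPipeline(bool DebugLogging) {
  LoopPassManager LPM(DebugLogging);
  LPM.addPass(LICMPass());
  FunctionPassManager FPM(DebugLogging);
  // The adaptor now forwards DebugLogging to the LoopSimplify/LCSSA
  // canonicalization pass manager it runs before the loop passes.
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
  return FPM;
}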

View File

@ -826,7 +826,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
Q, MaxRecurse))
return V;
@ -3838,12 +3838,13 @@ Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
// Fold into undef if index is out of bounds.
if (auto *CI = dyn_cast<ConstantInt>(Idx)) {
uint64_t NumElements = cast<VectorType>(Vec->getType())->getNumElements();
if (CI->uge(NumElements))
return UndefValue::get(Vec->getType());
}
// TODO: We should also fold if index is itself an undef.
// If index is undef, it might be out of bounds (see above case)
if (isa<UndefValue>(Idx))
return UndefValue::get(Vec->getType());
return nullptr;
}
@ -3896,10 +3897,13 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQ
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
if (IdxC->getValue().ule(Vec->getType()->getVectorNumElements()))
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
return Elt;
if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
if (IdxC->getValue().uge(Vec->getType()->getVectorNumElements()))
// definitely out of bounds, thus undefined result
return UndefValue::get(Vec->getType()->getVectorElementType());
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
return Elt;
}
// An undef extract index can be arbitrarily chosen to be an out-of-range
// index value, which would result in the instruction being undef.
@ -4489,28 +4493,55 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
}
}
Value *IIOperand = *ArgBegin;
Value *X;
switch (IID) {
case Intrinsic::fabs: {
if (SignBitMustBeZero(*ArgBegin, Q.TLI))
return *ArgBegin;
if (SignBitMustBeZero(IIOperand, Q.TLI))
return IIOperand;
return nullptr;
}
case Intrinsic::bswap: {
Value *IIOperand = *ArgBegin;
Value *X = nullptr;
// bswap(bswap(x)) -> x
if (match(IIOperand, m_BSwap(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::bitreverse: {
Value *IIOperand = *ArgBegin;
Value *X = nullptr;
// bitreverse(bitreverse(x)) -> x
if (match(IIOperand, m_BitReverse(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::exp: {
// exp(log(x)) -> x
if (Q.CxtI->isFast() &&
match(IIOperand, m_Intrinsic<Intrinsic::log>(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::exp2: {
// exp2(log2(x)) -> x
if (Q.CxtI->isFast() &&
match(IIOperand, m_Intrinsic<Intrinsic::log2>(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::log: {
// log(exp(x)) -> x
if (Q.CxtI->isFast() &&
match(IIOperand, m_Intrinsic<Intrinsic::exp>(m_Value(X))))
return X;
return nullptr;
}
case Intrinsic::log2: {
// log2(exp2(x)) -> x
if (Q.CxtI->isFast() &&
match(IIOperand, m_Intrinsic<Intrinsic::exp2>(m_Value(X)))) {
return X;
}
return nullptr;
}
default:
return nullptr;
}

View File

@ -2358,7 +2358,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
FoundMatch = true;
}
if (FoundMatch)
return getAddExpr(Ops, Flags);
return getAddExpr(Ops, Flags, Depth + 1);
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
@ -6402,9 +6402,8 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
BasicBlock *Header = L->getHeader();
// Push all Loop-header PHIs onto the Worklist stack.
for (BasicBlock::iterator I = Header->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
Worklist.push_back(PN);
for (PHINode &PN : Header->phis())
Worklist.push_back(&PN);
}
const ScalarEvolution::BackedgeTakenInfo &
@ -7638,12 +7637,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
if (!Latch)
return nullptr;
for (auto &I : *Header) {
PHINode *PHI = dyn_cast<PHINode>(&I);
if (!PHI) break;
auto *StartCST = getOtherIncomingValue(PHI, Latch);
if (!StartCST) continue;
CurrentIterVals[PHI] = StartCST;
for (PHINode &PHI : Header->phis()) {
if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
CurrentIterVals[&PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return RetVal = nullptr;
@ -7720,13 +7716,9 @@ const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
BasicBlock *Latch = L->getLoopLatch();
assert(Latch && "Should follow from NumIncomingValues == 2!");
for (auto &I : *Header) {
PHINode *PHI = dyn_cast<PHINode>(&I);
if (!PHI)
break;
auto *StartCST = getOtherIncomingValue(PHI, Latch);
if (!StartCST) continue;
CurrentIterVals[PHI] = StartCST;
for (PHINode &PHI : Header->phis()) {
if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
CurrentIterVals[&PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
return getCouldNotCompute();

View File

@ -1154,16 +1154,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IVIncInsertLoop &&
SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
for (auto &I : *L->getHeader()) {
auto *PN = dyn_cast<PHINode>(&I);
// Found first non-phi, the rest of instructions are also not Phis.
if (!PN)
break;
if (!SE.isSCEVable(PN->getType()))
for (PHINode &PN : L->getHeader()->phis()) {
if (!SE.isSCEVable(PN.getType()))
continue;
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
if (!PhiSCEV)
continue;
@ -1175,16 +1170,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
continue;
Instruction *TempIncV =
cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
// Check whether we can reuse this PHI node.
if (LSRMode) {
if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
continue;
if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
continue;
} else {
if (!isNormalAddRecExprPHI(PN, TempIncV, L))
if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
continue;
}
@ -1193,7 +1188,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IncV = TempIncV;
TruncTy = nullptr;
InvertStep = false;
AddRecPhiMatch = PN;
AddRecPhiMatch = &PN;
break;
}
@ -1203,7 +1198,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
// Record the phi node. But don't stop; we might find an exact match
// later.
AddRecPhiMatch = PN;
AddRecPhiMatch = &PN;
IncV = TempIncV;
TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
}
@ -1863,12 +1858,8 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
const TargetTransformInfo *TTI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
for (auto &I : *L->getHeader()) {
if (auto *PN = dyn_cast<PHINode>(&I))
Phis.push_back(PN);
else
break;
}
for (PHINode &PN : L->getHeader()->phis())
Phis.push_back(&PN);
if (TTI)
std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {

View File

@ -2264,9 +2264,9 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
unsigned ShAmtLimited = ShAmt->getZExtValue();
if (ShAmtLimited >= TyBits)
if (ShAmt->uge(TyBits))
break; // Bad shift.
unsigned ShAmtLimited = ShAmt->getZExtValue();
Tmp += ShAmtLimited;
if (Tmp > TyBits) Tmp = TyBits;
}
@ -2277,9 +2277,9 @@ static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
if (ShAmt->uge(TyBits) || // Bad shift.
ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
Tmp2 = ShAmt->getZExtValue();
if (Tmp2 >= TyBits || // Bad shift.
Tmp2 >= Tmp) break; // Shifted all sign bits out.
return Tmp - Tmp2;
}
break;
@ -4161,6 +4161,81 @@ static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
return {SPF_UNKNOWN, SPNB_NA, false};
}
/// Recognize variations of:
/// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
Value *CmpLHS, Value *CmpRHS,
Value *TrueVal, Value *FalseVal) {
// TODO: Allow FP min/max with nnan/nsz.
assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
Value *A, *B;
SelectPatternResult L = matchSelectPattern(TrueVal, A, B);
if (!SelectPatternResult::isMinOrMax(L.Flavor))
return {SPF_UNKNOWN, SPNB_NA, false};
Value *C, *D;
SelectPatternResult R = matchSelectPattern(FalseVal, C, D);
if (L.Flavor != R.Flavor)
return {SPF_UNKNOWN, SPNB_NA, false};
// Match the compare to the min/max operations of the select operands.
switch (L.Flavor) {
case SPF_SMIN:
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
Pred = ICmpInst::getSwappedPredicate(Pred);
std::swap(CmpLHS, CmpRHS);
}
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
break;
return {SPF_UNKNOWN, SPNB_NA, false};
case SPF_SMAX:
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
Pred = ICmpInst::getSwappedPredicate(Pred);
std::swap(CmpLHS, CmpRHS);
}
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
break;
return {SPF_UNKNOWN, SPNB_NA, false};
case SPF_UMIN:
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
Pred = ICmpInst::getSwappedPredicate(Pred);
std::swap(CmpLHS, CmpRHS);
}
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
break;
return {SPF_UNKNOWN, SPNB_NA, false};
case SPF_UMAX:
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
Pred = ICmpInst::getSwappedPredicate(Pred);
std::swap(CmpLHS, CmpRHS);
}
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
break;
return {SPF_UNKNOWN, SPNB_NA, false};
default:
llvm_unreachable("Bad flavor while matching min/max");
}
// a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
if (CmpLHS == A && CmpRHS == C && D == B)
return {L.Flavor, SPNB_NA, false};
// a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
if (CmpLHS == A && CmpRHS == D && C == B)
return {L.Flavor, SPNB_NA, false};
// b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
if (CmpLHS == B && CmpRHS == C && D == A)
return {L.Flavor, SPNB_NA, false};
// b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
if (CmpLHS == B && CmpRHS == D && C == A)
return {L.Flavor, SPNB_NA, false};
return {SPF_UNKNOWN, SPNB_NA, false};
}
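
The rewrite documented in the comment above relies on the select being equivalent to a single min (or max) over all three values. A tiny standalone check of the smin case, kept separate from the LLVM code as a sanity sketch:

#include <algorithm>
#include <cassert>

int main() {
  // a < c ? min(a,b) : min(b,c) picks min(a,b,c) for every choice of inputs.
  for (int a = -3; a <= 3; ++a)
    for (int b = -3; b <= 3; ++b)
      for (int c = -3; c <= 3; ++c) {
        int Sel = a < c ? std::min(a, b) : std::min(b, c);
        assert(Sel == std::min({a, b, c}));
      }
  return 0;
}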
/// Match non-obvious integer minimum and maximum sequences.
static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
Value *CmpLHS, Value *CmpRHS,
@ -4174,6 +4249,10 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
return SPR;
SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
return SPR;
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};

View File

@ -633,16 +633,10 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
if (DestBBPred == BB)
continue;
bool HasAllSameValue = true;
BasicBlock::const_iterator DestBBI = DestBB->begin();
while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
if (DestPN->getIncomingValueForBlock(BB) !=
DestPN->getIncomingValueForBlock(DestBBPred)) {
HasAllSameValue = false;
break;
}
}
if (HasAllSameValue)
if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
return DestPN.getIncomingValueForBlock(BB) ==
DestPN.getIncomingValueForBlock(DestBBPred);
}))
SameIncomingValueBBs.insert(DestBBPred);
}
@ -672,9 +666,8 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
// We only want to eliminate blocks whose phi nodes are used by phi nodes in
// the successor. If there are more complex condition (e.g. preheaders),
// don't mess around with them.
BasicBlock::const_iterator BBI = BB->begin();
while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
for (const User *U : PN->users()) {
for (const PHINode &PN : BB->phis()) {
for (const User *U : PN.users()) {
const Instruction *UI = cast<Instruction>(U);
if (UI->getParent() != DestBB || !isa<PHINode>(UI))
return false;
@ -713,10 +706,9 @@ bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
if (BBPreds.count(Pred)) { // Common predecessor?
BBI = DestBB->begin();
while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
const Value *V1 = PN->getIncomingValueForBlock(Pred);
const Value *V2 = PN->getIncomingValueForBlock(BB);
for (const PHINode &PN : DestBB->phis()) {
const Value *V1 = PN.getIncomingValueForBlock(Pred);
const Value *V2 = PN.getIncomingValueForBlock(BB);
// If V2 is a phi node in BB, look up what the mapped value will be.
if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
@ -759,11 +751,9 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
// to handle the new incoming edges it is about to have.
PHINode *PN;
for (BasicBlock::iterator BBI = DestBB->begin();
(PN = dyn_cast<PHINode>(BBI)); ++BBI) {
for (PHINode &PN : DestBB->phis()) {
// Remove the incoming value for BB, and remember it.
Value *InVal = PN->removeIncomingValue(BB, false);
Value *InVal = PN.removeIncomingValue(BB, false);
// Two options: either the InVal is a phi node defined in BB or it is some
// value that dominates BB.
@ -771,17 +761,17 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
if (InValPhi && InValPhi->getParent() == BB) {
// Add all of the input values of the input PHI as inputs of this phi.
for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
PN->addIncoming(InValPhi->getIncomingValue(i),
InValPhi->getIncomingBlock(i));
PN.addIncoming(InValPhi->getIncomingValue(i),
InValPhi->getIncomingBlock(i));
} else {
// Otherwise, add one instance of the dominating value for each edge that
// we will be adding.
if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
} else {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
PN->addIncoming(InVal, *PI);
PN.addIncoming(InVal, *PI);
}
}
}
@ -6497,22 +6487,16 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
std::swap(TBB, FBB);
// Replace the old BB with the new BB.
for (auto &I : *TBB) {
PHINode *PN = dyn_cast<PHINode>(&I);
if (!PN)
break;
for (PHINode &PN : TBB->phis()) {
int i;
while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
PN->setIncomingBlock(i, TmpBB);
while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
PN.setIncomingBlock(i, TmpBB);
}
// Add another incoming edge from the new BB.
for (auto &I : *FBB) {
PHINode *PN = dyn_cast<PHINode>(&I);
if (!PN)
break;
auto *Val = PN->getIncomingValueForBlock(&BB);
PN->addIncoming(Val, TmpBB);
for (PHINode &PN : FBB->phis()) {
auto *Val = PN.getIncomingValueForBlock(&BB);
PN.addIncoming(Val, TmpBB);
}
// Update the branch weights (from SelectionDAGBuilder::

View File

@ -815,7 +815,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (CI.isInlineAsm())
return translateInlineAsm(CI, MIRBuilder);
if (!F || !F->isIntrinsic()) {
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (F && F->isIntrinsic()) {
ID = F->getIntrinsicID();
if (TII && ID == Intrinsic::not_intrinsic)
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
}
if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
SmallVector<unsigned, 8> Args;
for (auto &Arg: CI.arg_operands())
@ -827,10 +834,6 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
});
}
Intrinsic::ID ID = F->getIntrinsicID();
if (TII && ID == Intrinsic::not_intrinsic)
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
if (translateKnownIntrinsic(CI, ID, MIRBuilder))

View File

@ -813,7 +813,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
unsigned Zero = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildConstant(Zero, 0);
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
// For *signed* multiply, overflow is detected by checking:
// (hi != (lo >> bitwidth-1))
if (Opcode == TargetOpcode::G_SMULH) {
unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
.addDef(Shifted)
.addUse(Res)
.addUse(ShiftAmt);
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
} else {
MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
}
MI.eraseFromParent();
return Legalized;
}
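
The signed case added above uses the standard check that a widened product fits in the narrow type exactly when the high half equals the sign bits of the low half. A small self-contained illustration of that formula on 32-bit values, assuming the usual two's-complement arithmetic shift and not tied to GlobalISel types:

#include <cassert>
#include <cstdint>

// Returns true when a * b does not fit in 32 bits: the high half of the
// 64-bit product must equal the low half shifted arithmetically by 31,
// i.e. hi != (lo >> bitwidth-1) signals overflow.
static bool smulOverflows(int32_t a, int32_t b) {
  int64_t Wide = static_cast<int64_t>(a) * b;
  int32_t Lo = static_cast<int32_t>(Wide);
  int32_t Hi = static_cast<int32_t>(Wide >> 32);
  return Hi != (Lo >> 31);
}

int main() {
  assert(!smulOverflows(1000, 1000));     // 1,000,000 fits
  assert(smulOverflows(100000, 100000));  // 10,000,000,000 does not
  return 0;
}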

View File

@ -136,8 +136,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
Options.MCOptions);
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
Context, std::move(FOut), Options.MCOptions.AsmVerbose,
@ -151,8 +150,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, Context);
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
Options.MCOptions);
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
if (!MCE || !MAB)
return true;
@ -225,17 +223,16 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCRegisterInfo &MRI = *getMCRegisterInfo();
MCCodeEmitter *MCE =
getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
MCAsmBackend *MAB =
getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
Options.MCOptions);
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
if (!MCE || !MAB)
return true;
const Triple &T = getTargetTriple();
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), Out,
std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,

View File

@ -242,8 +242,11 @@ class UserValue {
// We are storing a MachineOperand outside a MachineInstr.
locations.back().clearParent();
// Don't store def operands.
if (locations.back().isReg())
if (locations.back().isReg()) {
if (locations.back().isDef())
locations.back().setIsDead(false);
locations.back().setIsUse();
}
return locations.size() - 1;
}

View File

@ -3850,7 +3850,6 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
return false;
}
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::AssertZext: {
unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@ -13783,30 +13782,30 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
// Deal with elidable overlapping chained stores.
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain))
if (OptLevel != CodeGenOpt::None && ST->isUnindexed() &&
ST1->isUnindexed() && !ST1->isVolatile() && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef() && !ST->isVolatile()) {
BaseIndexOffset STBasePtr = BaseIndexOffset::match(ST->getBasePtr(), DAG);
BaseIndexOffset ST1BasePtr =
BaseIndexOffset::match(ST1->getBasePtr(), DAG);
unsigned STBytes = ST->getMemoryVT().getStoreSize();
unsigned ST1Bytes = ST1->getMemoryVT().getStoreSize();
int64_t PtrDiff;
// If this is a store whose preceding store writes to a subset of the same
// memory and no other node is chained to that store, we can
// effectively drop the preceding store. Do not remove stores to undef as they may
// be used as data sinks.
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
!ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
ST->getMemoryVT() == ST1->getMemoryVT()) {
// If this is a store followed by a store with the same value to the same
// location, then the store is dead/noop.
if (ST1->getValue() == Value) {
// The store is dead, remove it.
return Chain;
}
if (((ST->getBasePtr() == ST1->getBasePtr()) &&
(ST->getValue() == ST1->getValue())) ||
(STBasePtr.equalBaseIndex(ST1BasePtr, DAG, PtrDiff) &&
(0 <= PtrDiff) && (PtrDiff + ST1Bytes <= STBytes))) {
// If this is a store whose preceding store writes to the same location
// and no other node is chained to that store, we can effectively
// drop the store. Do not remove stores to undef as they may be used as
// data sinks.
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef()) {
// ST1 is fully overwritten and can be elided. Combine with its chain
// value.
CombineTo(ST1, ST1->getChain());
return SDValue(N, 0);
return SDValue();
}
}
}
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
// truncating store. We can do this even if this is already a truncstore.

View File

@ -2051,11 +2051,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
for (BasicBlock::const_iterator I = SuccBB->begin();
const auto *PN = dyn_cast<PHINode>(I); ++I) {
for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead phi's.
if (PN->use_empty())
if (PN.use_empty())
continue;
// Only handle legal types. Two interesting things to note here. First,
@ -2064,7 +2062,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// own moves. Second, this check is necessary because FastISel doesn't
// use CreateRegs to create registers, so it always creates
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true);
EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
// Handle integer promotions, though, because they're common and easy.
if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) {
@ -2073,11 +2071,11 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
}
}
const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
// Set the DebugLoc for the copy. Prefer the location of the operand
// if there is one; use the location of the PHI otherwise.
DbgLoc = PN->getDebugLoc();
DbgLoc = PN.getDebugLoc();
if (const auto *Inst = dyn_cast<Instruction>(PHIOp))
DbgLoc = Inst->getDebugLoc();

View File

@ -257,20 +257,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Create Machine PHI nodes for LLVM PHI nodes, lowering them as
// appropriate.
for (BasicBlock::const_iterator I = BB.begin();
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (PN->use_empty()) continue;
// Skip empty types
if (PN->getType()->isEmptyTy())
for (const PHINode &PN : BB.phis()) {
if (PN.use_empty())
continue;
DebugLoc DL = PN->getDebugLoc();
unsigned PHIReg = ValueMap[PN];
// Skip empty types
if (PN.getType()->isEmptyTy())
continue;
DebugLoc DL = PN.getDebugLoc();
unsigned PHIReg = ValueMap[&PN];
assert(PHIReg && "PHI node does not have an assigned virtual register!");
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs);
ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs);
for (EVT VT : ValueVTs) {
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

View File

@ -139,14 +139,14 @@ class VectorLegalizer {
/// \brief Implements [SU]INT_TO_FP vector promotion.
///
/// This is a [zs]ext of the input operand to the next size up.
/// This is a [zs]ext of the input operand to a larger integer type.
SDValue PromoteINT_TO_FP(SDValue Op);
/// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
///
/// It is promoted to the next size up integer type. The result is then
/// It is promoted to a larger integer type. The result is then
/// truncated back to the original type.
SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
SDValue PromoteFP_TO_INT(SDValue Op);
public:
VectorLegalizer(SelectionDAG& dag) :
@ -431,7 +431,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
// Promote the operation by extending the operand.
return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
return PromoteFP_TO_INT(Op);
}
// There are currently two cases of vector promotion:
@ -472,20 +472,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) {
SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
// INT_TO_FP operations may require the input operand be promoted even
// when the type is otherwise legal.
EVT VT = Op.getOperand(0).getValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
MVT VT = Op.getOperand(0).getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
"Vectors have different number of elements!");
// Normal getTypeToPromoteTo() doesn't work here, as that will promote
// by widening the vector w/ the same element width and twice the number
// of elements. We want the other way around, the same number of elements,
// each twice the width.
//
// Increase the bitwidth of the element to the next pow-of-two
// (which is greater than 8 bits).
EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
SDLoc dl(Op);
SmallVector<SDValue, 4> Operands(Op.getNumOperands());
@ -505,35 +496,28 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assuming that the
// promoted vector type has the same overall size.
SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
MVT VT = Op.getSimpleValueType();
MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
"Vectors have different number of elements!");
EVT NewVT = VT;
unsigned NewOpc;
while (true) {
NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
NewOpc = ISD::FP_TO_SINT;
break;
}
if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
NewOpc = ISD::FP_TO_UINT;
break;
}
}
unsigned NewOpc = Op->getOpcode();
// Change FP_TO_UINT to FP_TO_SINT if possible.
// TODO: Should we only do this if FP_TO_UINT itself isn't legal?
if (NewOpc == ISD::FP_TO_UINT &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
SDLoc dl(Op);
SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0));
SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
: ISD::AssertSext,
dl, NewVT, Promoted,
dl, NVT, Promoted,
DAG.getValueType(VT.getScalarType()));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
}

View File

@ -3374,11 +3374,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = N->getOperand(0);
// If some legalization strategy other than widening is used on the operand,
// we can't safely assume that just extending the low lanes is the correct
// transformation.
if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
return WidenVecOp_Convert(N);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
"Unexpected type action");
InOp = GetWidenedVector(InOp);
assert(VT.getVectorNumElements() <
InOp.getValueType().getVectorNumElements() &&
@ -3440,20 +3438,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely that we
// can fix the input to a legal type so unroll the convert into some scalar
// code and create a nasty build vector.
// Since the result is legal and the input is illegal.
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
SDValue InOp = N->getOperand(0);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
"Unexpected type action");
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
unsigned Opcode = N->getOpcode();
// See if a widened result type would be legal, if so widen the node.
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
InVT.getVectorNumElements());
if (TLI.isTypeLegal(WideVT)) {
SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getIntPtrConstant(0, dl));
}
EVT InEltVT = InVT.getVectorElementType();
unsigned Opcode = N->getOpcode();
// Unroll the convert into some scalar code and create a nasty build vector.
SmallVector<SDValue, 16> Ops(NumElts);
for (unsigned i=0; i < NumElts; ++i)
Ops[i] = DAG.getNode(
@ -3506,8 +3515,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
unsigned NumOperands = N->getNumOperands();
for (unsigned i=0; i < NumOperands; ++i) {
SDValue InOp = N->getOperand(i);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
"Unexpected type action");
InOp = GetWidenedVector(InOp);
for (unsigned j=0; j < NumInElts; ++j)
Ops[Idx++] = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,

View File

@ -8940,17 +8940,17 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
for (BasicBlock::const_iterator I = SuccBB->begin();
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead phi's.
if (PN->use_empty()) continue;
if (PN.use_empty())
continue;
// Skip empty types
if (PN->getType()->isEmptyTy())
if (PN.getType()->isEmptyTy())
continue;
unsigned Reg;
const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
@ -8977,7 +8977,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs);
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);

View File

@ -1445,13 +1445,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (AllPredsVisited) {
for (BasicBlock::const_iterator I = LLVMBB->begin();
const PHINode *PN = dyn_cast<PHINode>(I); ++I)
FuncInfo->ComputePHILiveOutRegInfo(PN);
for (const PHINode &PN : LLVMBB->phis())
FuncInfo->ComputePHILiveOutRegInfo(&PN);
} else {
for (BasicBlock::const_iterator I = LLVMBB->begin();
const PHINode *PN = dyn_cast<PHINode>(I); ++I)
FuncInfo->InvalidatePHILiveOutRegInfo(PN);
for (const PHINode &PN : LLVMBB->phis())
FuncInfo->InvalidatePHILiveOutRegInfo(&PN);
}
FuncInfo->VisitedBBs.insert(LLVMBB);

View File

@ -712,8 +712,11 @@ bool TargetPassConfig::addCoreISelPasses() {
// Ask the target for an isel.
// Enable GlobalISel if the target wants to, but allow that to be overridden.
// Explicitly enabling fast-isel should override implicitly enabled
// global-isel.
if (EnableGlobalISel == cl::BOU_TRUE ||
(EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled())) {
(EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled() &&
EnableFastISelOption != cl::BOU_TRUE)) {
if (addIRTranslator())
return true;
@ -1133,7 +1136,12 @@ bool TargetPassConfig::isGlobalISelEnabled() const {
}
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
return EnableGlobalISelAbort == 1;
if (EnableGlobalISelAbort.getNumOccurrences() > 0)
return EnableGlobalISelAbort == 1;
// When no abort behaviour is specified, we don't abort if the target says
// that GISel is enabled.
return !isGlobalISelEnabled();
}
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {

View File

@ -838,17 +838,11 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
for (auto &BBMapping : Orig2Clone) {
BasicBlock *OldBlock = BBMapping.first;
BasicBlock *NewBlock = BBMapping.second;
for (Instruction &OldI : *OldBlock) {
auto *OldPN = dyn_cast<PHINode>(&OldI);
if (!OldPN)
break;
UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true);
for (PHINode &OldPN : OldBlock->phis()) {
UpdatePHIOnClonedBlock(&OldPN, /*IsForOldBlock=*/true);
}
for (Instruction &NewI : *NewBlock) {
auto *NewPN = dyn_cast<PHINode>(&NewI);
if (!NewPN)
break;
UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false);
for (PHINode &NewPN : NewBlock->phis()) {
UpdatePHIOnClonedBlock(&NewPN, /*IsForOldBlock=*/false);
}
}
@ -858,17 +852,13 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
BasicBlock *OldBlock = BBMapping.first;
BasicBlock *NewBlock = BBMapping.second;
for (BasicBlock *SuccBB : successors(NewBlock)) {
for (Instruction &SuccI : *SuccBB) {
auto *SuccPN = dyn_cast<PHINode>(&SuccI);
if (!SuccPN)
break;
for (PHINode &SuccPN : SuccBB->phis()) {
// Ok, we have a PHI node. Figure out what the incoming value was for
// the OldBlock.
int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock);
int OldBlockIdx = SuccPN.getBasicBlockIndex(OldBlock);
if (OldBlockIdx == -1)
break;
Value *IV = SuccPN->getIncomingValue(OldBlockIdx);
Value *IV = SuccPN.getIncomingValue(OldBlockIdx);
// Remap the value if necessary.
if (auto *Inst = dyn_cast<Instruction>(IV)) {
@ -877,7 +867,7 @@ void WinEHPrepare::cloneCommonBlocks(Function &F) {
IV = I->second;
}
SuccPN->addIncoming(IV, NewBlock);
SuccPN.addIncoming(IV, NewBlock);
}
}
}

View File

@ -264,7 +264,8 @@ const BasicBlock *BasicBlock::getUniqueSuccessor() const {
}
iterator_range<BasicBlock::phi_iterator> BasicBlock::phis() {
return make_range<phi_iterator>(dyn_cast<PHINode>(&front()), nullptr);
PHINode *P = empty() ? nullptr : dyn_cast<PHINode>(&*begin());
return make_range<phi_iterator>(P, nullptr);
}
/// This method is used to notify a BasicBlock that the
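
The guard added above lets phis() be called on an empty block. For reference, a minimal self-contained use of the range, built on a freshly constructed function; everything below is illustrative, not part of this change:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("phis_demo", Ctx);
  Function *F = Function::Create(
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
      Function::ExternalLinkage, "f", &M);
  BasicBlock *Pred = BasicBlock::Create(Ctx, "pred", F);
  BasicBlock *BB = BasicBlock::Create(Ctx, "bb", F);
  IRBuilder<> B(Pred);
  B.CreateBr(BB);
  B.SetInsertPoint(BB);
  PHINode *PN = B.CreatePHI(B.getInt32Ty(), /*NumReservedValues=*/1, "p");
  PN->addIncoming(B.getInt32(7), Pred);
  B.CreateRetVoid();

  // phis() visits only the PHI nodes at the top of the block and, with the
  // fix above, simply yields an empty range for a block with no instructions.
  for (PHINode &Phi : BB->phis())
    Phi.setName("renamed");
  return 0;
}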

View File

@ -2210,24 +2210,23 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
SmallVector<BasicBlock*, 8> Preds(pred_begin(&BB), pred_end(&BB));
SmallVector<std::pair<BasicBlock*, Value*>, 8> Values;
std::sort(Preds.begin(), Preds.end());
PHINode *PN;
for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I));++I) {
for (const PHINode &PN : BB.phis()) {
// Ensure that PHI nodes have at least one entry!
Assert(PN->getNumIncomingValues() != 0,
Assert(PN.getNumIncomingValues() != 0,
"PHI nodes must have at least one entry. If the block is dead, "
"the PHI should be removed!",
PN);
Assert(PN->getNumIncomingValues() == Preds.size(),
&PN);
Assert(PN.getNumIncomingValues() == Preds.size(),
"PHINode should have one entry for each predecessor of its "
"parent basic block!",
PN);
&PN);
// Get and sort all incoming values in the PHI node...
Values.clear();
Values.reserve(PN->getNumIncomingValues());
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
Values.push_back(std::make_pair(PN->getIncomingBlock(i),
PN->getIncomingValue(i)));
Values.reserve(PN.getNumIncomingValues());
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
Values.push_back(
std::make_pair(PN.getIncomingBlock(i), PN.getIncomingValue(i)));
std::sort(Values.begin(), Values.end());
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
@ -2239,12 +2238,12 @@ void Verifier::visitBasicBlock(BasicBlock &BB) {
Values[i].second == Values[i - 1].second,
"PHI node has multiple entries for the same basic block with "
"different incoming values!",
PN, Values[i].first, Values[i].second, Values[i - 1].second);
&PN, Values[i].first, Values[i].second, Values[i - 1].second);
// Check to make sure that the predecessors and PHI node entries are
// matched up.
Assert(Values[i].first == Preds[i],
"PHI node entries do not match predecessors!", PN,
"PHI node entries do not match predecessors!", &PN,
Values[i].first, Preds[i]);
}
}

View File

@ -447,7 +447,7 @@ bool ELFAsmParser::parseMetadataSym(MCSymbolELF *&Associated) {
Lex();
StringRef Name;
if (getParser().parseIdentifier(Name))
return true;
return TokError("invalid metadata symbol");
Associated = dyn_cast_or_null<MCSymbolELF>(getContext().lookupSymbol(Name));
if (!Associated || !Associated->isInSection())
return TokError("symbol is not in a section: " + Name);

View File

@ -412,10 +412,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1)));
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2)));
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging));
// Eliminate redundancies.
if (Level != O1) {
@ -450,7 +450,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(DSEPass());
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
for (auto &C : ScalarOptimizerLateEPCallbacks)
C(FPM, Level);
@ -510,7 +510,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
MPM.addPass(PGOInstrumentationGen());
FunctionPassManager FPM;
FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
FPM.addPass(
createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
// Add the profile lowering pass.
@ -730,7 +731,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
C(OptimizePM, Level);
// First rotate loops that may have been un-rotated by prior passes.
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
// Distribute loops to allow partial vectorization. I.e. isolate dependences
// into separate loop that would otherwise inhibit vectorization. This is
@ -777,7 +779,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(LoopUnrollPass(Level));
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
@ -1533,7 +1535,8 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
DebugLogging))
return false;
// Add the nested pass manager with the appropriate adaptor.
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
FPM.addPass(
createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
return true;
}
if (auto Count = parseRepeatPassName(Name)) {

View File

@ -873,6 +873,45 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
NewArgv.push_back(nullptr);
}
void cl::tokenizeConfigFile(StringRef Source, StringSaver &Saver,
SmallVectorImpl<const char *> &NewArgv,
bool MarkEOLs) {
for (const char *Cur = Source.begin(); Cur != Source.end();) {
SmallString<128> Line;
// Check for comment line.
if (isWhitespace(*Cur)) {
while (Cur != Source.end() && isWhitespace(*Cur))
++Cur;
continue;
}
if (*Cur == '#') {
while (Cur != Source.end() && *Cur != '\n')
++Cur;
continue;
}
// Find end of the current line.
const char *Start = Cur;
for (const char *End = Source.end(); Cur != End; ++Cur) {
if (*Cur == '\\') {
if (Cur + 1 != End) {
++Cur;
if (*Cur == '\n' ||
(*Cur == '\r' && (Cur + 1 != End) && Cur[1] == '\n')) {
Line.append(Start, Cur - 1);
if (*Cur == '\r')
++Cur;
Start = Cur + 1;
}
}
} else if (*Cur == '\n')
break;
}
// Tokenize line.
Line.append(Start, Cur);
cl::TokenizeGNUCommandLine(Line, Saver, NewArgv, MarkEOLs);
}
}
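
For illustration, a configuration file that the tokenizer above would accept might look like the following (contents are made up): a '#' at the start of a line begins a comment, and a trailing backslash joins the next line into the same logical line before GNU-style tokenization:

# options shared by local builds
-O2 -g \
    -fvisibility=hidden
-I include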
// It is called byte order marker but the UTF-8 BOM is actually not affected
// by the host system's endianness.
static bool hasUTF8ByteOrderMark(ArrayRef<char> S) {
@ -977,6 +1016,15 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
return AllExpanded;
}
bool cl::readConfigFile(StringRef CfgFile, StringSaver &Saver,
SmallVectorImpl<const char *> &Argv) {
if (!ExpandResponseFile(CfgFile, Saver, cl::tokenizeConfigFile, Argv,
/*MarkEOLs*/ false, /*RelativeNames*/ true))
return false;
return ExpandResponseFiles(Saver, cl::tokenizeConfigFile, Argv,
/*MarkEOLs*/ false, /*RelativeNames*/ true);
}
/// ParseEnvironmentOptions - An alternative entry point to the
/// CommandLine library, which allows you to read the program's name
/// from the caller (as PROGNAME) and its command-line arguments from

View File

@ -632,16 +632,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// AArch64 doesn't have a direct vector ->f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
// i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
// -> v8f16 conversions.
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);

View File

@ -756,27 +756,31 @@ class ZPRRegOp <string Suffix, AsmOperandClass C,
//******************************************************************************
// SVE predicate register class.
def PPR : RegisterClass<"AArch64",
[nxv16i1, nxv8i1, nxv4i1, nxv2i1],
16, (sequence "P%u", 0, 15)> {
// SVE predicate register classes.
class PPRClass<int lastreg> : RegisterClass<
"AArch64",
[ nxv16i1, nxv8i1, nxv4i1, nxv2i1 ], 16,
(sequence "P%u", 0, lastreg)> {
let Size = 16;
}
class PPRAsmOperand <string name, int Width>: AsmOperandClass {
def PPR : PPRClass<15>;
def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class.
class PPRAsmOperand <string name, string RegClass, int Width>: AsmOperandClass {
let Name = "SVE" # name # "Reg";
let PredicateMethod = "isSVEVectorRegOfWidth<"
# Width # ", AArch64::PPRRegClassID>";
# Width # ", " # "AArch64::" # RegClass # "RegClassID>";
let DiagnosticType = "InvalidSVE" # name # "Reg";
let RenderMethod = "addRegOperands";
let ParserMethod = "tryParseSVEPredicateVector";
}
def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", -1>;
def PPRAsmOp8 : PPRAsmOperand<"PredicateB", 8>;
def PPRAsmOp16 : PPRAsmOperand<"PredicateH", 16>;
def PPRAsmOp32 : PPRAsmOperand<"PredicateS", 32>;
def PPRAsmOp64 : PPRAsmOperand<"PredicateD", 64>;
def PPRAsmOpAny : PPRAsmOperand<"PredicateAny", "PPR", -1>;
def PPRAsmOp8 : PPRAsmOperand<"PredicateB", "PPR", 8>;
def PPRAsmOp16 : PPRAsmOperand<"PredicateH", "PPR", 16>;
def PPRAsmOp32 : PPRAsmOperand<"PredicateS", "PPR", 32>;
def PPRAsmOp64 : PPRAsmOperand<"PredicateD", "PPR", 64>;
def PPRAny : PPRRegOp<"", PPRAsmOpAny, PPR>;
def PPR8 : PPRRegOp<"b", PPRAsmOp8, PPR>;
@ -784,6 +788,18 @@ def PPR16 : PPRRegOp<"h", PPRAsmOp16, PPR>;
def PPR32 : PPRRegOp<"s", PPRAsmOp32, PPR>;
def PPR64 : PPRRegOp<"d", PPRAsmOp64, PPR>;
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", -1>;
def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, PPR_3b>;
def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, PPR_3b>;
def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, PPR_3b>;
def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, PPR_3b>;
def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, PPR_3b>;
//******************************************************************************
// SVE vector register class

View File

@ -136,7 +136,7 @@ static cl::opt<bool>
static cl::opt<int> EnableGlobalISelAtO(
"aarch64-enable-global-isel-at-O", cl::Hidden,
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
cl::init(-1));
cl::init(0));
static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);

View File

@ -819,6 +819,10 @@ class AArch64Operand : public MCParsedAsmOperand {
}
bool isReg() const override {
return Kind == k_Register;
}
bool isScalarReg() const {
return Kind == k_Register && Reg.Kind == RegKind::Scalar;
}
@ -839,6 +843,7 @@ class AArch64Operand : public MCParsedAsmOperand {
RK = RegKind::SVEDataVector;
break;
case AArch64::PPRRegClassID:
case AArch64::PPR_3bRegClassID:
RK = RegKind::SVEPredicateVector;
break;
default:
@ -3148,7 +3153,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
return true;
if (Operands.size() < 2 ||
!static_cast<AArch64Operand &>(*Operands[1]).isReg())
!static_cast<AArch64Operand &>(*Operands[1]).isScalarReg())
return Error(Loc, "Only valid when first operand is register");
bool IsXReg =
@ -3648,6 +3653,12 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
case Match_InvalidSVEPredicateSReg:
case Match_InvalidSVEPredicateDReg:
return Error(Loc, "invalid predicate register.");
case Match_InvalidSVEPredicate3bAnyReg:
case Match_InvalidSVEPredicate3bBReg:
case Match_InvalidSVEPredicate3bHReg:
case Match_InvalidSVEPredicate3bSReg:
case Match_InvalidSVEPredicate3bDReg:
return Error(Loc, "restricted predicate has range [0, 7].");
default:
llvm_unreachable("unexpected error code!");
}
@ -3670,7 +3681,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumOperands == 4 && Tok == "lsl") {
AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
if (Op2.isReg() && Op3.isImm()) {
if (Op2.isScalarReg() && Op3.isImm()) {
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
if (Op3CE) {
uint64_t Op3Val = Op3CE->getValue();
@ -3702,7 +3713,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand LSBOp = static_cast<AArch64Operand &>(*Operands[2]);
AArch64Operand WidthOp = static_cast<AArch64Operand &>(*Operands[3]);
if (Op1.isReg() && LSBOp.isImm() && WidthOp.isImm()) {
if (Op1.isScalarReg() && LSBOp.isImm() && WidthOp.isImm()) {
const MCConstantExpr *LSBCE = dyn_cast<MCConstantExpr>(LSBOp.getImm());
const MCConstantExpr *WidthCE = dyn_cast<MCConstantExpr>(WidthOp.getImm());
@ -3758,7 +3769,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
if (Op1.isScalarReg() && Op3.isImm() && Op4.isImm()) {
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
@ -3822,7 +3833,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
if (Op1.isScalarReg() && Op3.isImm() && Op4.isImm()) {
const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
@ -3901,7 +3912,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
if (Op.isReg()) {
if (Op.isScalarReg()) {
unsigned Reg = getXRegFromWReg(Op.getReg());
Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
Op.getStartLoc(), Op.getEndLoc(),
@ -3911,13 +3922,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// FIXME: Likewise for sxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) {
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
if (Op.isReg() &&
if (Op.isScalarReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
if (Op.isReg()) {
if (Op.isScalarReg()) {
unsigned Reg = getXRegFromWReg(Op.getReg());
Operands[2] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
Op.getStartLoc(),
@ -3928,13 +3939,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// FIXME: Likewise for uxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) {
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
if (Op.isReg() &&
if (Op.isScalarReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR32. Twiddle it here if necessary.
AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
if (Op.isReg()) {
if (Op.isScalarReg()) {
unsigned Reg = getWRegFromXReg(Op.getReg());
Operands[1] = AArch64Operand::CreateReg(Reg, RegKind::Scalar,
Op.getStartLoc(),
@ -4077,6 +4088,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSVEPredicateHReg:
case Match_InvalidSVEPredicateSReg:
case Match_InvalidSVEPredicateDReg:
case Match_InvalidSVEPredicate3bAnyReg:
case Match_InvalidSVEPredicate3bBReg:
case Match_InvalidSVEPredicate3bHReg:
case Match_InvalidSVEPredicate3bSReg:
case Match_InvalidSVEPredicate3bDReg:
case Match_MSR:
case Match_MRS: {
if (ErrorInfo >= Operands.size())

View File

@ -91,6 +91,9 @@ static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decode);
LLVM_ATTRIBUTE_UNUSED static DecodeStatus
DecodePPR_3bRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address,
const void *Decode);
static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm,
uint64_t Address,
@ -481,6 +484,16 @@ static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
return Success;
}
static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void* Decoder) {
if (RegNo > 7)
return Fail;
// Just reuse the PPR decode table
return DecodePPRRegisterClass(Inst, RegNo, Addr, Decoder);
}
static const unsigned VectorDecoderTable[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,

View File

@ -605,10 +605,10 @@ class COFFAArch64AsmBackend : public AArch64AsmBackend {
}
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
if (TheTriple.isOSBinFormatMachO())
return new DarwinAArch64AsmBackend(T, TheTriple, MRI);
@ -624,10 +624,10 @@ MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
}
MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
assert(TheTriple.isOSBinFormatELF() &&
"Big endian is only supported for ELF targets!");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

View File

@ -45,12 +45,12 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
MCAsmBackend *createAArch64beAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>

View File

@ -815,6 +815,10 @@ class KernelScopeInfo {
class AMDGPUAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
// Number of extra operands parsed after the first optional operand.
// This may be necessary to skip hardcoded mandatory operands.
static const unsigned MAX_OPR_LOOKAHEAD = 1;
unsigned ForcedEncodingSize = 0;
bool ForcedDPP = false;
bool ForcedSDWA = false;
@ -1037,6 +1041,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
OperandMatchResultTy parseExpTgt(OperandVector &Operands);
OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
@ -3859,7 +3864,7 @@ AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
} else {
// Swizzle "offset" operand is optional.
// If it is omitted, try parsing other optional operands.
return parseOptionalOperand(Operands);
return parseOptionalOpr(Operands);
}
}
@ -4179,6 +4184,39 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
unsigned size = Operands.size();
assert(size > 0);
OperandMatchResultTy res = parseOptionalOpr(Operands);
// This is a hack to enable hardcoded mandatory operands which follow
// optional operands.
//
// Current design assumes that all operands after the first optional operand
// are also optional. However implementation of some instructions violates
// this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
//
// To alleviate this problem, we have to (implicitly) parse extra operands
// to make sure autogenerated parser of custom operands never hit hardcoded
// mandatory operands.
if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {
// We have parsed the first optional operand.
// Parse as many operands as necessary to skip all mandatory operands.
for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
if (res != MatchOperand_Success ||
getLexer().is(AsmToken::EndOfStatement)) break;
if (getLexer().is(AsmToken::Comma)) Parser.Lex();
res = parseOptionalOpr(Operands);
}
}
return res;
}
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
OperandMatchResultTy res;
for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
// try to parse any optional operand here

View File

@ -198,9 +198,9 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
} // end anonymous namespace
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
// Use 64-bit ELF for amdgcn
return new ELFAMDGPUAsmBackend(T, TT);
return new ELFAMDGPUAsmBackend(T, STI.getTargetTriple());
}

View File

@ -45,8 +45,9 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createAMDGPUAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>

View File

@ -71,9 +71,9 @@ class MIMG_Store_Helper <bits<7> op, string asm,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da", dns>, MIMGe<op> {
let ssamp = 0;
let mayLoad = 1; // TableGen requires this for matching with the intrinsics
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 1;
let hasSideEffects = 0;
let hasPostISelHook = 0;
let DisableWQM = 1;
}
@ -103,10 +103,10 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
(ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
> {
asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"> {
let mayLoad = 1;
let mayStore = 1;
let hasSideEffects = 1;
let hasSideEffects = 1; // FIXME: Remove this
let hasPostISelHook = 0;
let DisableWQM = 1;
let Constraints = "$vdst = $vdata";

View File

@ -575,6 +575,221 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
// Image load.
case Intrinsic::amdgcn_image_load:
case Intrinsic::amdgcn_image_load_mip:
// Sample.
case Intrinsic::amdgcn_image_sample:
case Intrinsic::amdgcn_image_sample_cl:
case Intrinsic::amdgcn_image_sample_d:
case Intrinsic::amdgcn_image_sample_d_cl:
case Intrinsic::amdgcn_image_sample_l:
case Intrinsic::amdgcn_image_sample_b:
case Intrinsic::amdgcn_image_sample_b_cl:
case Intrinsic::amdgcn_image_sample_lz:
case Intrinsic::amdgcn_image_sample_cd:
case Intrinsic::amdgcn_image_sample_cd_cl:
// Sample with comparison.
case Intrinsic::amdgcn_image_sample_c:
case Intrinsic::amdgcn_image_sample_c_cl:
case Intrinsic::amdgcn_image_sample_c_d:
case Intrinsic::amdgcn_image_sample_c_d_cl:
case Intrinsic::amdgcn_image_sample_c_l:
case Intrinsic::amdgcn_image_sample_c_b:
case Intrinsic::amdgcn_image_sample_c_b_cl:
case Intrinsic::amdgcn_image_sample_c_lz:
case Intrinsic::amdgcn_image_sample_c_cd:
case Intrinsic::amdgcn_image_sample_c_cd_cl:
// Sample with offsets.
case Intrinsic::amdgcn_image_sample_o:
case Intrinsic::amdgcn_image_sample_cl_o:
case Intrinsic::amdgcn_image_sample_d_o:
case Intrinsic::amdgcn_image_sample_d_cl_o:
case Intrinsic::amdgcn_image_sample_l_o:
case Intrinsic::amdgcn_image_sample_b_o:
case Intrinsic::amdgcn_image_sample_b_cl_o:
case Intrinsic::amdgcn_image_sample_lz_o:
case Intrinsic::amdgcn_image_sample_cd_o:
case Intrinsic::amdgcn_image_sample_cd_cl_o:
// Sample with comparison and offsets.
case Intrinsic::amdgcn_image_sample_c_o:
case Intrinsic::amdgcn_image_sample_c_cl_o:
case Intrinsic::amdgcn_image_sample_c_d_o:
case Intrinsic::amdgcn_image_sample_c_d_cl_o:
case Intrinsic::amdgcn_image_sample_c_l_o:
case Intrinsic::amdgcn_image_sample_c_b_o:
case Intrinsic::amdgcn_image_sample_c_b_cl_o:
case Intrinsic::amdgcn_image_sample_c_lz_o:
case Intrinsic::amdgcn_image_sample_c_cd_o:
case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
// Basic gather4
case Intrinsic::amdgcn_image_gather4:
case Intrinsic::amdgcn_image_gather4_cl:
case Intrinsic::amdgcn_image_gather4_l:
case Intrinsic::amdgcn_image_gather4_b:
case Intrinsic::amdgcn_image_gather4_b_cl:
case Intrinsic::amdgcn_image_gather4_lz:
// Gather4 with comparison
case Intrinsic::amdgcn_image_gather4_c:
case Intrinsic::amdgcn_image_gather4_c_cl:
case Intrinsic::amdgcn_image_gather4_c_l:
case Intrinsic::amdgcn_image_gather4_c_b:
case Intrinsic::amdgcn_image_gather4_c_b_cl:
case Intrinsic::amdgcn_image_gather4_c_lz:
// Gather4 with offsets
case Intrinsic::amdgcn_image_gather4_o:
case Intrinsic::amdgcn_image_gather4_cl_o:
case Intrinsic::amdgcn_image_gather4_l_o:
case Intrinsic::amdgcn_image_gather4_b_o:
case Intrinsic::amdgcn_image_gather4_b_cl_o:
case Intrinsic::amdgcn_image_gather4_lz_o:
// Gather4 with comparison and offsets
case Intrinsic::amdgcn_image_gather4_c_o:
case Intrinsic::amdgcn_image_gather4_c_cl_o:
case Intrinsic::amdgcn_image_gather4_c_l_o:
case Intrinsic::amdgcn_image_gather4_c_b_o:
case Intrinsic::amdgcn_image_gather4_c_b_cl_o:
case Intrinsic::amdgcn_image_gather4_c_lz_o: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = MFI->getImagePSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(1));
Info.align = 0;
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
return true;
}
case Intrinsic::amdgcn_image_store:
case Intrinsic::amdgcn_image_store_mip: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_VOID;
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
Info.ptrVal = MFI->getImagePSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(2));
Info.flags = MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
Info.align = 0;
return true;
}
case Intrinsic::amdgcn_image_atomic_swap:
case Intrinsic::amdgcn_image_atomic_add:
case Intrinsic::amdgcn_image_atomic_sub:
case Intrinsic::amdgcn_image_atomic_smin:
case Intrinsic::amdgcn_image_atomic_umin:
case Intrinsic::amdgcn_image_atomic_smax:
case Intrinsic::amdgcn_image_atomic_umax:
case Intrinsic::amdgcn_image_atomic_and:
case Intrinsic::amdgcn_image_atomic_or:
case Intrinsic::amdgcn_image_atomic_xor:
case Intrinsic::amdgcn_image_atomic_inc:
case Intrinsic::amdgcn_image_atomic_dec: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = MFI->getImagePSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(2));
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
// XXX - Should this be volatile without known ordering?
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_image_atomic_cmpswap: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = MFI->getImagePSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(3));
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
// XXX - Should this be volatile without known ordering?
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_tbuffer_load:
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(0));
Info.memVT = MVT::getVT(CI.getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
// There is a constant offset component, but there are additional register
// offsets which could break AA if we set the offset to anything non-0.
return true;
}
case Intrinsic::amdgcn_tbuffer_store:
case Intrinsic::amdgcn_buffer_store:
case Intrinsic::amdgcn_buffer_store_format: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_VOID;
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(1));
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
Info.flags = MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
return true;
}
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_buffer_atomic_sub:
case Intrinsic::amdgcn_buffer_atomic_smin:
case Intrinsic::amdgcn_buffer_atomic_umin:
case Intrinsic::amdgcn_buffer_atomic_smax:
case Intrinsic::amdgcn_buffer_atomic_umax:
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(1));
Info.memVT = MVT::getVT(CI.getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = MFI->getBufferPSV(
*MF.getSubtarget<SISubtarget>().getInstrInfo(),
CI.getArgOperand(2));
Info.memVT = MVT::getVT(CI.getType());
Info.flags = MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
default:
return false;
}
@ -2946,24 +3161,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
if (TII->isMIMG(MI)) {
if (!MI.memoperands_empty())
return BB;
if (MI.memoperands_empty() && MI.mayLoadOrStore()) {
report_fatal_error("missing mem operand from MIMG instruction");
}
// Add a memoperand for mimg instructions so that they aren't assumed to
// be ordered memory instructions.
MachinePointerInfo PtrInfo(MFI->getImagePSV());
MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable;
if (MI.mayStore())
Flags |= MachineMemOperand::MOStore;
if (MI.mayLoad())
Flags |= MachineMemOperand::MOLoad;
if (Flags != MachineMemOperand::MODereferenceable) {
auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0);
MI.addMemOperand(*MF, MMO);
}
return BB;
}
@ -4257,7 +4460,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
@ -4284,21 +4486,18 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // glc
Op.getOperand(6) // slc
};
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(MFI->getBufferPSV()),
MachineMemOperand::MOLoad,
VT.getStoreSize(), VT.getStoreSize());
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
auto *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
M->getMemOperand());
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
@ -4312,14 +4511,10 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(10) // slc
};
EVT VT = Op.getOperand(2).getValueType();
EVT VT = Op.getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad,
VT.getStoreSize(), VT.getStoreSize());
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, VT, MMO);
Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
@ -4339,14 +4534,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // offset
Op.getOperand(6) // slc
};
EVT VT = Op.getOperand(3).getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile,
VT.getStoreSize(), 4);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
unsigned Opcode = 0;
switch (IntrID) {
@ -4384,7 +4574,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
llvm_unreachable("unhandled atomic opcode");
}
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT, MMO);
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
@ -4397,17 +4588,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // offset
Op.getOperand(7) // slc
};
EVT VT = Op.getOperand(4).getValueType();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile,
VT.getStoreSize(), 4);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, MMO);
Op->getVTList(), Ops, VT, M->getMemOperand());
}
// Basic sample.

View File

@ -28,8 +28,6 @@ using namespace llvm;
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
BufferPSV(*(MF.getSubtarget().getInstrInfo())),
ImagePSV(*(MF.getSubtarget().getInstrInfo())),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),

View File

@ -34,12 +34,14 @@ namespace llvm {
class MachineFrameInfo;
class MachineFunction;
class SIInstrInfo;
class TargetRegisterClass;
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
// TODO: Is the img rsrc useful?
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
bool isConstant(const MachineFrameInfo *) const override {
// This should probably be true for most images, but we will start by being
@ -135,8 +137,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// Stack object indices for work item IDs.
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}};
AMDGPUBufferPseudoSourceValue BufferPSV;
AMDGPUImagePseudoSourceValue ImagePSV;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
private:
unsigned LDSWaveSpillSize = 0;
@ -629,12 +633,22 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
return LDSWaveSpillSize;
}
const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
return &BufferPSV;
const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
const Value *BufferRsrc) {
assert(BufferRsrc);
auto PSV = BufferPSVs.try_emplace(
BufferRsrc,
llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
return PSV.first->second.get();
}
const AMDGPUImagePseudoSourceValue *getImagePSV() const {
return &ImagePSV;
const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
const Value *ImgRsrc) {
assert(ImgRsrc);
auto PSV = ImagePSVs.try_emplace(
ImgRsrc,
llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
return PSV.first->second.get();
}
};
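An aside on the caching pattern used by getBufferPSV/getImagePSV above: DenseMap::try_emplace only inserts when the key is not already present, so each distinct resource value maps to exactly one pseudo source value. A stand-alone sketch with illustrative types (not from the patch):
#include <memory>
#include <string>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
llvm::DenseMap<unsigned, std::unique_ptr<std::string>> Cache;
const std::string *getOrCreate(unsigned Key) {
  // Constructs and stores a new value only the first time Key is seen;
  // later calls return the previously cached object.
  auto PSV = Cache.try_emplace(Key, llvm::make_unique<std::string>("cached"));
  return PSV.first->second.get();
}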

View File

@ -172,8 +172,8 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
}
unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
bool HasThumb2 = STI->getFeatureBits()[ARM::FeatureThumb2];
bool HasV8MBaselineOps = STI->getFeatureBits()[ARM::HasV8MBaselineOps];
bool HasThumb2 = STI.getFeatureBits()[ARM::FeatureThumb2];
bool HasV8MBaselineOps = STI.getFeatureBits()[ARM::HasV8MBaselineOps];
switch (Op) {
default:
@ -389,7 +389,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
case FK_SecRel_4:
return Value;
case ARM::fixup_arm_movt_hi16:
if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
if (IsResolved || !STI.getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_arm_movw_lo16: {
@ -401,7 +401,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
return Value;
}
case ARM::fixup_t2_movt_hi16:
if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
if (IsResolved || !STI.getTargetTriple().isOSBinFormatELF())
Value >>= 16;
LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16: {
@ -591,7 +591,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
case ARM::fixup_arm_thumb_cp:
// On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
// could have an error on our hands.
if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
if (!STI.getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@ -615,8 +615,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
if (!STI->getFeatureBits()[ARM::FeatureThumb2] &&
!STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
if (!STI.getFeatureBits()[ARM::FeatureThumb2] &&
!STI.getFeatureBits()[ARM::HasV8MBaselineOps]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@ -626,7 +626,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
return ((Value - 4) >> 1) & 0x7ff;
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
if (!STI->getFeatureBits()[ARM::FeatureThumb2]) {
if (!STI.getFeatureBits()[ARM::FeatureThumb2]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@ -1154,51 +1154,52 @@ static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
}
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
const MCTargetOptions &Options,
bool isLittle) {
const Triple &TheTriple = STI.getTargetTriple();
switch (TheTriple.getObjectFormat()) {
default:
llvm_unreachable("unsupported object format");
case Triple::MachO: {
MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName());
return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS);
return new ARMAsmBackendDarwin(T, STI, MRI, CS);
}
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
return new ARMAsmBackendWinCOFF(T, TheTriple);
return new ARMAsmBackendWinCOFF(T, STI);
case Triple::ELF:
assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
return new ARMAsmBackendELF(T, TheTriple, OSABI, isLittle);
return new ARMAsmBackendELF(T, STI, OSABI, isLittle);
}
}
MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
return createARMAsmBackend(T, STI, MRI, Options, true);
}
MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
return createARMAsmBackend(T, STI, MRI, Options, false);
}
MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
return createARMAsmBackend(T, STI, MRI, Options, true);
}
MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
return createARMAsmBackend(T, STI, MRI, Options, false);
}

View File

@ -19,22 +19,20 @@
namespace llvm {
class ARMAsmBackend : public MCAsmBackend {
const MCSubtargetInfo *STI;
const MCSubtargetInfo &STI;
bool isThumbMode; // Currently emitting Thumb code.
bool IsLittleEndian; // Big or little endian.
public:
ARMAsmBackend(const Target &T, const Triple &TT, bool IsLittle)
: MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")),
isThumbMode(TT.getArchName().startswith("thumb")),
ARMAsmBackend(const Target &T, const MCSubtargetInfo &STI, bool IsLittle)
: MCAsmBackend(), STI(STI),
isThumbMode(STI.getTargetTriple().isThumb()),
IsLittleEndian(IsLittle) {}
~ARMAsmBackend() override { delete STI; }
unsigned getNumFixupKinds() const override {
return ARM::NumTargetFixupKinds;
}
bool hasNOP() const { return STI->getFeatureBits()[ARM::HasV6T2Ops]; }
bool hasNOP() const { return STI.getFeatureBits()[ARM::HasV6T2Ops]; }
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

View File

@ -19,10 +19,10 @@ class ARMAsmBackendDarwin : public ARMAsmBackend {
const MCRegisterInfo &MRI;
public:
const MachO::CPUSubTypeARM Subtype;
ARMAsmBackendDarwin(const Target &T, const Triple &TT,
ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st)
: ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
}
: ARMAsmBackend(T, STI, /* IsLittleEndian */ true), MRI(MRI),
Subtype(st) {}
std::unique_ptr<MCObjectWriter>
createObjectWriter(raw_pwrite_stream &OS) const override {

View File

@ -20,9 +20,9 @@ namespace {
class ARMAsmBackendELF : public ARMAsmBackend {
public:
uint8_t OSABI;
ARMAsmBackendELF(const Target &T, const Triple &TT, uint8_t OSABI,
ARMAsmBackendELF(const Target &T, const MCSubtargetInfo &STI, uint8_t OSABI,
bool IsLittle)
: ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
: ARMAsmBackend(T, STI, IsLittle), OSABI(OSABI) {}
std::unique_ptr<MCObjectWriter>
createObjectWriter(raw_pwrite_stream &OS) const override {

View File

@ -17,8 +17,8 @@ using namespace llvm;
namespace {
class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
ARMAsmBackendWinCOFF(const Target &T, const Triple &TheTriple)
: ARMAsmBackend(T, TheTriple, true) {}
ARMAsmBackendWinCOFF(const Target &T, const MCSubtargetInfo &STI)
: ARMAsmBackend(T, STI, true) {}
std::unique_ptr<MCObjectWriter>
createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);

View File

@ -68,27 +68,27 @@ MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createARMAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options,
bool IsLittleEndian);
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
MCAsmBackend *createThumbLEAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
MCAsmBackend *createThumbBEAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
// Construct a PE/COFF machine code streamer which will generate a PE/COFF

View File

@ -476,10 +476,10 @@ bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
}
}
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const llvm::MCTargetOptions &TO) {
return new AVRAsmBackend(TT.getOS());
return new AVRAsmBackend(STI.getTargetTriple().getOS());
}
} // end of namespace llvm

View File

@ -26,6 +26,7 @@ class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
class StringRef;
class Target;
@ -42,8 +43,8 @@ MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
/// Creates an assembly backend for AVR.
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const llvm::MCTargetOptions &TO);
/// Creates an ELF object writer for AVR.

View File

@ -104,15 +104,15 @@ BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
}
MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions&) {
const MCTargetOptions &) {
return new BPFAsmBackend(/*IsLittleEndian=*/true);
}
MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions&) {
const MCTargetOptions &) {
return new BPFAsmBackend(/*IsLittleEndian=*/false);
}

View File

@ -45,11 +45,11 @@ MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter> createBPFELFObjectWriter(raw_pwrite_stream &OS,

View File

@ -1050,14 +1050,11 @@ bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
// Check if the exit values have types that are no wider than the type
// that we want to promote to.
unsigned DestBW = DestTy->getBitWidth();
for (Instruction &In : *ExitB) {
PHINode *P = dyn_cast<PHINode>(&In);
if (!P)
break;
if (P->getNumIncomingValues() != 1)
for (PHINode &P : ExitB->phis()) {
if (P.getNumIncomingValues() != 1)
return false;
assert(P->getIncomingBlock(0) == LoopB);
IntegerType *T = dyn_cast<IntegerType>(P->getType());
assert(P.getIncomingBlock(0) == LoopB);
IntegerType *T = dyn_cast<IntegerType>(P.getType());
if (!T || T->getBitWidth() > DestBW)
return false;
}
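The loop above now uses BasicBlock::phis(), which visits only the leading PHI nodes of a block. A small stand-alone sketch of the idiom (the function name is illustrative):
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
// Counts the PHI nodes of a block; phis() stops at the first non-PHI
// instruction, so no dyn_cast<PHINode>/break dance is needed.
unsigned countPhis(llvm::BasicBlock *BB) {
  unsigned Count = 0;
  for (llvm::PHINode &P : BB->phis()) {
    (void)P;
    ++Count;
  }
  return Count;
}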

View File

@ -2925,6 +2925,23 @@ let Predicates = [UseHVX] in {
def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
def vzero: PatFrag<(ops), (HexagonVZERO)>;
def VSxtb: OutPatFrag<(ops node:$Vs),
(V6_vshuffvdd (HiVec (V6_vsb $Vs)),
(LoVec (V6_vsb $Vs)),
(A2_tfrsi -2))>;
def VSxth: OutPatFrag<(ops node:$Vs),
(V6_vshuffvdd (HiVec (V6_vsh $Vs)),
(LoVec (V6_vsh $Vs)),
(A2_tfrsi -4))>;
def VZxtb: OutPatFrag<(ops node:$Vs),
(V6_vshuffvdd (HiVec (V6_vzb $Vs)),
(LoVec (V6_vzb $Vs)),
(A2_tfrsi -2))>;
def VZxth: OutPatFrag<(ops node:$Vs),
(V6_vshuffvdd (HiVec (V6_vzh $Vs)),
(LoVec (V6_vzh $Vs)),
(A2_tfrsi -4))>;
let Predicates = [UseHVX] in {
def: Pat<(VecI8 vzero), (V6_vd0)>;
def: Pat<(VecI16 vzero), (V6_vd0)>;
@ -2970,25 +2987,18 @@ let Predicates = [UseHVX] in {
def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(VecPI16 (sext HVI8:$Vs)), (V6_vsb HvxVR:$Vs)>;
def: Pat<(VecPI32 (sext HVI16:$Vs)), (V6_vsh HvxVR:$Vs)>;
def: Pat<(VecPI16 (zext HVI8:$Vs)), (V6_vzb HvxVR:$Vs)>;
def: Pat<(VecPI32 (zext HVI16:$Vs)), (V6_vzh HvxVR:$Vs)>;
def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>;
def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>;
def: Pat<(sext_inreg HVI32:$Vs, v16i16),
(V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
(HiVec (V6_vsh HvxVR:$Vs)))>;
def: Pat<(sext_inreg HVI32:$Vs, v32i16),
(V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
(HiVec (V6_vsh HvxVR:$Vs)))>;
def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (V6_vsb HvxVR:$Vs))>;
def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (V6_vsh HvxVR:$Vs))>;
def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
(LoVec (V6_vsh (LoVec (V6_vsb HvxVR:$Vs))))>;
(LoVec (VSxth (LoVec (VSxtb $Vs))))>;
def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (V6_vzb HvxVR:$Vs))>;
def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (V6_vzh HvxVR:$Vs))>;
def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
(LoVec (V6_vzh (LoVec (V6_vzb HvxVR:$Vs))))>;
(LoVec (VZxth (LoVec (VZxtb $Vs))))>;
}

View File

@ -765,11 +765,12 @@ class HexagonAsmBackend : public MCAsmBackend {
// MCAsmBackend
MCAsmBackend *llvm::createHexagonAsmBackend(Target const &T,
MCRegisterInfo const & /*MRI*/,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
const MCSubtargetInfo &STI,
MCRegisterInfo const & /*MRI*/,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
StringRef CPUString = Hexagon_MC::selectHexagonCPU(CPU);
StringRef CPUString = Hexagon_MC::selectHexagonCPU(STI.getCPU());
return new HexagonAsmBackend(T, TT, OSABI, CPUString);
}

View File

@ -61,8 +61,8 @@ MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>

View File

@ -165,9 +165,10 @@ LanaiAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
} // namespace
MCAsmBackend *llvm::createLanaiAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo & /*MRI*/,
const Triple &TT, StringRef /*CPU*/,
const MCTargetOptions & /*Options*/) {
const Triple &TT = STI.getTargetTriple();
if (!TT.isOSBinFormatELF())
llvm_unreachable("OS not supported");

View File

@ -38,8 +38,8 @@ MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>

View File

@ -476,8 +476,9 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
}
MCAsmBackend *llvm::createMipsAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
return new MipsAsmBackend(T, MRI, TT, CPU, Options.ABIName == "n32");
return new MipsAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(),
Options.ABIName == "n32");
}

View File

@ -45,8 +45,8 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createMipsAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createMipsAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>

View File

@ -3863,13 +3863,17 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'c': // register suitable for indirect jump
if (VT == MVT::i32)
return std::make_pair((unsigned)Mips::T9, &Mips::GPR32RegClass);
assert(VT == MVT::i64 && "Unexpected type.");
return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
case 'l': // register suitable for indirect jump
if (VT == MVT::i64)
return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
case 'l': // use the `lo` register to store values
// that are no bigger than a word
if (VT == MVT::i32)
return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass);
return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass);
case 'x': // register suitable for indirect jump
case 'x': // use the concatenated `hi` and `lo` registers
// to store doubleword values
// Fixme: Not triggering the use of both hi and low
// This will generate an error message
return std::make_pair(0U, nullptr);

View File

@ -18,6 +18,7 @@
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
@ -231,9 +232,10 @@ namespace {
} // end anonymous namespace
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
if (TT.isOSDarwin())
return new DarwinPPCAsmBackend(T);

View File

@ -29,6 +29,7 @@ class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
class Target;
class Triple;
@ -43,8 +44,8 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
/// Construct an PPC ELF object writer.

View File

@ -4397,13 +4397,18 @@ hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
CallingConv::ID CalleeCC) {
// Tail or Sibling call optimization (TCO/SCO) needs callee and caller to
// have the same calling convention.
if (CallerCC != CalleeCC)
// Tail calls are possible with fastcc and ccc.
auto isTailCallableCC = [] (CallingConv::ID CC){
return CC == CallingConv::C || CC == CallingConv::Fast;
};
if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
return false;
// Tail or Sibling calls can be done with fastcc/ccc.
return (CallerCC == CallingConv::Fast || CallerCC == CallingConv::C);
// We can safely tail call both fastcc and ccc callees from a c calling
// convention caller. If the caller is fastcc, we may have less stack space
// than a non-fastcc caller with the same signature so disable tail-calls in
// that case.
return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
bool
@ -4434,10 +4439,28 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// Callee contains any byval parameter is not supported, too.
// Note: This is a quick work around, because in some cases, e.g.
// caller's stack size > callee's stack size, we are still able to apply
// sibling call optimization. See: https://reviews.llvm.org/D23441#513574
// sibling call optimization. For example, gcc is able to do SCO for caller1
// in the following example, but not for caller2.
// struct test {
// long int a;
// char ary[56];
// } gTest;
// __attribute__((noinline)) int callee(struct test v, struct test *b) {
// b->a = v.a;
// return 0;
// }
// void caller1(struct test a, struct test c, struct test *b) {
// callee(gTest, b); }
// void caller2(struct test *b) { callee(gTest, b); }
if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
return false;
// If callee and caller use different calling conventions, we cannot pass
// parameters on stack since offsets for the parameter area may be different.
if (Caller.getCallingConv() != CalleeCC &&
needStackSlotPassParameters(Subtarget, Outs))
return false;
// No TCO/SCO on indirect calls because the caller has to restore its TOC
if (!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee))

View File

@ -2433,7 +2433,7 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
int64_t MB = MI.getOperand(3).getImm();
APInt InVal(Opc == PPC::RLDICL ? 64 : 32, SExtImm, true);
InVal = InVal.rotl(SH);
uint64_t Mask = (1LU << (63 - MB + 1)) - 1;
uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
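A hedged aside on the LU to LLU change (not part of the diff text itself): unsigned long may be only 32 bits wide (32-bit hosts, LLP64 Windows), so the old expression shifted past the type's width for small MB values, whereas unsigned long long is guaranteed at least 64 bits. A sketch with an illustrative MB:
#include <cstdint>
uint64_t rldiclMask(unsigned MB) {      // e.g. MB == 16
  // Old form: (1LU << (63 - MB + 1)) - 1 shifts a possibly 32-bit value by
  // 48 bits, which is undefined; the LL suffix keeps the shift in 64 bits.
  return (1LLU << (63 - MB + 1)) - 1;   // MB == 16 -> 0x0000FFFFFFFFFFFF
}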
@ -2457,8 +2457,8 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
int64_t ME = MI.getOperand(4).getImm();
APInt InVal(32, SExtImm, true);
InVal = InVal.rotl(SH);
// Set the bits ( MB + 32 ) to ( ME + 32 ).
uint64_t Mask = ((1 << (32 - MB)) - 1) & ~((1 << (31 - ME)) - 1);
// Set the bits ( MB + 32 ) to ( ME + 32 ).
uint64_t Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
InVal &= Mask;
// Can't replace negative values with an LI as that will sign-extend
// and not clear the left bits. If we're setting the CR bit, we will use
@ -2527,6 +2527,7 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.ConstantOpNo = 2;
III.ImmWidth = 16;
III.ImmMustBeMultipleOf = 1;
III.TruncateImmTo = 0;
switch (Opc) {
default: return false;
case PPC::ADD4:
@ -2600,10 +2601,6 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::RLWNM8:
case PPC::RLWNMo:
case PPC::RLWNM8o:
case PPC::RLDCL:
case PPC::RLDCLo:
case PPC::RLDCR:
case PPC::RLDCRo:
case PPC::SLW:
case PPC::SLW8:
case PPC::SLWo:
@ -2614,6 +2611,50 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
case PPC::SRW8o:
case PPC::SRAW:
case PPC::SRAWo:
III.SignedImm = false;
III.ZeroIsSpecialOrig = 0;
III.ZeroIsSpecialNew = 0;
III.IsCommutative = false;
// This isn't actually true, but the instructions ignore any of the
// upper bits, so any immediate loaded with an LI is acceptable.
// This does not apply to shift right algebraic because a value
// out of range will produce a -1/0.
III.ImmWidth = 16;
if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 ||
Opc == PPC::RLWNMo || Opc == PPC::RLWNM8o)
III.TruncateImmTo = 5;
else
III.TruncateImmTo = 6;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRAW:
III.ImmWidth = 5;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRAWI;
break;
case PPC::SRAWo:
III.ImmWidth = 5;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRAWIo;
break;
}
break;
case PPC::RLDCL:
case PPC::RLDCLo:
case PPC::RLDCR:
case PPC::RLDCRo:
case PPC::SLD:
case PPC::SLDo:
case PPC::SRD:
@ -2626,33 +2667,34 @@ bool PPCInstrInfo::instrHasImmForm(const MachineInstr &MI,
III.IsCommutative = false;
// This isn't actually true, but the instructions ignore any of the
// upper bits, so any immediate loaded with an LI is acceptable.
// This does not apply to shift right algebraic because a value
// out of range will produce a -1/0.
III.ImmWidth = 16;
if (Opc == PPC::RLDCL || Opc == PPC::RLDCLo ||
Opc == PPC::RLDCR || Opc == PPC::RLDCRo)
III.TruncateImmTo = 6;
else
III.TruncateImmTo = 7;
switch(Opc) {
default: llvm_unreachable("Unknown opcode");
case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::RLWNMo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::RLWNM8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
case PPC::RLDCLo: III.ImmOpcode = PPC::RLDICLo; break;
case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
case PPC::RLDCRo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SLWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SLW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
case PPC::SRWo: III.ImmOpcode = PPC::RLWINMo; break;
case PPC::SRW8o: III.ImmOpcode = PPC::RLWINM8o; break;
case PPC::SRAW: III.ImmOpcode = PPC::SRAWI; break;
case PPC::SRAWo: III.ImmOpcode = PPC::SRAWIo; break;
case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
case PPC::SLDo: III.ImmOpcode = PPC::RLDICRo; break;
case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
case PPC::SRDo: III.ImmOpcode = PPC::RLDICLo; break;
case PPC::SRAD: III.ImmOpcode = PPC::SRADI; break;
case PPC::SRADo: III.ImmOpcode = PPC::SRADIo; break;
case PPC::SRAD:
III.ImmWidth = 6;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRADI;
break;
case PPC::SRADo:
III.ImmWidth = 6;
III.TruncateImmTo = 0;
III.ImmOpcode = PPC::SRADIo;
break;
}
break;
// Loads and stores:
@ -2866,6 +2908,8 @@ bool PPCInstrInfo::transformToImmForm(MachineInstr &MI, const ImmInstrInfo &III,
return false;
if (Imm % III.ImmMustBeMultipleOf)
return false;
if (III.TruncateImmTo)
Imm &= ((1 << III.TruncateImmTo) - 1);
if (III.SignedImm) {
APInt ActualValue(64, Imm, true);
if (!ActualValue.isSignedIntN(III.ImmWidth))
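As a rough illustration of how the new TruncateImmTo field interacts with the width check in transformToImmForm, here is a small standalone sketch (plain C++, not LLVM code; the struct and function names are made up, and only the unsigned-immediate case is modelled) of folding a constant shift amount the way the hunks above describe for rlwnm-style rotates versus sraw:

// Illustrative only: truncate the constant first if the instruction ignores
// the upper bits, then check that it fits the immediate field.
#include <cassert>
#include <cstdint>
#include <optional>

struct ImmFoldInfo {
  unsigned ImmWidth;       // bits the immediate field can hold
  unsigned TruncateImmTo;  // 0 means "do not truncate" (e.g. sraw/srad)
};

std::optional<uint64_t> foldableImm(uint64_t Imm, ImmFoldInfo III) {
  if (III.TruncateImmTo)
    Imm &= (1ull << III.TruncateImmTo) - 1;
  if (Imm >= (1ull << III.ImmWidth))
    return std::nullopt;
  return Imm;
}

int main() {
  ImmFoldInfo RotW{16, 5};  // rlwnm -> rlwinm: only the low 5 bits matter
  ImmFoldInfo SraW{5, 0};   // sraw -> srawi: out-of-range must not be truncated
  assert(*foldableImm(35, RotW) == 3);  // 35 & 31 == 3, still a rotate by 3
  assert(!foldableImm(35, SraW));       // 35 >= 32: srawi cannot encode it
  assert(*foldableImm(7, SraW) == 7);
  return 0;
}

The rotate case reduces an out-of-range amount modulo 32, while the algebraic right shift refuses, matching the ImmWidth = 5 / TruncateImmTo = 0 special case above.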

View File

@ -97,6 +97,8 @@ struct ImmInstrInfo {
uint64_t ImmOpcode : 16;
// The size of the immediate.
uint64_t ImmWidth : 5;
// The immediate should be truncated to N bits.
uint64_t TruncateImmTo : 5;
};
// Information required to convert an instruction to just a materialized

View File

@ -55,7 +55,7 @@ FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
"convert reg-reg instructions to reg-imm"));
static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(false),
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
cl::desc("Convert eligible reg+reg instructions to reg+imm"));
static cl::opt<bool>

View File

@ -35,7 +35,7 @@ STATISTIC(NumRemovedInPreEmit,
"Number of instructions deleted in pre-emit peephole");
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(false),
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
cl::desc("Run pre-emit peephole optimizations."));
namespace {

View File

@ -230,9 +230,10 @@ RISCVAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
} // end anonymous namespace
MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
return new RISCVAsmBackend(OSABI, TT.isArch64Bit());
}

View File

@ -40,8 +40,8 @@ MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>

View File

@ -580,7 +580,6 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
}
MachineFunction &MF = DAG.getMachineFunction();
MVT XLenVT = Subtarget.getXLenVT();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
if (IsVarArg)
@ -593,7 +592,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
assert(VA.getLocVT() == XLenVT && "Unhandled argument type");
assert(VA.getLocVT() == Subtarget.getXLenVT() && "Unhandled argument type");
SDValue ArgValue;
if (VA.isRegLoc())
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);

View File

@ -177,7 +177,7 @@ class CS_ALU<bits<2> funct2, string OpcodeStr, RegisterClass cls,
let Predicates = [HasStdExtC] in {
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [X2] in
def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
(ins SP:$rs1, uimm10_lsb00nonzero:$imm),
"c.addi4spn", "$rd, $rs1, $imm"> {
@ -260,7 +260,7 @@ def C_ADDI : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1,
DecoderNamespace = "RISCV32Only_" in
DecoderNamespace = "RISCV32Only_", Defs = [X1] in
def C_JAL : RVInst16CJ<0b001, 0b01, (outs), (ins simm12_lsb0:$offset),
"c.jal", "$offset">,
Requires<[IsRV32]>;

View File

@ -14,6 +14,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/TargetRegistry.h"
@ -301,8 +302,8 @@ namespace {
} // end anonymous namespace
MCAsmBackend *llvm::createSparcAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
return new ELFSparcAsmBackend(T, TT.getOS());
return new ELFSparcAsmBackend(T, STI.getTargetTriple().getOS());
}

View File

@ -40,8 +40,8 @@ Target &getTheSparcelTarget();
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createSparcAsmBackend(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter>
createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,

View File

@ -14,6 +14,7 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
@ -122,9 +123,10 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
}
MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
uint8_t OSABI =
MCELFObjectTargetWriter::getOSABI(STI.getTargetTriple().getOS());
return new SystemZMCAsmBackend(OSABI);
}

View File

@ -89,8 +89,8 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
std::unique_ptr<MCObjectWriter> createSystemZObjectWriter(raw_pwrite_stream &OS,

View File

@ -69,10 +69,10 @@ static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
}
static MCAsmBackend *createAsmBackend(const Target & /*T*/,
const MCSubtargetInfo &STI,
const MCRegisterInfo & /*MRI*/,
const Triple &TT, StringRef /*CPU*/,
const MCTargetOptions & /*Options*/) {
return createWebAssemblyAsmBackend(TT);
return createWebAssemblyAsmBackend(STI.getTargetTriple());
}
static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU,

View File

@ -843,10 +843,11 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
} // end anonymous namespace
MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
StringRef CPU = STI.getCPU();
if (TheTriple.isOSBinFormatMachO())
return new DarwinX86_32AsmBackend(T, MRI, CPU);
@ -862,10 +863,11 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
}
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
StringRef CPU,
const MCTargetOptions &Options) {
const Triple &TheTriple = STI.getTargetTriple();
StringRef CPU = STI.getCPU();
if (TheTriple.isOSBinFormatMachO()) {
MachO::CPUSubTypeX86 CS =
StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())

View File

@ -70,11 +70,13 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createX86_32AsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
MCAsmBackend *createX86_64AsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
const MCTargetOptions &Options);
/// Implements X86-only directives for assembly emission.

View File

@ -166,81 +166,6 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
return true;
}
/// Check if register \p Reg is live after the \p MI.
///
/// \p LiveRegs should be in a state describing liveness information in
/// that exact place as this function tries to precise analysis made
/// by \p LiveRegs by exploiting the information about particular
/// instruction \p MI. \p MI is expected to be one of the MOVs handled
/// by the x86FixupBWInsts pass.
/// Note: similar to LivePhysRegs::contains this would state that
/// super-register is not used if only some part of it is used.
///
/// X86 backend does not have subregister liveness tracking enabled,
/// so liveness information might be overly conservative. However, for
/// some specific instructions (this pass only cares about MOVs) we can
/// produce more precise results by analysing that MOV's operands.
///
/// Indeed, if super-register is not live before the mov it means that it
/// was originally <read-undef> and so we are free to modify these
/// undef upper bits. That may happen in case where the use is in another MBB
/// and the vreg/physreg corresponding to the move has higher width than
/// necessary (e.g. due to register coalescing with a "truncate" copy).
/// So, it handles pattern like this:
///
/// %bb.2: derived from LLVM BB %if.then
/// Live Ins: %rdi
/// Predecessors according to CFG: %bb.0
/// %ax = MOV16rm killed %rdi, 1, %noreg, 0, %noreg, implicit-def %eax;
/// mem:LD2[%p]
/// No implicit %eax
/// Successors according to CFG: %bb.3(?%)
///
/// %bb.3: derived from LLVM BB %if.end
/// Live Ins: %eax Only %ax is actually live
/// Predecessors according to CFG: %bb.2 %bb.1
/// %ax = KILL %ax, implicit killed %eax
/// RET 0, %ax
static bool isLive(const MachineInstr &MI,
const LivePhysRegs &LiveRegs,
const TargetRegisterInfo *TRI,
unsigned Reg) {
if (!LiveRegs.contains(Reg))
return false;
unsigned Opc = MI.getOpcode(); (void)Opc;
// These are the opcodes currently handled by the pass, if something
// else will be added we need to ensure that new opcode has the same
// properties.
assert((Opc == X86::MOV8rm || Opc == X86::MOV16rm || Opc == X86::MOV8rr ||
Opc == X86::MOV16rr) &&
"Unexpected opcode.");
bool IsDefined = false;
for (auto &MO: MI.implicit_operands()) {
if (!MO.isReg())
continue;
assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!");
for (MCSuperRegIterator Supers(Reg, TRI, true); Supers.isValid(); ++Supers) {
if (*Supers == MO.getReg()) {
if (MO.isDef())
IsDefined = true;
else
return true; // SuperReg Imp-used -> live before the MI
}
}
}
// Reg is not Imp-def'ed -> it's live both before/after the instruction.
if (!IsDefined)
return true;
// Otherwise, the Reg is not live before the MI and the MOV can't
// make it really live, so it's in fact dead even after the MI.
return false;
}
/// \brief Check if after \p OrigMI the only portion of super register
/// of the destination register of \p OrigMI that is alive is that
/// destination register.
@ -262,20 +187,84 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
if (SubRegIdx == X86::sub_8bit_hi)
return false;
if (isLive(*OrigMI, LiveRegs, TRI, SuperDestReg))
return false;
if (SubRegIdx == X86::sub_8bit) {
// In the case of byte registers, we also have to check that the upper
// byte register is also dead. That is considered to be independent of
// whether the super-register is dead.
unsigned UpperByteReg =
getX86SubSuperRegister(SuperDestReg, 8, /*High=*/true);
if (isLive(*OrigMI, LiveRegs, TRI, UpperByteReg))
return false;
// If neither the destination-super register nor any applicable subregisters
// are live after this instruction, then the super register is safe to use.
if (!LiveRegs.contains(SuperDestReg)) {
// If the original destination register was not the low 8-bit subregister
// then the super register check is sufficient.
if (SubRegIdx != X86::sub_8bit)
return true;
// If the original destination register was the low 8-bit subregister, then
// we also need to check the 16-bit subregister and the high 8-bit
// subregister.
if (!LiveRegs.contains(getX86SubSuperRegister(OrigDestReg, 16)) &&
!LiveRegs.contains(getX86SubSuperRegister(SuperDestReg, 8,
/*High=*/true)))
return true;
// Otherwise, we have a little more checking to do.
}
// If we get here, the super-register destination (or some part of it) is
// marked as live after the original instruction.
//
// The X86 backend does not have subregister liveness tracking enabled,
// so liveness information might be overly conservative. Specifically, the
// super register might be marked as live because it is implicitly defined
// by the instruction we are examining.
//
// However, for some specific instructions (this pass only cares about MOVs)
// we can produce more precise results by analysing that MOV's operands.
//
// Indeed, if super-register is not live before the mov it means that it
// was originally <read-undef> and so we are free to modify these
// undef upper bits. That may happen in case where the use is in another MBB
// and the vreg/physreg corresponding to the move has higher width than
// necessary (e.g. due to register coalescing with a "truncate" copy).
// So, we would like to handle patterns like this:
//
// %bb.2: derived from LLVM BB %if.then
// Live Ins: %rdi
// Predecessors according to CFG: %bb.0
// %ax<def> = MOV16rm killed %rdi, 1, %noreg, 0, %noreg, implicit-def %eax
// ; No implicit %eax
// Successors according to CFG: %bb.3(?%)
//
// %bb.3: derived from LLVM BB %if.end
// Live Ins: %eax Only %ax is actually live
// Predecessors according to CFG: %bb.2 %bb.1
// %ax = KILL %ax, implicit killed %eax
// RET 0, %ax
unsigned Opc = OrigMI->getOpcode(); (void)Opc;
// These are the opcodes currently handled by the pass; if something
// else is added, we need to ensure that the new opcode has the same
// properties.
assert((Opc == X86::MOV8rm || Opc == X86::MOV16rm || Opc == X86::MOV8rr ||
Opc == X86::MOV16rr) &&
"Unexpected opcode.");
bool IsDefined = false;
for (auto &MO: OrigMI->implicit_operands()) {
if (!MO.isReg())
continue;
assert((MO.isDef() || MO.isUse()) && "Expected Def or Use only!");
for (MCSuperRegIterator Supers(OrigDestReg, TRI, true); Supers.isValid();
++Supers) {
if (*Supers == MO.getReg()) {
if (MO.isDef())
IsDefined = true;
else
return false; // SuperReg Imp-used -> live before the MI
}
}
}
// Reg is not Imp-def'ed -> it's live both before/after the instruction.
if (!IsDefined)
return false;
// Otherwise, the Reg is not live before the MI and the MOV can't
// make it really live, so it's in fact dead even after the MI.
return true;
}
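A minimal sketch of the conservative half of the rewritten dead-super-register test, using made-up register names and a plain std::set in place of LivePhysRegs; the implicit-operand rescue the pass performs afterwards is deliberately left out:

// Illustrative only: rewriting a MOV with 8-bit destination AL into a 32-bit
// MOV defining EAX is only safe when nothing aliasing EAX (EAX itself, AX, AH)
// is live after the instruction.
#include <cassert>
#include <set>
#include <string>

using LiveSet = std::set<std::string>;

bool safeToWiden8BitDef(const LiveSet &LiveAfter) {
  if (LiveAfter.count("EAX"))
    return false;                              // super register still read later
  return !LiveAfter.count("AX") && !LiveAfter.count("AH");
}

int main() {
  assert(safeToWiden8BitDef({"EBX", "ECX"}));  // nothing aliasing EAX is live
  assert(!safeToWiden8BitDef({"AH"}));         // the high byte is still live
  return 0;
}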

View File

@ -996,8 +996,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
@ -1151,15 +1151,26 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i1, MVT::v4i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i1, MVT::v4i32);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
if (Subtarget.hasVLX()) {
setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
}
// Extends of v16i1/v8i1 to 128-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom);
@ -1186,9 +1197,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1,
MVT::v16i1, MVT::v32i1, MVT::v64i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
for (auto VT : { MVT::v1i1, MVT::v8i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
@ -1219,11 +1229,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v16i16, Promote);
setOperationAction(ISD::FP_TO_SINT, MVT::v16i8, Promote);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
@ -1428,6 +1438,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
for (auto VT : { MVT::v16i1, MVT::v32i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v32i1 masks to 256-bit vectors.
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
@ -1540,6 +1552,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
for (auto VT : { MVT::v2i1, MVT::v4i1 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Extends from v2i1/v4i1 masks to 128-bit vectors.
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
@ -2140,6 +2154,10 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
const SDLoc &Dl, SelectionDAG &DAG) {
EVT ValVT = ValArg.getValueType();
if (ValVT == MVT::v1i1)
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
DAG.getIntPtrConstant(0, Dl));
if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
// Two stage lowering might be required
@ -4625,6 +4643,14 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
EVT BitcastVT) const {
if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
return false;
return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
}
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const SelectionDAG &DAG) const {
// Do not merge to float value size (128 bytes) if no implicit
@ -7471,7 +7497,7 @@ static bool isAddSub(const BuildVectorSDNode *BV,
}
/// Returns true if is possible to fold MUL and an idiom that has already been
/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
/// recognized as ADDSUB/SUBADD(\p Opnd0, \p Opnd1) into
/// FMADDSUB/FMSUBADD(x, y, \p Opnd1). If (and only if) true is returned, the
/// operands of FMADDSUB/FMSUBADD are written to parameters \p Opnd0, \p Opnd1, \p Opnd2.
///
@ -7708,6 +7734,10 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
case ISD::AND:
case ISD::XOR:
case ISD::OR:
// Don't do this if the buildvector is a splat - we'd replace one
// constant with an entire vector.
if (Op->getSplatValue())
return SDValue();
if (!TLI.isOperationLegalOrPromote(Opcode, VT))
return SDValue();
break;
@ -11261,6 +11291,20 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
// Attempt to directly match PSHUFLW or PSHUFHW.
if (isUndefOrInRange(LoMask, 0, 4) &&
isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
}
if (isUndefOrInRange(HiMask, 4, 8) &&
isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
for (int i = 0; i != 4; ++i)
HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));
return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
}
SmallVector<int, 4> LoInputs;
copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
std::sort(LoInputs.begin(), LoInputs.end());
@ -11280,13 +11324,11 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
// If we are splatting two values from one half - one to each half, then
// we can shuffle that half so each is splatted to a dword, then splat those
// to their respective halves.
auto SplatHalfs = [&](int LoInput, int HiInput, unsigned ShufWOp,
int DOffset) {
int PSHUFHalfMask[] = {LoInput % 4, LoInput % 4, HiInput % 4, HiInput % 4};
int PSHUFDMask[] = {DOffset + 0, DOffset + 0, DOffset + 1, DOffset + 1};
// If we are shuffling values from one half - check how many different DWORD
// pairs we need to create. If only 1 or 2 then we can perform this as a
// PSHUFLW/PSHUFHW + PSHUFD instead of the PSHUFD+PSHUFLW+PSHUFHW chain below.
auto ShuffleDWordPairs = [&](ArrayRef<int> PSHUFHalfMask,
ArrayRef<int> PSHUFDMask, unsigned ShufWOp) {
V = DAG.getNode(ShufWOp, DL, VT, V,
getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
V = DAG.getBitcast(PSHUFDVT, V);
@ -11295,10 +11337,48 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
return DAG.getBitcast(VT, V);
};
if (NumLToL == 1 && NumLToH == 1 && (NumHToL + NumHToH) == 0)
return SplatHalfs(LToLInputs[0], LToHInputs[0], X86ISD::PSHUFLW, 0);
if (NumHToL == 1 && NumHToH == 1 && (NumLToL + NumLToH) == 0)
return SplatHalfs(HToLInputs[0], HToHInputs[0], X86ISD::PSHUFHW, 2);
if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
int PSHUFDMask[4] = { -1, -1, -1, -1 };
SmallVector<std::pair<int, int>, 4> DWordPairs;
int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);
// Collect the different DWORD pairs.
for (int DWord = 0; DWord != 4; ++DWord) {
int M0 = Mask[2 * DWord + 0];
int M1 = Mask[2 * DWord + 1];
M0 = (M0 >= 0 ? M0 % 4 : M0);
M1 = (M1 >= 0 ? M1 % 4 : M1);
if (M0 < 0 && M1 < 0)
continue;
bool Match = false;
for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
auto &DWordPair = DWordPairs[j];
if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) &&
(M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) {
DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first);
DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second);
PSHUFDMask[DWord] = DOffset + j;
Match = true;
break;
}
}
if (!Match) {
PSHUFDMask[DWord] = DOffset + DWordPairs.size();
DWordPairs.push_back(std::make_pair(M0, M1));
}
}
if (DWordPairs.size() <= 2) {
DWordPairs.resize(2, std::make_pair(-1, -1));
int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
DWordPairs[1].first, DWordPairs[1].second};
if ((NumHToL + NumHToH) == 0)
return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
if ((NumLToL + NumLToH) == 0)
return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW);
}
}
// Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
// such inputs we can swap two of the dwords across the half mark and end up
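To make the DWORD-pair grouping above concrete, here is a standalone re-creation of the bookkeeping, assuming the simpler case where every defined mask element indexes the low half (so DOffset is 0); the types and function name are invented for the sketch:

// Illustrative only: group a one-half v8i16 shuffle mask into word pairs and,
// if at most two distinct pairs occur, derive the PSHUFLW and PSHUFD masks.
#include <cassert>
#include <utility>
#include <vector>

struct Lowering {
  bool Viable = false;
  int PSHUFHalfMask[4] = {-1, -1, -1, -1}; // word mask for PSHUFLW
  int PSHUFDMask[4]    = {-1, -1, -1, -1}; // dword mask for PSHUFD
};

Lowering lowerAsDWordPairs(const int Mask[8]) {
  Lowering L;
  std::vector<std::pair<int, int>> Pairs;
  for (int DWord = 0; DWord != 4; ++DWord) {
    int M0 = Mask[2 * DWord + 0], M1 = Mask[2 * DWord + 1];
    if (M0 < 0 && M1 < 0)
      continue;
    bool Match = false;
    for (int j = 0, e = (int)Pairs.size(); j < e; ++j) {
      if ((M0 < 0 || Pairs[j].first < 0 || Pairs[j].first == M0) &&
          (M1 < 0 || Pairs[j].second < 0 || Pairs[j].second == M1)) {
        if (M0 >= 0) Pairs[j].first = M0;
        if (M1 >= 0) Pairs[j].second = M1;
        L.PSHUFDMask[DWord] = j;
        Match = true;
        break;
      }
    }
    if (!Match) {
      L.PSHUFDMask[DWord] = (int)Pairs.size();
      Pairs.push_back({M0, M1});
    }
  }
  if (Pairs.size() > 2)
    return L; // needs the longer PSHUFD+PSHUFLW+PSHUFHW chain instead
  Pairs.resize(2, {-1, -1});
  L.PSHUFHalfMask[0] = Pairs[0].first;  L.PSHUFHalfMask[1] = Pairs[0].second;
  L.PSHUFHalfMask[2] = Pairs[1].first;  L.PSHUFHalfMask[3] = Pairs[1].second;
  L.Viable = true;
  return L;
}

int main() {
  // <3,2,3,2,1,0,1,0> has only two distinct word pairs, (3,2) and (1,0), so it
  // becomes PSHUFLW <3,2,1,0> followed by PSHUFD <0,0,1,1>.
  int Mask[8] = {3, 2, 3, 2, 1, 0, 1, 0};
  Lowering L = lowerAsDWordPairs(Mask);
  assert(L.Viable);
  assert(L.PSHUFDMask[0] == 0 && L.PSHUFDMask[1] == 0 &&
         L.PSHUFDMask[2] == 1 && L.PSHUFDMask[3] == 1);
  return 0;
}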
@ -15020,6 +15100,42 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
return insert1BitVector(Op, DAG, Subtarget);
}
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
"Only vXi1 extract_subvectors need custom lowering");
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
if (!isa<ConstantSDNode>(Idx))
return SDValue();
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
if (IdxVal == 0) // the operation is legal
return Op;
MVT VecVT = Vec.getSimpleValueType();
unsigned NumElems = VecVT.getVectorNumElements();
// Extend to natively supported kshift.
MVT WideVecVT = VecVT;
if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
DAG.getUNDEF(WideVecVT), Vec,
DAG.getIntPtrConstant(0, dl));
}
// Shift to the LSB.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
DAG.getConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
}
// Returns the appropriate wrapper opcode for a global reference.
unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
// References to absolute symbols are never PC-relative.
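The vXi1 EXTRACT_SUBVECTOR lowering added above boils down to bit manipulation on the mask register; a tiny standalone model follows, with ordinary integers standing in for k-registers and the helper name invented:

// Illustrative only: a vXi1 mask is X bits in a k-register, so taking a
// subvector at a non-zero index is a logical right shift (KSHIFTR) followed by
// reading the low bits at index 0.
#include <cassert>
#include <cstdint>

uint8_t extractSubMask(uint8_t Mask, unsigned Idx, unsigned NumElts) {
  uint8_t Shifted = Mask >> Idx;            // KSHIFTR by Idx
  return Shifted & ((1u << NumElts) - 1);   // the "extract at index 0" part
}

int main() {
  // v8i1 mask 0b10110010, extract the v2i1 subvector at element index 4:
  // elements 4 and 5 are both set, so the result is 0b11.
  assert(extractSubMask(0b10110010, 4, 2) == 0b11);
  return 0;
}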
@ -15545,19 +15661,13 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(VT == MVT::v2f64 && "Unexpected type");
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
}
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(VT == MVT::v2f64 && "Unexpected type");
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
}
return SDValue();
}
@ -15894,19 +16004,13 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG,
MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
}
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
if (SrcVT == MVT::v2i1) {
// For v2i1, we need to widen to v4i1 first.
assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
DAG.getUNDEF(MVT::v2i1));
return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
}
switch (SrcVT.SimpleTy) {
@ -16418,13 +16522,16 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
if (InVT.getScalarSizeInBits() <= 16) {
if (Subtarget.hasBWI()) {
// legal, will go to VPMOVB2M, VPMOVW2M
// Shift packed bytes not supported natively, bitcast to word
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
ShiftNode = DAG.getBitcast(InVT, ShiftNode);
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
// We need to shift to get the lsb into sign position.
// Shift packed bytes not supported natively, bitcast to word
MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
In = DAG.getNode(ISD::SHL, DL, ExtVT,
DAG.getBitcast(ExtVT, In),
DAG.getConstant(ShiftInx, DL, ExtVT));
In = DAG.getBitcast(InVT, In);
}
return DAG.getNode(X86ISD::CVT2MASK, DL, VT, In);
}
// Use TESTD/Q, extended vector to packed dword/qword.
assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
@ -16437,9 +16544,12 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
ShiftInx = InVT.getScalarSizeInBits() - 1;
}
SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
// We need to shift to get the lsb into sign position.
In = DAG.getNode(ISD::SHL, DL, InVT, In,
DAG.getConstant(ShiftInx, DL, InVT));
}
return DAG.getNode(X86ISD::TESTM, DL, VT, In, In);
}
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
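A scalar model of why the ComputeNumSignBits guard above lets LowerTruncateVecI1 skip the shift: the mask-forming instructions test each element's sign bit, so the shift is only needed when the interesting bit still sits at bit 0. The names below are invented for the sketch:

// Illustrative only: move bit 0 into the sign position unless the element is
// already a sign-extended boolean, then test the sign bit the way
// CVT2MASK/TESTM effectively do.
#include <cassert>
#include <cstdint>

bool maskBitFromElement(int8_t Elt, bool AlreadySignExtended) {
  if (!AlreadySignExtended)
    Elt = (int8_t)(Elt << 7);  // shift the i1 value into the sign bit
  return Elt < 0;
}

int main() {
  assert(maskBitFromElement(1, false) == true);   // i1 value 1 stored as 0x01
  assert(maskBitFromElement(-1, true) == true);   // already sign-extended true
  assert(maskBitFromElement(0, true) == false);
  return 0;
}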
@ -16572,9 +16682,29 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
SDValue Src = Op.getOperand(0);
SDLoc dl(Op);
if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
MVT TruncVT = MVT::v4i1;
unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
// Widen to 512-bits.
ResVT = MVT::v8i32;
TruncVT = MVT::v8i1;
Opc = ISD::FP_TO_UINT;
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
DAG.getUNDEF(MVT::v8f64),
Src, DAG.getIntPtrConstant(0, dl));
}
SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
DAG.getIntPtrConstant(0, dl));
}
assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
@ -18629,6 +18759,7 @@ static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
DAG.getUNDEF(ExtVT), Op, DAG.getIntPtrConstant(0, dl));
}
Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i1, Op);
Op = DAG.getBitcast(MVT::i8, Op);
return DAG.getStore(St->getChain(), dl, Op, St->getBasePtr(),
St->getMemOperand());
}
@ -18645,12 +18776,12 @@ static SDValue LowerTruncatingStore(SDValue StOp, const X86Subtarget &Subtarget,
DAG.getIntPtrConstant(16, dl));
Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::v16i1, Hi);
SDValue BasePtrHi =
DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getConstant(2, dl, BasePtr.getValueType()));
SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl);
SDValue StHi = DAG.getStore(St->getChain(), dl, Hi,
BasePtrHi, St->getMemOperand());
BasePtrHi, St->getPointerInfo().getWithOffset(2),
MinAlign(St->getAlignment(), 2U),
St->getMemOperand()->getFlags());
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StLo, StHi);
}
@ -24545,6 +24676,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
@ -29735,7 +29867,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
/// by this operation to try to flow through the rest of the combiner
/// the fact that they're unused.
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
SDValue &Opnd0, SDValue &Opnd1,
SDValue &Opnd0, SDValue &Opnd1,
bool matchSubAdd = false) {
EVT VT = N->getValueType(0);
@ -30309,9 +30441,35 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// (i16 movmsk (16i8 sext (v16i1 x)))
// before the setcc result is scalarized on subtargets that don't have legal
// vxi1 types.
if (DCI.isBeforeLegalize())
if (DCI.isBeforeLegalize()) {
if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
return V;
// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
Subtarget.hasVLX()) {
SDLoc dl(N);
N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
N0 = DAG.getBitcast(MVT::v8i1, N0);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
DAG.getIntPtrConstant(0, dl));
}
// If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
// type, widen both sides to avoid a trip through memory.
if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() &&
Subtarget.hasVLX()) {
SDLoc dl(N);
unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
Ops[0] = N0;
N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
N0 = DAG.getBitcast(MVT::i8, N0);
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
}
}
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
@ -30791,6 +30949,11 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
return SDValue();
// Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
if (X86ISD::VBROADCAST == Src.getOpcode() &&
Src.getOperand(0).getValueType() == VT)
return Src.getOperand(0);
// Resolve the target shuffle inputs and mask.
SmallVector<int, 16> Mask;
SmallVector<SDValue, 2> Ops;
@ -36153,13 +36316,23 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
// We're looking for an oversized integer equality comparison, but ignore a
// comparison with zero because that gets special treatment in EmitTest().
// We're looking for an oversized integer equality comparison.
SDValue X = SetCC->getOperand(0);
SDValue Y = SetCC->getOperand(1);
EVT OpVT = X.getValueType();
unsigned OpSize = OpVT.getSizeInBits();
if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y))
if (!OpVT.isScalarInteger() || OpSize < 128)
return SDValue();
// Ignore a comparison with zero because that gets special treatment in
// EmitTest(). But make an exception for the special case of a pair of
// logically-combined vector-sized operands compared to zero. This pattern may
// be generated by the memcmp expansion pass with oversized integer compares
// (see PR33325).
bool IsOrXorXorCCZero = isNullConstant(Y) && X.getOpcode() == ISD::OR &&
X.getOperand(0).getOpcode() == ISD::XOR &&
X.getOperand(1).getOpcode() == ISD::XOR;
if (isNullConstant(Y) && !IsOrXorXorCCZero)
return SDValue();
// Bail out if we know that this is not really just an oversized integer.
@ -36174,15 +36347,29 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
if ((OpSize == 128 && Subtarget.hasSSE2()) ||
(OpSize == 256 && Subtarget.hasAVX2())) {
EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
SDValue Cmp;
if (IsOrXorXorCCZero) {
// This is a bitwise-combined equality comparison of 2 pairs of vectors:
// setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne
// Use 2 vector equality compares and 'and' the results before doing a
// MOVMSK.
SDValue A = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(0));
SDValue B = DAG.getBitcast(VecVT, X.getOperand(0).getOperand(1));
SDValue C = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(0));
SDValue D = DAG.getBitcast(VecVT, X.getOperand(1).getOperand(1));
SDValue Cmp1 = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, A, B);
SDValue Cmp2 = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, C, D);
Cmp = DAG.getNode(ISD::AND, DL, VecVT, Cmp1, Cmp2);
} else {
SDValue VecX = DAG.getBitcast(VecVT, X);
SDValue VecY = DAG.getBitcast(VecVT, Y);
Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
}
// If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
// setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
// setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
// setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
// setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
MVT::i32);
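The new IsOrXorXorCCZero path rests on a simple identity: ((A ^ B) | (C ^ D)) == 0 exactly when A == B and C == D, which is what the pair of PCMPEQs ANDed before the MOVMSK computes. A small scalar sketch of that equivalence, with helper names invented:

// Illustrative only: the DAG pattern the combine matches versus what the
// vectorized lowering answers; the two formulations always agree.
#include <cassert>
#include <cstdint>

bool orXorXorIsZero(uint64_t A, uint64_t B, uint64_t C, uint64_t D) {
  return ((A ^ B) | (C ^ D)) == 0;   // the matched setcc-against-zero pattern
}

bool pairwiseEqual(uint64_t A, uint64_t B, uint64_t C, uint64_t D) {
  return (A == B) && (C == D);       // what PCMPEQ + AND + MOVMSK computes
}

int main() {
  assert(orXorXorIsZero(1, 1, 7, 7) == pairwiseEqual(1, 1, 7, 7));
  assert(orXorXorIsZero(1, 2, 7, 7) == pairwiseEqual(1, 2, 7, 7));
  return 0;
}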

View File

@ -1023,6 +1023,8 @@ namespace llvm {
return NumElem > 2;
}
bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
/// Intel processors have a unified instruction and data cache
const char * getClearCacheBuiltinName() const override {
return nullptr; // nothing to do, move along.

View File

@ -2701,11 +2701,6 @@ def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
// Load/store kreg
let Predicates = [HasDQI] in {
def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
(KMOVBmk addr:$dst, VK8:$src)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(KMOVBkm addr:$src)>;
def : Pat<(store VK4:$src, addr:$dst),
(KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
def : Pat<(store VK2:$src, addr:$dst),
@ -2745,22 +2740,10 @@ let Predicates = [HasAVX512, NoDQI] in {
}
let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(v1i1 (load addr:$src)),
(COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
}
let Predicates = [HasBWI] in {
def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
(KMOVDmk addr:$dst, VK32:$src)>;
def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
(KMOVDkm addr:$src)>;
def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
(KMOVQmk addr:$dst, VK64:$src)>;
def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
(KMOVQkm addr:$src)>;
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>;
def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
}
let Predicates = [HasAVX512] in {
@ -3087,66 +3070,6 @@ defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
multiclass vextract_for_mask_to_mask<string InstrStr, X86KVectorVTInfo From,
X86KVectorVTInfo To, Predicate prd> {
let Predicates = [prd] in
def :
Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
(To.KVT(COPY_TO_REGCLASS
(!cast<Instruction>(InstrStr#"ri") From.KVT:$src,
(i8 imm:$imm8)), To.KRC))>;
}
multiclass vextract_for_mask_to_mask_legal_w<X86KVectorVTInfo From,
X86KVectorVTInfo To> {
def :
Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
(To.KVT(COPY_TO_REGCLASS
(KSHIFTRWri(COPY_TO_REGCLASS From.KRC:$src, VK16),
(i8 imm:$imm8)), To.KRC))>;
}
defm : vextract_for_mask_to_mask_legal_w<v2i1_info, v1i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v1i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v1i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v2i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v2i1_info>;
defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v4i1_info>;
defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v1i1_info, HasAVX512>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v1i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v1i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v2i1_info, HasAVX512>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v2i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v2i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v4i1_info, HasAVX512>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v4i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v4i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v8i1_info, HasAVX512>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v8i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v8i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v16i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v16i1_info, HasBWI>;
defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v32i1_info, HasBWI>;
// Patterns for kmask shift
multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
}
defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
@ -3428,28 +3351,33 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
Narrow.RC:$src1, Narrow.RC:$src0)),
(EXTRACT_SUBREG
(Wide.VT
(!cast<Instruction>(InstrStr#"rrk")
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
(COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
Narrow.SubRegIdx)>;
def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
Narrow.RC:$src1, Narrow.ImmAllZerosV)),
(EXTRACT_SUBREG
(Wide.VT
(!cast<Instruction>(InstrStr#"rrkz")
(COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
Narrow.SubRegIdx)>;
}
// Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
// available. Use a 512-bit operation and extract.
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16f32
(VMOVAPSZrrk
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src0))),
(EXTRACT_SUBREG
(v16i32
(VMOVDQA32Zrrk
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
(COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
sub_ymm)>;
defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
}
let Predicates = [HasAVX512] in {
@ -4633,7 +4561,7 @@ multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
sub_xmm)>;
}
let Predicates = [HasAVX512] in {
let Predicates = [HasAVX512, NoVLX] in {
defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;

Some files were not shown because too many files have changed in this diff.