From 5defb46a4ef2f417dd393905f1bd484040f901a2 Mon Sep 17 00:00:00 2001 From: dim Date: Mon, 26 Jun 2017 20:33:34 +0000 Subject: [PATCH 1/5] Vendor import of libc++ trunk r306325: https://llvm.org/svn/llvm-project/libcxx/trunk@306325 --- CMakeLists.txt | 1 + include/__config | 15 +- include/experimental/any | 9 +- include/new | 20 +-- include/numeric | 87 ++++++++++ include/variant | 2 + src/experimental/filesystem/path.cpp | 5 +- .../alg.foreach/for_each_n.pass.cpp | 10 +- .../any/any.class/any.assign/copy.pass.cpp | 7 +- .../any/any.class/any.assign/move.pass.cpp | 7 +- .../any/any.class/any.assign/value.pass.cpp | 7 +- .../any/any.class/any.cons/copy.pass.cpp | 7 +- .../any/any.class/any.cons/move.pass.cpp | 7 +- .../any/any.class/any.cons/value.pass.cpp | 7 +- .../any.class/any.modifiers/clear.pass.cpp | 7 +- .../any/any.class/any.modifiers/swap.pass.cpp | 7 +- .../iterator.operations/advance.pass.cpp | 2 +- .../iterator.operations/prev.pass.cpp | 2 +- .../new_deployment.fail.cpp | 36 ++++ .../exclusive.scan/exclusive_scan.pass.cpp | 6 +- .../exclusive_scan_init_op.pass.cpp | 5 +- .../inclusive.scan/inclusive_scan.pass.cpp | 102 +++++++++++ .../inclusive.scan/inclusive_scan_op.pass.cpp | 112 ++++++++++++ .../inclusive_scan_op_init.pass.cpp | 128 ++++++++++++++ .../reduce/reduce_init_op.pass.cpp | 2 +- ...sform_exclusive_scan_init_bop_uop.pass.cpp | 8 +- .../transform_inclusive_scan_bop_uop.pass.cpp | 133 +++++++++++++++ ...sform_inclusive_scan_bop_uop_init.pass.cpp | 160 ++++++++++++++++++ ...orm_reduce_iter_iter_init_bop_uop.pass.cpp | 8 +- ...nsform_reduce_iter_iter_iter_init.pass.cpp | 4 +- ..._reduce_iter_iter_iter_init_op_op.pass.cpp | 8 +- .../allocator.adaptor.cnstr/allocs.pass.cpp | 2 +- .../func.wrap.func.con/copy_move.pass.cpp | 2 +- .../optional.object.ctor/move.pass.cpp | 2 +- .../tuple.tuple/tuple.cnstr/dtor.pass.cpp | 2 +- .../tuple_size_structured_bindings.pass.cpp | 16 +- .../utility/pairs/pairs.pair/dtor.pass.cpp | 2 +- .../variant.variant/variant.ctor/T.pass.cpp | 14 ++ test/support/count_new.hpp | 15 +- test/support/experimental_any_helpers.h | 2 + www/upcoming_meeting.html | 84 +++------ 41 files changed, 899 insertions(+), 163 deletions(-) create mode 100644 test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_deployment.fail.cpp create mode 100644 test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp create mode 100644 test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp create mode 100644 test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp create mode 100644 test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp create mode 100644 test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 33d888b807da..5b5971036b88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,6 +118,7 @@ if (LIBCXX_CXX_ABI STREQUAL "default") cxxabi.h PATHS ${LLVM_MAIN_SRC_DIR}/projects/libcxxabi/include ${LLVM_MAIN_SRC_DIR}/runtimes/libcxxabi/include + ${LLVM_MAIN_SRC_DIR}/../libcxxabi/include NO_DEFAULT_PATH ) if (LIBCXX_TARGETING_MSVC) diff --git a/include/__config b/include/__config index fc24a3bc9ba9..aae053102f55 100644 --- a/include/__config +++ b/include/__config @@ -1154,6 +1154,7 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( __attribute__((availability(watchos,strict,introduced=3.0))) #define 
_LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS __attribute__((unavailable)) #define _LIBCPP_AVAILABILITY_BAD_ARRAY_LENGTH __attribute__((unavailable)) +#define _LIBCPP_AVAILABILITY_BAD_ANY_CAST __attribute__((unavailable)) #define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ __attribute__((availability(macosx,strict,introduced=10.12))) \ __attribute__((availability(ios,strict,introduced=10.0))) \ @@ -1175,25 +1176,35 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) +#define _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION \ + __attribute__((availability(macosx,strict,introduced=10.13))) \ + __attribute__((availability(ios,strict,introduced=11.0))) \ + __attribute__((availability(tvos,strict,introduced=11.0))) \ + __attribute__((availability(watchos,strict,introduced=4.0))) #else #define _LIBCPP_AVAILABILITY_SHARED_MUTEX #define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS #define _LIBCPP_AVAILABILITY_BAD_ARRAY_LENGTH +#define _LIBCPP_AVAILABILITY_BAD_ANY_CAST #define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS #define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE #define _LIBCPP_AVAILABILITY_FUTURE_ERROR #define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE #define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY #define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR +#define _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION #endif // Define availability that depends on _LIBCPP_NO_EXCEPTIONS. #ifdef _LIBCPP_NO_EXCEPTIONS #define _LIBCPP_AVAILABILITY_DYNARRAY #define _LIBCPP_AVAILABILITY_FUTURE +#define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST #else #define _LIBCPP_AVAILABILITY_DYNARRAY _LIBCPP_AVAILABILITY_BAD_ARRAY_LENGTH #define _LIBCPP_AVAILABILITY_FUTURE _LIBCPP_AVAILABILITY_FUTURE_ERROR +#define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST \ + _LIBCPP_AVAILABILITY_BAD_ANY_CAST #endif // Availability of stream API in the dylib got dropped and re-added. 
The @@ -1202,9 +1213,9 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( // availability(ios,introduced=7.0) #if defined(_LIBCPP_USE_AVAILABILITY_APPLE) && \ ((defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <= 1090) || \ + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1090) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ <= 70000)) + __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 70000)) #define _LIBCPP_AVAILABILITY_NO_STREAMS_EXTERN_TEMPLATE #endif diff --git a/include/experimental/any b/include/experimental/any index 022b379e858b..083a2909033c 100644 --- a/include/experimental/any +++ b/include/experimental/any @@ -89,7 +89,7 @@ inline namespace fundamentals_v1 { _LIBCPP_BEGIN_NAMESPACE_LFTS -class _LIBCPP_EXCEPTION_ABI bad_any_cast : public bad_cast +class _LIBCPP_EXCEPTION_ABI _LIBCPP_AVAILABILITY_BAD_ANY_CAST bad_any_cast : public bad_cast { public: virtual const char* what() const _NOEXCEPT; @@ -98,6 +98,7 @@ public: #if _LIBCPP_STD_VER > 11 // C++ > 11 _LIBCPP_NORETURN inline _LIBCPP_ALWAYS_INLINE +_LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST void __throw_bad_any_cast() { #ifndef _LIBCPP_NO_EXCEPTIONS @@ -506,7 +507,7 @@ void swap(any & __lhs, any & __rhs) _NOEXCEPT } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST _ValueType any_cast(any const & __v) { static_assert( @@ -522,7 +523,7 @@ _ValueType any_cast(any const & __v) } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST _ValueType any_cast(any & __v) { static_assert( @@ -537,7 +538,7 @@ _ValueType any_cast(any & __v) } template -_LIBCPP_INLINE_VISIBILITY +_LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST _ValueType any_cast(any && __v) { static_assert( diff --git a/include/new b/include/new index 34df2efee09e..3945faee0e38 100644 --- a/include/new +++ b/include/new @@ -193,20 +193,20 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operato #endif #ifndef _LIBCPP_HAS_NO_ALIGNED_ALLOCATION -_LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; -_LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _NOALIAS; -_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t) _NOEXCEPT; -_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void* operator new(std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void* operator new(std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _NOALIAS; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete(void* __p, std::align_val_t) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; #ifndef _LIBCPP_HAS_NO_SIZED_DEALLOCATION -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete(void* __p, std::size_t __sz, std::align_val_t) 
_NOEXCEPT; #endif -_LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; -_LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _NOALIAS; -_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t) _NOEXCEPT; -_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void* operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void* operator new[](std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _NOALIAS; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete[](void* __p, std::align_val_t) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; #ifndef _LIBCPP_HAS_NO_SIZED_DEALLOCATION -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operator delete[](void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete[](void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; #endif #endif diff --git a/include/numeric b/include/numeric index 39e81934dfa1..1b7d97c5be07 100644 --- a/include/numeric +++ b/include/numeric @@ -81,6 +81,20 @@ template + OutputIterator + inclusive_scan(InputIterator first, InputIterator last, OutputIterator result); // C++17 + +template + OutputIterator + inclusive_scan(InputIterator first, InputIterator last, + OutputIterator result, BinaryOperation binary_op); // C++17 + +template + OutputIterator + inclusive_scan(InputIterator first, InputIterator last, + OutputIterator result, BinaryOperation binary_op, T init); // C++17 + template OutputIterator @@ -88,6 +102,21 @@ template + OutputIterator + transform_inclusive_scan(InputIterator first, InputIterator last, + OutputIterator result, + BinaryOperation binary_op, UnaryOperation unary_op); // C++17 + +template + OutputIterator + transform_inclusive_scan(InputIterator first, InputIterator last, + OutputIterator result, + BinaryOperation binary_op, UnaryOperation unary_op, + T init); // C++17 + template OutputIterator adjacent_difference(InputIterator first, InputIterator last, OutputIterator result); @@ -295,6 +324,38 @@ exclusive_scan(_InputIterator __first, _InputIterator __last, return _VSTD::exclusive_scan(__first, __last, __result, __init, _VSTD::plus<>()); } +template +_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _BinaryOp __b, _Tp __init) +{ + for (; __first != __last; ++__first, (void) ++__result) { + __init = __b(__init, *__first); + *__result = __init; + } + return __result; +} + +template +_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _BinaryOp __b) +{ + if (__first != __last) { + typename std::iterator_traits<_InputIterator>::value_type __init = *__first; + *__result++ = __init; + if (++__first != __last) + return _VSTD::inclusive_scan(__first, __last, __result, __b, __init); + } + + return __result; +} + +template +_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last, + _OutputIterator __result) +{ + return _VSTD::inclusive_scan(__first, __last, 
__result, std::plus<>()); +} + template inline _LIBCPP_INLINE_VISIBILITY @@ -316,6 +377,32 @@ transform_exclusive_scan(_InputIterator __first, _InputIterator __last, } return __result; } + +template +_OutputIterator transform_inclusive_scan(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _BinaryOp __b, _UnaryOp __u, _Tp __init) +{ + for (; __first != __last; ++__first, (void) ++__result) { + __init = __b(__init, __u(*__first)); + *__result = __init; + } + + return __result; +} + +template +_OutputIterator transform_inclusive_scan(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _BinaryOp __b, _UnaryOp __u) +{ + if (__first != __last) { + typename std::iterator_traits<_InputIterator>::value_type __init = __u(*__first); + *__result++ = __init; + if (++__first != __last) + return _VSTD::transform_inclusive_scan(__first, __last, __result, __b, __u, __init); + } + + return __result; +} #endif template diff --git a/include/variant b/include/variant index 8711ef6eb387..f8d3e28bae45 100644 --- a/include/variant +++ b/include/variant @@ -1116,6 +1116,8 @@ public: template < class _Arg, enable_if_t, variant>, int> = 0, + enable_if_t>::value, int> = 0, + enable_if_t>::value, int> = 0, class _Tp = __variant_detail::__best_match_t<_Arg, _Types...>, size_t _Ip = __find_detail::__find_unambiguous_index_sfinae<_Tp, _Types...>::value, diff --git a/src/experimental/filesystem/path.cpp b/src/experimental/filesystem/path.cpp index f49d4cd2d418..dd4026cfe13a 100644 --- a/src/experimental/filesystem/path.cpp +++ b/src/experimental/filesystem/path.cpp @@ -261,7 +261,8 @@ struct PathParser { string_view_pair separate_filename(string_view_t const & s) { if (s == "." || s == ".." || s.empty()) return string_view_pair{s, ""}; auto pos = s.find_last_of('.'); - if (pos == string_view_t::npos) return string_view_pair{s, string_view{}}; + if (pos == string_view_t::npos) + return string_view_pair{s, string_view_t{}}; return string_view_pair{s.substr(0, pos), s.substr(pos)}; } @@ -396,7 +397,7 @@ int path::__compare(string_view_t __s) const { size_t hash_value(const path& __p) noexcept { auto PP = PathParser::CreateBegin(__p.native()); size_t hash_value = 0; - std::hash hasher; + std::hash hasher; while (PP) { hash_value = __hash_combine(hash_value, hasher(*PP)); ++PP; diff --git a/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp b/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp index fd24edb43060..9b391f01cea5 100644 --- a/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp +++ b/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp @@ -36,15 +36,15 @@ int main() auto f = for_each_test(0); Iter it = std::for_each_n(Iter(ia), 0, std::ref(f)); assert(it == Iter(ia)); - assert(f.count == 0); + assert(f.count == 0); } { auto f = for_each_test(0); Iter it = std::for_each_n(Iter(ia), s, std::ref(f)); - + assert(it == Iter(ia+s)); - assert(f.count == s); + assert(f.count == s); for (unsigned i = 0; i < s; ++i) assert(ia[i] == static_cast(i+1)); } @@ -52,9 +52,9 @@ int main() { auto f = for_each_test(0); Iter it = std::for_each_n(Iter(ia), 1, std::ref(f)); - + assert(it == Iter(ia+1)); - assert(f.count == 1); + assert(f.count == 1); for (unsigned i = 0; i < 1; ++i) assert(ia[i] == static_cast(i+2)); } diff --git a/test/std/experimental/any/any.class/any.assign/copy.pass.cpp b/test/std/experimental/any/any.class/any.assign/copy.pass.cpp index 7140fab88feb..0b9d71e5f705 100644 --- 
a/test/std/experimental/any/any.class/any.assign/copy.pass.cpp +++ b/test/std/experimental/any/any.class/any.assign/copy.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: c++98, c++03, c++11 -// XFAIL: with_system_cxx_lib=macosx10.12 -// XFAIL: with_system_cxx_lib=macosx10.11 -// XFAIL: with_system_cxx_lib=macosx10.10 -// XFAIL: with_system_cxx_lib=macosx10.9 -// XFAIL: with_system_cxx_lib=macosx10.7 -// XFAIL: with_system_cxx_lib=macosx10.8 +// XFAIL: availability=macosx // diff --git a/test/std/experimental/any/any.class/any.assign/move.pass.cpp b/test/std/experimental/any/any.class/any.assign/move.pass.cpp index 35fc56c24d3f..72351aeae68f 100644 --- a/test/std/experimental/any/any.class/any.assign/move.pass.cpp +++ b/test/std/experimental/any/any.class/any.assign/move.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: c++98, c++03, c++11 -// XFAIL: with_system_cxx_lib=macosx10.12 -// XFAIL: with_system_cxx_lib=macosx10.11 -// XFAIL: with_system_cxx_lib=macosx10.10 -// XFAIL: with_system_cxx_lib=macosx10.9 -// XFAIL: with_system_cxx_lib=macosx10.7 -// XFAIL: with_system_cxx_lib=macosx10.8 +// XFAIL: availability=macosx // diff --git a/test/std/experimental/any/any.class/any.assign/value.pass.cpp b/test/std/experimental/any/any.class/any.assign/value.pass.cpp index 8ff4ad594151..cd4646fb5fd5 100644 --- a/test/std/experimental/any/any.class/any.assign/value.pass.cpp +++ b/test/std/experimental/any/any.class/any.assign/value.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: c++98, c++03, c++11 -// XFAIL: with_system_cxx_lib=macosx10.12 -// XFAIL: with_system_cxx_lib=macosx10.11 -// XFAIL: with_system_cxx_lib=macosx10.10 -// XFAIL: with_system_cxx_lib=macosx10.9 -// XFAIL: with_system_cxx_lib=macosx10.7 -// XFAIL: with_system_cxx_lib=macosx10.8 +// XFAIL: availability=macosx // diff --git a/test/std/experimental/any/any.class/any.cons/copy.pass.cpp b/test/std/experimental/any/any.class/any.cons/copy.pass.cpp index 47f12d739236..d477394c0b91 100644 --- a/test/std/experimental/any/any.class/any.cons/copy.pass.cpp +++ b/test/std/experimental/any/any.class/any.cons/copy.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: c++98, c++03, c++11 -// XFAIL: with_system_cxx_lib=macosx10.12 -// XFAIL: with_system_cxx_lib=macosx10.11 -// XFAIL: with_system_cxx_lib=macosx10.10 -// XFAIL: with_system_cxx_lib=macosx10.9 -// XFAIL: with_system_cxx_lib=macosx10.7 -// XFAIL: with_system_cxx_lib=macosx10.8 +// XFAIL: availability=macosx // diff --git a/test/std/experimental/any/any.class/any.cons/move.pass.cpp b/test/std/experimental/any/any.class/any.cons/move.pass.cpp index c5395daa3da6..ef980ca5f222 100644 --- a/test/std/experimental/any/any.class/any.cons/move.pass.cpp +++ b/test/std/experimental/any/any.class/any.cons/move.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: c++98, c++03, c++11 -// XFAIL: with_system_cxx_lib=macosx10.12 -// XFAIL: with_system_cxx_lib=macosx10.11 -// XFAIL: with_system_cxx_lib=macosx10.10 -// XFAIL: with_system_cxx_lib=macosx10.9 -// XFAIL: with_system_cxx_lib=macosx10.7 -// XFAIL: with_system_cxx_lib=macosx10.8 +// XFAIL: availability=macosx // diff --git a/test/std/experimental/any/any.class/any.cons/value.pass.cpp b/test/std/experimental/any/any.class/any.cons/value.pass.cpp index fcace5028eca..d37990e6ca37 100644 --- a/test/std/experimental/any/any.class/any.cons/value.pass.cpp +++ b/test/std/experimental/any/any.class/any.cons/value.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: c++98, c++03, c++11 -// XFAIL: with_system_cxx_lib=macosx10.12 -// XFAIL: with_system_cxx_lib=macosx10.11 -// XFAIL: 
with_system_cxx_lib=macosx10.10 -// XFAIL: with_system_cxx_lib=macosx10.9 -// XFAIL: with_system_cxx_lib=macosx10.7 -// XFAIL: with_system_cxx_lib=macosx10.8 +// XFAIL: availability=macosx // diff --git a/test/std/experimental/any/any.class/any.modifiers/clear.pass.cpp b/test/std/experimental/any/any.class/any.modifiers/clear.pass.cpp index 1b0b75d61fee..a19bd38f129b 100644 --- a/test/std/experimental/any/any.class/any.modifiers/clear.pass.cpp +++ b/test/std/experimental/any/any.class/any.modifiers/clear.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: c++98, c++03, c++11 -// XFAIL: with_system_cxx_lib=macosx10.12 -// XFAIL: with_system_cxx_lib=macosx10.11 -// XFAIL: with_system_cxx_lib=macosx10.10 -// XFAIL: with_system_cxx_lib=macosx10.9 -// XFAIL: with_system_cxx_lib=macosx10.7 -// XFAIL: with_system_cxx_lib=macosx10.8 +// XFAIL: availability=macosx // diff --git a/test/std/experimental/any/any.class/any.modifiers/swap.pass.cpp b/test/std/experimental/any/any.class/any.modifiers/swap.pass.cpp index 9f499b4a1fb3..8de582a6b76e 100644 --- a/test/std/experimental/any/any.class/any.modifiers/swap.pass.cpp +++ b/test/std/experimental/any/any.class/any.modifiers/swap.pass.cpp @@ -9,12 +9,7 @@ // UNSUPPORTED: c++98, c++03, c++11 -// XFAIL: with_system_cxx_lib=macosx10.12 -// XFAIL: with_system_cxx_lib=macosx10.11 -// XFAIL: with_system_cxx_lib=macosx10.10 -// XFAIL: with_system_cxx_lib=macosx10.9 -// XFAIL: with_system_cxx_lib=macosx10.7 -// XFAIL: with_system_cxx_lib=macosx10.8 +// XFAIL: availability=macosx // diff --git a/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp b/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp index e5bd5603b8e3..ff1b3e7ead43 100644 --- a/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp +++ b/test/std/iterators/iterator.primitives/iterator.operations/advance.pass.cpp @@ -35,7 +35,7 @@ test(It i, typename std::iterator_traits::difference_type n, It x) #if TEST_STD_VER > 14 template -constexpr bool +constexpr bool constepxr_test(It i, typename std::iterator_traits::difference_type n, It x) { std::advance(i, n); diff --git a/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp b/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp index 465cda1c47e4..1eb91881fe11 100644 --- a/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp +++ b/test/std/iterators/iterator.primitives/iterator.operations/prev.pass.cpp @@ -71,5 +71,5 @@ int main() static_assert( constexpr_test(s+1, s), "" ); } #endif - + } diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_deployment.fail.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_deployment.fail.cpp new file mode 100644 index 000000000000..f9f487dc7ef8 --- /dev/null +++ b/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_deployment.fail.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03, c++11, c++14 +// REQUIRES: availability=macosx10.12 + +// test availability of new/delete operators introduced in c++17. 
+ +#include + +int main () { + int *p0 = new ((std::align_val_t)16) int(1); + (void)p0; + int *p1 = new ((std::align_val_t)16) int[1]; + (void)p1; + // expected-error@-4 {{call to unavailable function 'operator new': introduced in macOS 10.13}} + // expected-note@new:* {{candidate function has been explicitly made unavailable}} + // expected-note@new:* {{candidate function not viable: no known conversion from 'std::align_val_t' to 'const std::nothrow_t' for 2nd argument}} + // expected-note@new:* {{candidate function not viable: no known conversion from 'std::align_val_t' to 'void *' for 2nd argument}} + // expected-note@new:* {{candidate function not viable: requires single argument '__sz', but 2 arguments were provided}} + // expected-note@new:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} + + // expected-error@-9 {{call to unavailable function 'operator new[]': introduced in macOS 10.13}} + // expected-note@new:* {{candidate function has been explicitly made unavailable}} + // expected-note@new:* {{candidate function not viable: no known conversion from 'std::align_val_t' to 'const std::nothrow_t' for 2nd argument}} + // expected-note@new:* {{candidate function not viable: no known conversion from 'std::align_val_t' to 'void *' for 2nd argument}} + // expected-note@new:* {{candidate function not viable: requires single argument '__sz', but 2 arguments were provided}} + // expected-note@new:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} + return 0; +} diff --git a/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp b/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp index 6fdd288e2d6a..34181f508e2a 100644 --- a/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp +++ b/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp @@ -13,7 +13,7 @@ // template // OutputIterator exclusive_scan(InputIterator first, InputIterator last, // OutputIterator result, T init); -// +// #include #include @@ -26,7 +26,7 @@ void test(Iter1 first, Iter1 last, T init, Iter2 rFirst, Iter2 rLast) { std::vector::value_type> v; - + // Not in place std::exclusive_scan(first, last, std::back_inserter(v), init); assert(std::equal(v.begin(), v.end(), rFirst, rLast)); @@ -35,7 +35,7 @@ test(Iter1 first, Iter1 last, T init, Iter2 rFirst, Iter2 rLast) v.clear(); v.assign(first, last); std::exclusive_scan(v.begin(), v.end(), v.begin(), init); - assert(std::equal(v.begin(), v.end(), rFirst, rLast)); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); } diff --git a/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp b/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp index ba1673fe467f..c15cb1661e25 100644 --- a/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp +++ b/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp @@ -13,7 +13,7 @@ // template // OutputIterator // exclusive_scan(InputIterator first, InputIterator last, -// OutputIterator result, +// OutputIterator result, // T init, BinaryOperation binary_op); // C++17 #include @@ -36,7 +36,7 @@ test(Iter1 first, Iter1 last, T init, Op op, Iter2 rFirst, Iter2 rLast) v.clear(); v.assign(first, last); std::exclusive_scan(v.begin(), v.end(), v.begin(), init, op); - assert(std::equal(v.begin(), v.end(), rFirst, rLast)); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); } @@ -84,4 +84,3 @@ int main() } } } - \ No newline at end of 
file diff --git a/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp b/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp new file mode 100644 index 000000000000..af5b1f0d3717 --- /dev/null +++ b/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp @@ -0,0 +1,102 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 + +// template +// OutputIterator inclusive_scan(InputIterator first, InputIterator last, +// OutputIterator result, T init); +// + +#include +#include +#include + +#include "test_iterators.h" + +template +void +test(Iter1 first, Iter1 last, Iter2 rFirst, Iter2 rLast) +{ + std::vector::value_type> v; + +// Not in place + std::inclusive_scan(first, last, std::back_inserter(v)); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); + +// In place + v.clear(); + v.assign(first, last); + std::inclusive_scan(v.begin(), v.end(), v.begin()); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); +} + + +template +void +test() +{ + int ia[] = {1, 3, 5, 7, 9}; + const int pRes[] = {1, 4, 9, 16, 25}; + const unsigned sa = sizeof(ia) / sizeof(ia[0]); + static_assert(sa == sizeof(pRes) / sizeof(pRes[0])); // just to be sure + + for (unsigned int i = 0; i < sa; ++i ) + test(Iter(ia), Iter(ia + i), pRes, pRes + i); +} + +int triangle(int n) { return n*(n+1)/2; } + +// Basic sanity +void basic_tests() +{ + { + std::vector v(10); + std::fill(v.begin(), v.end(), 3); + std::inclusive_scan(v.begin(), v.end(), v.begin()); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == (int)(i+1) * 3); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 0); + std::inclusive_scan(v.begin(), v.end(), v.begin()); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == triangle(i)); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 1); + std::inclusive_scan(v.begin(), v.end(), v.begin()); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == triangle(i + 1)); + } + + { + std::vector v, res; + std::inclusive_scan(v.begin(), v.end(), std::back_inserter(res)); + assert(res.empty()); + } +} + +int main() +{ + basic_tests(); + +// All the iterator categories + test >(); + test >(); + test >(); + test >(); + test(); + test< int*>(); +} diff --git a/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp b/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp new file mode 100644 index 000000000000..95db56bf67d2 --- /dev/null +++ b/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp @@ -0,0 +1,112 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 + +// template +// OutputIterator +// inclusive_scan(InputIterator first, InputIterator last, +// OutputIterator result, +// BinaryOperation binary_op); // C++17 + +#include +#include +#include +#include + +#include "test_iterators.h" + +template +void +test(Iter1 first, Iter1 last, Op op, Iter2 rFirst, Iter2 rLast) +{ + std::vector::value_type> v; + +// Not in place + std::inclusive_scan(first, last, std::back_inserter(v), op); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); + +// In place + v.clear(); + v.assign(first, last); + std::inclusive_scan(v.begin(), v.end(), v.begin(), op); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); +} + + +template +void +test() +{ + int ia[] = {1, 3, 5, 7, 9}; + const int pRes[] = {1, 4, 9, 16, 25}; + const int mRes[] = {1, 3, 15, 105, 945}; + const unsigned sa = sizeof(ia) / sizeof(ia[0]); + static_assert(sa == sizeof(pRes) / sizeof(pRes[0])); // just to be sure + static_assert(sa == sizeof(mRes) / sizeof(mRes[0])); // just to be sure + + for (unsigned int i = 0; i < sa; ++i ) { + test(Iter(ia), Iter(ia + i), std::plus<>(), pRes, pRes + i); + test(Iter(ia), Iter(ia + i), std::multiplies<>(), mRes, mRes + i); + } +} + +int triangle(int n) { return n*(n+1)/2; } + +// Basic sanity +void basic_tests() +{ + { + std::vector v(10); + std::fill(v.begin(), v.end(), 3); + std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>()); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == (int)(i+1) * 3); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 0); + std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>()); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == triangle(i)); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 1); + std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>()); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == triangle(i + 1)); + } + + { + std::vector v, res; + std::inclusive_scan(v.begin(), v.end(), std::back_inserter(res), std::plus<>()); + assert(res.empty()); + } +} + + +int main() +{ + + basic_tests(); + +// All the iterator categories +// test >(); +// test >(); +// test >(); +// test >(); +// test(); +// test< int*>(); + +} + \ No newline at end of file diff --git a/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp b/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp new file mode 100644 index 000000000000..04913f904885 --- /dev/null +++ b/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp @@ -0,0 +1,128 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 + +// template +// OutputIterator +// inclusive_scan(InputIterator first, InputIterator last, +// OutputIterator result, +// BinaryOperation binary_op, T init); // C++17 + +#include +#include +#include + +#include "test_iterators.h" + +template +void +test(Iter1 first, Iter1 last, Op op, T init, Iter2 rFirst, Iter2 rLast) +{ + std::vector::value_type> v; + +// Not in place + std::inclusive_scan(first, last, std::back_inserter(v), op, init); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); + +// In place + v.clear(); + v.assign(first, last); + std::inclusive_scan(v.begin(), v.end(), v.begin(), op, init); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); +} + + +template +void +test() +{ + int ia[] = {1, 3, 5, 7, 9}; + const int pRes[] = {1, 4, 9, 16, 25}; + const int mRes[] = {1, 3, 15, 105, 945}; + const unsigned sa = sizeof(ia) / sizeof(ia[0]); + static_assert(sa == sizeof(pRes) / sizeof(pRes[0])); // just to be sure + static_assert(sa == sizeof(mRes) / sizeof(mRes[0])); // just to be sure + + for (unsigned int i = 0; i < sa; ++i ) { + test(Iter(ia), Iter(ia + i), std::plus<>(), 0, pRes, pRes + i); + test(Iter(ia), Iter(ia + i), std::multiplies<>(), 1, mRes, mRes + i); + } +} + +int triangle(int n) { return n*(n+1)/2; } + +// Basic sanity +void basic_tests() +{ + { + std::vector v(10); + std::fill(v.begin(), v.end(), 3); + std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), 50); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == 50 + (int)(i+1) * 3); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 0); + std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), 40); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == 40 + triangle(i)); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 1); + std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), 30); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == 30 + triangle(i + 1)); + } + + { + std::vector v, res; + std::inclusive_scan(v.begin(), v.end(), std::back_inserter(res), std::plus<>(), 40); + assert(res.empty()); + } + +// Make sure that the calculations are done using the init typedef + { + std::vector v(10); + std::iota(v.begin(), v.end(), 1); + std::vector res; + std::inclusive_scan(v.begin(), v.end(), std::back_inserter(res), std::multiplies<>(), 1); + + assert(res.size() == 10); + int j = 1; + assert(res[0] == 1); + for (size_t i = 1; i < v.size(); ++i) + { + j *= i + 1; + assert(res[i] == j); + } + } +} + + +int main() +{ + + basic_tests(); + +// All the iterator categories + test >(); + test >(); + test >(); + test >(); + test(); + test< int*>(); + +} + \ No newline at end of file diff --git a/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp b/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp index 5238a1f2ed5b..e3e2ffe7be0c 100644 --- a/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp +++ b/test/std/numerics/numeric.ops/reduce/reduce_init_op.pass.cpp @@ -12,7 +12,7 @@ // template // T reduce(InputIterator first, InputIterator last, T init, BinaryOperation op); - + #include #include diff --git a/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp b/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp index 2370e9ea9137..d96d4d6ec439 100644 --- 
a/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp +++ b/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp @@ -10,7 +10,7 @@ // // UNSUPPORTED: c++98, c++03, c++11, c++14 -// template // OutputIterator transform_exclusive_scan(InputIterator first, InputIterator last, // OutputIterator result, T init, @@ -64,11 +64,11 @@ test() { int ia[] = { 1, 3, 5, 7, 9}; const int pResI0[] = { 0, 1, 4, 9, 16}; // with identity - const int mResI0[] = { 0, 0, 0, 0, 0}; + const int mResI0[] = { 0, 0, 0, 0, 0}; const int pResN0[] = { 0, -1, -4, -9, -16}; // with negate const int mResN0[] = { 0, 0, 0, 0, 0}; const int pResI2[] = { 2, 3, 6, 11, 18}; // with identity - const int mResI2[] = { 2, 2, 6, 30, 210}; + const int mResI2[] = { 2, 2, 6, 30, 210}; const int pResN2[] = { 2, 1, -2, -7, -14}; // with negate const int mResN2[] = { 2, -2, 6, -30, 210}; const unsigned sa = sizeof(ia) / sizeof(ia[0]); @@ -149,7 +149,7 @@ void basic_tests() int main() { basic_tests(); - + // All the iterator categories test >(); test >(); diff --git a/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp b/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp new file mode 100644 index 000000000000..b96218d7da4f --- /dev/null +++ b/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp @@ -0,0 +1,133 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 + +// template +// OutputIterator transform_inclusive_scan(InputIterator first, InputIterator last, +// OutputIterator result, +// BinaryOperation binary_op, +// UnaryOperation unary_op); + + +#include +#include +#include +#include + +#include "test_iterators.h" + +template +struct identity : std::unary_function<_Tp, _Tp> +{ + constexpr const _Tp& operator()(const _Tp& __x) const { return __x;} +}; + +template <> +struct identity +{ + template + constexpr auto operator()(_Tp&& __x) const + _NOEXCEPT_(noexcept(_VSTD::forward<_Tp>(__x))) + -> decltype (_VSTD::forward<_Tp>(__x)) + { return _VSTD::forward<_Tp>(__x); } +}; + +template +void +test(Iter1 first, Iter1 last, BOp bop, UOp uop, Iter2 rFirst, Iter2 rLast) +{ + std::vector::value_type> v; +// Test not in-place + std::transform_inclusive_scan(first, last, std::back_inserter(v), bop, uop); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); + +// Test in-place + v.clear(); + v.assign(first, last); + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), bop, uop); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); +} + + +template +void +test() +{ + int ia[] = { 1, 3, 5, 7, 9}; + const int pResI0[] = { 1, 4, 9, 16, 25}; // with identity + const int mResI0[] = { 1, 3, 15, 105, 945}; + const int pResN0[] = { -1, -4, -9, -16, -25}; // with negate + const int mResN0[] = { -1, 3, -15, 105, -945}; + const unsigned sa = sizeof(ia) / sizeof(ia[0]); + static_assert(sa == sizeof(pResI0) / sizeof(pResI0[0])); // just to be sure + static_assert(sa == sizeof(mResI0) / sizeof(mResI0[0])); // just to be sure + static_assert(sa == sizeof(pResN0) / 
sizeof(pResN0[0])); // just to be sure + static_assert(sa == sizeof(mResN0) / sizeof(mResN0[0])); // just to be sure + + for (unsigned int i = 0; i < sa; ++i ) { + test(Iter(ia), Iter(ia + i), std::plus<>(), identity<>(), pResI0, pResI0 + i); + test(Iter(ia), Iter(ia + i), std::multiplies<>(), identity<>(), mResI0, mResI0 + i); + test(Iter(ia), Iter(ia + i), std::plus<>(), std::negate<>(), pResN0, pResN0 + i); + test(Iter(ia), Iter(ia + i), std::multiplies<>(), std::negate<>(), mResN0, mResN0 + i); + } +} + +int triangle(int n) { return n*(n+1)/2; } + +// Basic sanity +void basic_tests() +{ + { + std::vector v(10); + std::fill(v.begin(), v.end(), 3); + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), identity<>()); + std::copy(v.begin(), v.end(), std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == (int)(i+1) * 3); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 0); + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), identity<>()); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == triangle(i)); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 1); + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), identity<>()); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == triangle(i + 1)); + } + + { + std::vector v, res; + std::transform_inclusive_scan(v.begin(), v.end(), std::back_inserter(res), std::plus<>(), identity<>()); + assert(res.empty()); + } +} + +int main() +{ + basic_tests(); + +// All the iterator categories + test >(); + test >(); + test >(); + test >(); + test(); + test< int*>(); +} diff --git a/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp b/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp new file mode 100644 index 000000000000..df68946a5e65 --- /dev/null +++ b/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp @@ -0,0 +1,160 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// +// UNSUPPORTED: c++98, c++03, c++11, c++14 + +// template +// OutputIterator transform_inclusive_scan(InputIterator first, InputIterator last, +// OutputIterator result, +// BinaryOperation binary_op, +// UnaryOperation unary_op, +// T init); + + +#include +#include +#include + +#include "test_iterators.h" + +template +struct identity : std::unary_function<_Tp, _Tp> +{ + constexpr const _Tp& operator()(const _Tp& __x) const { return __x;} +}; + +template <> +struct identity +{ + template + constexpr auto operator()(_Tp&& __x) const + _NOEXCEPT_(noexcept(_VSTD::forward<_Tp>(__x))) + -> decltype (_VSTD::forward<_Tp>(__x)) + { return _VSTD::forward<_Tp>(__x); } +}; + +template +void +test(Iter1 first, Iter1 last, BOp bop, UOp uop, T init, Iter2 rFirst, Iter2 rLast) +{ + std::vector::value_type> v; +// Test not in-place + std::transform_inclusive_scan(first, last, std::back_inserter(v), bop, uop, init); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); + +// Test in-place + v.clear(); + v.assign(first, last); + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), bop, uop, init); + assert(std::equal(v.begin(), v.end(), rFirst, rLast)); +} + + +template +void +test() +{ + int ia[] = { 1, 3, 5, 7, 9}; + const int pResI0[] = { 1, 4, 9, 16, 25}; // with identity + const int mResI0[] = { 0, 0, 0, 0, 0}; + const int pResN0[] = { -1, -4, -9, -16, -25}; // with negate + const int mResN0[] = { 0, 0, 0, 0, 0}; + const int pResI2[] = { 3, 6, 11, 18, 27}; // with identity + const int mResI2[] = { 2, 6, 30, 210, 1890}; + const int pResN2[] = { 1, -2, -7, -14, -23}; // with negate + const int mResN2[] = { -2, 6, -30, 210, -1890}; + const unsigned sa = sizeof(ia) / sizeof(ia[0]); + static_assert(sa == sizeof(pResI0) / sizeof(pResI0[0])); // just to be sure + static_assert(sa == sizeof(mResI0) / sizeof(mResI0[0])); // just to be sure + static_assert(sa == sizeof(pResN0) / sizeof(pResN0[0])); // just to be sure + static_assert(sa == sizeof(mResN0) / sizeof(mResN0[0])); // just to be sure + static_assert(sa == sizeof(pResI2) / sizeof(pResI2[0])); // just to be sure + static_assert(sa == sizeof(mResI2) / sizeof(mResI2[0])); // just to be sure + static_assert(sa == sizeof(pResN2) / sizeof(pResN2[0])); // just to be sure + static_assert(sa == sizeof(mResN2) / sizeof(mResN2[0])); // just to be sure + + for (unsigned int i = 0; i < sa; ++i ) { + test(Iter(ia), Iter(ia + i), std::plus<>(), identity<>(), 0, pResI0, pResI0 + i); + test(Iter(ia), Iter(ia + i), std::multiplies<>(), identity<>(), 0, mResI0, mResI0 + i); + test(Iter(ia), Iter(ia + i), std::plus<>(), std::negate<>(), 0, pResN0, pResN0 + i); + test(Iter(ia), Iter(ia + i), std::multiplies<>(), std::negate<>(), 0, mResN0, mResN0 + i); + test(Iter(ia), Iter(ia + i), std::plus<>(), identity<>(), 2, pResI2, pResI2 + i); + test(Iter(ia), Iter(ia + i), std::multiplies<>(), identity<>(), 2, mResI2, mResI2 + i); + test(Iter(ia), Iter(ia + i), std::plus<>(), std::negate<>(), 2, pResN2, pResN2 + i); + test(Iter(ia), Iter(ia + i), std::multiplies<>(), std::negate<>(), 2, mResN2, mResN2 + i); + } +} + +int triangle(int n) { return n*(n+1)/2; } + +// Basic sanity +void basic_tests() +{ + { + std::vector v(10); + std::fill(v.begin(), v.end(), 3); + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), identity<>(), 50); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == 50 + (int) (i + 1) * 3); + } + + { + std::vector v(10); + 
std::iota(v.begin(), v.end(), 0); + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), identity<>(), 30); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == 30 + triangle(i)); + } + + { + std::vector v(10); + std::iota(v.begin(), v.end(), 1); + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), identity<>(), 40); + for (size_t i = 0; i < v.size(); ++i) + assert(v[i] == 40 + triangle(i + 1)); + } + + { + std::vector v, res; + std::transform_inclusive_scan(v.begin(), v.end(), std::back_inserter(res), std::plus<>(), identity<>(), 1); + assert(res.empty()); + } + +// Make sure that the calculations are done using the init typedef + { + std::vector v(10); + std::iota(v.begin(), v.end(), 1); + std::vector res; + std::transform_inclusive_scan(v.begin(), v.end(), std::back_inserter(res), std::multiplies<>(), identity<>(), 1); + + assert(res.size() == 10); + int j = 1; + assert(res[0] == 1); + for (size_t i = 1; i < res.size(); ++i) + { + j *= i + 1; + assert(res[i] == j); + } + } +} + +int main() +{ + basic_tests(); + +// All the iterator categories + test >(); + test >(); + test >(); + test >(); + test(); + test< int*>(); +} diff --git a/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_init_bop_uop.pass.cpp b/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_init_bop_uop.pass.cpp index c283f3e29722..b05792ddeb55 100644 --- a/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_init_bop_uop.pass.cpp +++ b/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_init_bop_uop.pass.cpp @@ -14,8 +14,8 @@ // class BinaryOperation, class UnaryOperation> // T transform_reduce(InputIterator1 first1, InputIterator1 last1, // T init, BinaryOperation binary_op, UnaryOperation unary_op); -// - +// + #include #include @@ -58,7 +58,7 @@ template void test(Iter1 first1, Iter1 last1, T init, BOp bOp, UOp uOp, T x) { - static_assert( std::is_same_v ); assert(std::transform_reduce(first1, last1, init, bOp, uOp) == x); } @@ -93,7 +93,7 @@ template void test_return_type() { T *p = nullptr; - static_assert( std::is_same_v(), identity<>()))> ); } diff --git a/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp b/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp index f36b7d49410b..4f8142afe84a 100644 --- a/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp +++ b/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init.pass.cpp @@ -24,7 +24,7 @@ template void test(Iter1 first1, Iter1 last1, Iter2 first2, T init, T x) { - static_assert( std::is_same_v ); assert(std::transform_reduce(first1, last1, first2, init) == x); } @@ -52,7 +52,7 @@ template void test_return_type() { T *p = nullptr; - static_assert( std::is_same_v ); } diff --git a/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init_op_op.pass.cpp b/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init_op_op.pass.cpp index b1b53293b991..41ea3b38a7f6 100644 --- a/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init_op_op.pass.cpp +++ b/test/std/numerics/numeric.ops/transform.reduce/transform_reduce_iter_iter_iter_init_op_op.pass.cpp @@ -15,8 +15,8 @@ // T transform_reduce(InputIterator1 first1, InputIterator1 last1, // InputIterator2 first2, T init, // BinaryOperation1 binary_op1, 
BinaryOperation2 binary_op2); -// - +// + #include #include @@ -26,7 +26,7 @@ template void test(Iter1 first1, Iter1 last1, Iter2 first2, T init, Op1 op1, Op2 op2, T x) { - static_assert( std::is_same_v ); assert(std::transform_reduce(first1, last1, first2, init, op1, op2) == x); } @@ -54,7 +54,7 @@ template void test_return_type() { T *p = nullptr; - static_assert( std::is_same_v(), std::multiplies<>()))> ); } diff --git a/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp b/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp index 2aa19c6188a5..f18ed6e2bf0b 100644 --- a/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp +++ b/test/std/utilities/allocator.adaptor/allocator.adaptor.cnstr/allocs.pass.cpp @@ -111,7 +111,7 @@ int main() { static_assert(!std::is_convertible, A2>::value, ""); static_assert(!std::is_convertible< - std::scoped_allocator_adaptor>, + std::scoped_allocator_adaptor>, std::scoped_allocator_adaptor>>::value, ""); } } diff --git a/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp b/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp index 7516b2e3af2b..faf4f11573d1 100644 --- a/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp +++ b/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp @@ -12,7 +12,7 @@ // class function // function(const function& f); -// function(const function&& f); +// function(function&& f); #include #include diff --git a/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp b/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp index 82acdd9d7758..55c2156300fb 100644 --- a/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp +++ b/test/std/utilities/optional/optional.object/optional.object.ctor/move.pass.cpp @@ -157,7 +157,7 @@ int main() test(3); static_assert(constexpr_test(), "" ); static_assert(constexpr_test(3), "" ); - + { optional o(42); optional o2(std::move(o)); diff --git a/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp b/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp index d7b184f6383c..b4fd2e26425a 100644 --- a/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp +++ b/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/dtor.pass.cpp @@ -16,7 +16,7 @@ // ~tuple(); // C++17 added: -// The destructor of tuple shall be a trivial destructor +// The destructor of tuple shall be a trivial destructor // if (is_trivially_destructible_v && ...) is true. 
#include diff --git a/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp b/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp index aadbf3d5a369..03fb78caa08e 100644 --- a/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp +++ b/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp @@ -64,18 +64,22 @@ void test_decomp_tuple() { void test_decomp_pair() { typedef std::pair T; { - T s{99, 42.1}; + T s{99, 42.5}; auto [m1, m2] = s; auto& [r1, r2] = s; assert(m1 == 99); + assert(m2 == 42.5); assert(&r1 == &std::get<0>(s)); + assert(&r2 == &std::get<1>(s)); } { - T const s{99, 42.1}; + T const s{99, 42.5}; auto [m1, m2] = s; auto& [r1, r2] = s; assert(m1 == 99); + assert(m2 == 42.5); assert(&r1 == &std::get<0>(s)); + assert(&r2 == &std::get<1>(s)); } } @@ -86,14 +90,22 @@ void test_decomp_array() { auto [m1, m2, m3] = s; auto& [r1, r2, r3] = s; assert(m1 == 99); + assert(m2 == 42); + assert(m3 == -1); assert(&r1 == &std::get<0>(s)); + assert(&r2 == &std::get<1>(s)); + assert(&r3 == &std::get<2>(s)); } { T const s{{99, 42, -1}}; auto [m1, m2, m3] = s; auto& [r1, r2, r3] = s; assert(m1 == 99); + assert(m2 == 42); + assert(m3 == -1); assert(&r1 == &std::get<0>(s)); + assert(&r2 == &std::get<1>(s)); + assert(&r3 == &std::get<2>(s)); } } diff --git a/test/std/utilities/utility/pairs/pairs.pair/dtor.pass.cpp b/test/std/utilities/utility/pairs/pairs.pair/dtor.pass.cpp index 83c55e75b4d2..b25099f4d2e8 100644 --- a/test/std/utilities/utility/pairs/pairs.pair/dtor.pass.cpp +++ b/test/std/utilities/utility/pairs/pairs.pair/dtor.pass.cpp @@ -16,7 +16,7 @@ // ~pair() // C++17 added: -// The destructor of pair shall be a trivial destructor +// The destructor of pair shall be a trivial destructor // if (is_trivially_destructible_v && is_trivially_destructible_v) is true. 
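As a side note, here is a minimal, self-contained sketch (not part of the patch) of the C++17 guarantee the pair/tuple dtor tests above exercise: pair and tuple keep a trivial destructor whenever every element type is trivially destructible. std::string stands in only as an arbitrary non-trivially-destructible element type, assuming a conforming C++17 library.

// Standalone illustration, assuming a conforming C++17 library; not taken from the patch.
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>

// All elements trivially destructible => the pair/tuple destructor is trivial.
static_assert(std::is_trivially_destructible<std::pair<int, double>>::value, "");
static_assert(std::is_trivially_destructible<std::tuple<int, char, long>>::value, "");

// A single non-trivially-destructible element is enough to lose triviality.
static_assert(!std::is_trivially_destructible<std::pair<int, std::string>>::value, "");

int main() { return 0; }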
diff --git a/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp b/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp index d33ea0bd3f4e..3f7cd4f0b6d2 100644 --- a/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp +++ b/test/std/utilities/variant/variant.variant/variant.ctor/T.pass.cpp @@ -37,6 +37,9 @@ struct NoThrowT { NoThrowT(int) noexcept(true) {} }; +struct AnyConstructible { template AnyConstructible(T&&) {} }; +struct NoConstructible { NoConstructible() = delete; }; + void test_T_ctor_noexcept() { { using V = std::variant; @@ -62,6 +65,17 @@ void test_T_ctor_sfinae() { static_assert(!std::is_constructible::value, "no matching constructor"); } + { + using V = std::variant; + static_assert( + !std::is_constructible>::value, + "no matching constructor"); + static_assert(!std::is_constructible>::value, + "no matching constructor"); + } + + + #if !defined(TEST_VARIANT_HAS_NO_REFERENCES) { using V = std::variant; diff --git a/test/support/count_new.hpp b/test/support/count_new.hpp index 1b05fe35e948..c001c0340fa2 100644 --- a/test/support/count_new.hpp +++ b/test/support/count_new.hpp @@ -231,12 +231,17 @@ class MemCounter const bool MemCounter::disable_checking = false; #endif -MemCounter globalMemCounter((MemCounter::MemCounterCtorArg_())); +inline MemCounter* getGlobalMemCounter() { + static MemCounter counter((MemCounter::MemCounterCtorArg_())); + return &counter; +} + +MemCounter &globalMemCounter = *getGlobalMemCounter(); #ifndef DISABLE_NEW_COUNT void* operator new(std::size_t s) TEST_THROW_SPEC(std::bad_alloc) { - globalMemCounter.newCalled(s); + getGlobalMemCounter()->newCalled(s); void* ret = std::malloc(s); if (ret == nullptr) detail::throw_bad_alloc_helper(); @@ -245,21 +250,21 @@ void* operator new(std::size_t s) TEST_THROW_SPEC(std::bad_alloc) void operator delete(void* p) TEST_NOEXCEPT { - globalMemCounter.deleteCalled(p); + getGlobalMemCounter()->deleteCalled(p); std::free(p); } void* operator new[](std::size_t s) TEST_THROW_SPEC(std::bad_alloc) { - globalMemCounter.newArrayCalled(s); + getGlobalMemCounter()->newArrayCalled(s); return operator new(s); } void operator delete[](void* p) TEST_NOEXCEPT { - globalMemCounter.deleteArrayCalled(p); + getGlobalMemCounter()->deleteArrayCalled(p); operator delete(p); } diff --git a/test/support/experimental_any_helpers.h b/test/support/experimental_any_helpers.h index 50bd6d68fba5..9c906e6bf76a 100644 --- a/test/support/experimental_any_helpers.h +++ b/test/support/experimental_any_helpers.h @@ -55,6 +55,7 @@ void assertEmpty(std::experimental::any const& a) { // Assert that an 'any' object stores the specified 'Type' and 'value'. template +_LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST void assertContains(std::experimental::any const& a, int value = 1) { assert(!a.empty()); RTTI_ASSERT(a.type() == typeid(Type)); @@ -64,6 +65,7 @@ void assertContains(std::experimental::any const& a, int value = 1) { // Modify the value of a "test type" stored within an any to the specified // 'value'. template +_LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST void modifyValue(std::experimental::any& a, int value) { assert(!a.empty()); RTTI_ASSERT(a.type() == typeid(Type)); diff --git a/www/upcoming_meeting.html b/www/upcoming_meeting.html index 90a192828e57..839eea825c5c 100644 --- a/www/upcoming_meeting.html +++ b/www/upcoming_meeting.html @@ -36,7 +36,7 @@

This is a temporary page; please check the c++1z status here

-This page shows the status of the papers and issues that are expected to be adopted in Issaquah.
+This page shows the status of the papers and issues that are expected to be adopted in Toronto.

The groups that have contributed papers:

    @@ -45,7 +45,7 @@
  • SG1 - Study group #1 (Concurrency working group)

- +

Paper Status

@@ -59,71 +59,41 @@
Paper # | Group | Paper Name | Meeting | Status | First released version
- - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + - -
Issue # | Issue Name | Meeting | Status
2260 | Missing requirement for Allocator::pointer | Kona |
2768 | any_cast and move semantics | Kona |
2769 | Redundant const in the return type of any_cast(const any&) | Kona |
2781 | Contradictory requirements for std::function and std::reference_wrapper | Kona |
2782 | scoped_allocator_adaptor constructors must be constrained | Kona |
2784 | Resolution to LWG 2484 is missing "otherwise, no effects" and is hard to parse | Kona | Patch Ready
2785 | quoted should work with basic_string_view | Kona | We do this already
2786 | Annex C should mention shared_ptr changes for array support | Kona | Nothing to do
2787 | §[file_status.cons] doesn't match class definition | Kona |
2789 | Equivalence of contained objects | Kona |
2794 | Missing requirements for allocator pointers | Kona |
2795 | §[global.functions] provides incorrect example of ADL use | Kona | Nothing to do
2804 | Unconditional constexpr default constructor for istream_iterator | Kona | We do this already
2812 | Range access is available with <string_view> | Kona | We do this already
2824 | list::sort should say that the order of elements is unspecified if an exception is thrown | Kona |
2826 | string_view iterators use old wording | Kona | Nothing to do
2834 | Resolution LWG 2223 is missing wording about end iterators | Kona | Nothing to do
2835 | LWG 2536 seems to misspecify <tgmath.h> | Kona |
2837 | gcd and lcm should support a wider range of input values | Kona | We do this already
2838 | is_literal_type specification needs a little cleanup | Kona | Nothing to do
2842 | in_place_t check for optional::optional(U&&) should decay U | Kona |
2850 | std::function move constructor does unnecessary work | Kona |
2853 | Possible inconsistency in specification of erase in [vector.modifiers] | Kona |
2855 | std::throw_with_nested("string_literal") | Kona |
2444 | Inconsistent complexity for std::sort_heap | Toronto |
2593 | Moved-from state of Allocators | Toronto |
2597 | std::log misspecified for complex numbers | Toronto |
2783 | stack::emplace() and queue::emplace() should return decltype(auto) | Toronto |
2932 | Constraints on parallel algorithm implementations are underspecified | Toronto |
2937 | Is equivalent("existing_thing", "not_existing_thing") an error? | Toronto |
2940 | result_of specification also needs a little cleanup | Toronto |
2942 | LWG 2873's resolution missed weak_ptr::owner_before | Toronto |
2954 | Specialization of the convenience variable templates should be prohibited | Toronto |
2961 | Bad postcondition for set_default_resource | Toronto |
2966 | Incomplete resolution of US 74 | Toronto |
2974 | Diagnose out of bounds tuple_element/variant_alternative | Toronto |
Priority 1 Bugs
2665 | remove_filename() post condition is incorrect | Kona | We do this already
2806 | Base class of bad_optional_access | Kona | We do this already
2857 | {variant,optional,any}::emplace should return the constructed value | Kona |

Comments about the issues

-  • 2260 - Check our current allocators to make sure we do this
-  • 2768 - Resolution is "apply 2769"
-  • 2769 - This should be easy; trick will be devising tests.
-  • 2781 -
-  • 2782 - Looks straightforward.
-  • 2784 - Patch Ready
-  • 2785 - We do this already.
-  • 2786 - Nothing to do; just moving words around
-  • 2787 - Eric?
-  • 2789 - I don't think there are any code changes required here
-  • 2794 - I don't think there are any code changes required here - maybe a static_assert.
-  • 2795 - Nothing to do; just moving words around
-  • 2804 - We do this already.
-  • 2812 - We do this already.
-  • 2824 - Nothing to do here, but we should add some throwing sort tests.
-  • 2826 - Nothing to do; just moving words around
-  • 2834 - Nothing to do; just moving words around
-  • 2835 - I'm pretty sure we already do this.
-  • 2837 - Added some tests to ensure we do this already.
-  • 2838 - Nothing to do; just moving words around
-  • 2842 - This should be easy; trick will be devising tests.
-  • 2850 - I think we already do this.
-  • 2853 - I think that this will be mostly adding tests.
-  • 2855 - This should be easy; trick will be devising tests.
-  • 2665 - We do this already.
-  • 2806 - We do this already.
-  • 2857 - This is the subject of several NB comments.
+  • 2444 -
+  • 2593 -
+  • 2597 - I think we do this already; probably needs tests
+  • 2783 - should be easy to change; needs tests
+  • 2932 - We're not doing the parallel algorithms yet.
+  • 2937 - file system; Eric?
+  • 2940 - We haven't implemented result_of yet, but I don't think that this will require any changes.
+  • 2942 - all of our owner_before overloads are already noexcept; just need to update the tests.
+  • 2954 - I don't think there's anything to do here.
+  • 2961 - We haven't implemented the PMR stuff yet.
+  • 2966 - Wording cleanup; no code or test changes needed.
+  • 2974 - I have some code lying around that does this.
-Last Updated: 7-Feb-2017
+Last Updated: 25-Jun-2017

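For context, the count_new.hpp change in the patch above replaces a plain global MemCounter object with a function that returns a function-local static, so the counter is guaranteed to be constructed before its first use from the replaced operator new, independent of static initialization order. A minimal sketch of that pattern with simplified, illustrative names (Counter and getCounter stand in for the real MemCounter and getGlobalMemCounter):

#include <cstddef>

struct Counter {
    std::size_t new_called = 0;   // simplified stand-in for MemCounter's bookkeeping
};

// Function-local static: constructed on first call, so callers such as a
// replacement operator new can use it safely during program start-up.
inline Counter* getCounter() {
    static Counter counter;
    return &counter;
}

// A reference bound to the singleton keeps existing call sites that used a
// global object compiling unchanged.
Counter& globalCounter = *getCounter();

int main() {
    return getCounter()->new_called == 0 ? 0 : 1;
}
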
From fa469f0e6684bd53b67d41bda9b789d07f51d824 Mon Sep 17 00:00:00 2001 From: dim Date: Sat, 1 Jul 2017 13:24:37 +0000 Subject: [PATCH 2/5] Vendor import of libc++ trunk r306956: https://llvm.org/svn/llvm-project/libcxx/trunk@306956 --- include/__config | 6 ---- include/new | 20 +++++------ include/string | 8 ++--- .../new_deployment.fail.cpp | 36 ------------------- .../pairs/pair.astuple/tuple_element.fail.cpp | 22 ++++++++++++ .../variant_alternative.fail.cpp | 32 +++++++++++++++++ utils/libcxx/test/target_info.py | 11 +++--- www/upcoming_meeting.html | 4 +-- 8 files changed, 76 insertions(+), 63 deletions(-) delete mode 100644 test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_deployment.fail.cpp create mode 100644 test/std/utilities/utility/pairs/pair.astuple/tuple_element.fail.cpp create mode 100644 test/std/utilities/variant/variant.helpers/variant_alternative.fail.cpp diff --git a/include/__config b/include/__config index aae053102f55..003e1ea60c60 100644 --- a/include/__config +++ b/include/__config @@ -1176,11 +1176,6 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) -#define _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION \ - __attribute__((availability(macosx,strict,introduced=10.13))) \ - __attribute__((availability(ios,strict,introduced=11.0))) \ - __attribute__((availability(tvos,strict,introduced=11.0))) \ - __attribute__((availability(watchos,strict,introduced=4.0))) #else #define _LIBCPP_AVAILABILITY_SHARED_MUTEX #define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS @@ -1192,7 +1187,6 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE #define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY #define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR -#define _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION #endif // Define availability that depends on _LIBCPP_NO_EXCEPTIONS. 
diff --git a/include/new b/include/new index 3945faee0e38..34df2efee09e 100644 --- a/include/new +++ b/include/new @@ -193,20 +193,20 @@ _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operato #endif #ifndef _LIBCPP_HAS_NO_ALIGNED_ALLOCATION -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void* operator new(std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void* operator new(std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _NOALIAS; -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete(void* __p, std::align_val_t) _NOEXCEPT; -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; +_LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new(std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _NOALIAS; +_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete(void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; #ifndef _LIBCPP_HAS_NO_SIZED_DEALLOCATION -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operator delete(void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; #endif -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void* operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void* operator new[](std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _NOALIAS; -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete[](void* __p, std::align_val_t) _NOEXCEPT; -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, std::align_val_t) _THROW_BAD_ALLOC; +_LIBCPP_OVERRIDABLE_FUNC_VIS void* operator new[](std::size_t __sz, std::align_val_t, const std::nothrow_t&) _NOEXCEPT _NOALIAS; +_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS void operator delete[](void* __p, std::align_val_t, const std::nothrow_t&) _NOEXCEPT; #ifndef _LIBCPP_HAS_NO_SIZED_DEALLOCATION -_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_ALIGNED_ALLOCATION void operator delete[](void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; +_LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE void operator delete[](void* __p, std::size_t __sz, std::align_val_t) _NOEXCEPT; #endif #endif diff --git a/include/string b/include/string index accf1ce27716..d1a3a1f8e6cc 100644 --- a/include/string +++ b/include/string @@ -4004,6 +4004,10 @@ basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator* #endif // _LIBCPP_DEBUG_LEVEL >= 2 +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string) +_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string) +_LIBCPP_EXTERN_TEMPLATE(string operator+, allocator >(char const*, string const&)) + 
#if _LIBCPP_STD_VER > 11 // Literal suffixes for basic_string [basic.string.literals] inline namespace literals @@ -4037,10 +4041,6 @@ inline namespace literals } #endif -_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string) -_LIBCPP_EXTERN_TEMPLATE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS basic_string) -_LIBCPP_EXTERN_TEMPLATE(string operator+, allocator >(char const*, string const&)) - _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_deployment.fail.cpp b/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_deployment.fail.cpp deleted file mode 100644 index f9f487dc7ef8..000000000000 --- a/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_deployment.fail.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++98, c++03, c++11, c++14 -// REQUIRES: availability=macosx10.12 - -// test availability of new/delete operators introduced in c++17. - -#include - -int main () { - int *p0 = new ((std::align_val_t)16) int(1); - (void)p0; - int *p1 = new ((std::align_val_t)16) int[1]; - (void)p1; - // expected-error@-4 {{call to unavailable function 'operator new': introduced in macOS 10.13}} - // expected-note@new:* {{candidate function has been explicitly made unavailable}} - // expected-note@new:* {{candidate function not viable: no known conversion from 'std::align_val_t' to 'const std::nothrow_t' for 2nd argument}} - // expected-note@new:* {{candidate function not viable: no known conversion from 'std::align_val_t' to 'void *' for 2nd argument}} - // expected-note@new:* {{candidate function not viable: requires single argument '__sz', but 2 arguments were provided}} - // expected-note@new:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} - - // expected-error@-9 {{call to unavailable function 'operator new[]': introduced in macOS 10.13}} - // expected-note@new:* {{candidate function has been explicitly made unavailable}} - // expected-note@new:* {{candidate function not viable: no known conversion from 'std::align_val_t' to 'const std::nothrow_t' for 2nd argument}} - // expected-note@new:* {{candidate function not viable: no known conversion from 'std::align_val_t' to 'void *' for 2nd argument}} - // expected-note@new:* {{candidate function not viable: requires single argument '__sz', but 2 arguments were provided}} - // expected-note@new:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} - return 0; -} diff --git a/test/std/utilities/utility/pairs/pair.astuple/tuple_element.fail.cpp b/test/std/utilities/utility/pairs/pair.astuple/tuple_element.fail.cpp new file mode 100644 index 000000000000..8e994126cc0d --- /dev/null +++ b/test/std/utilities/utility/pairs/pair.astuple/tuple_element.fail.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +// + +// template struct pair + +// tuple_element >::type + +#include + +int main() +{ + typedef std::pair T; + typename std::tuple_element<2, T>::type foo; // expected-error@utility:* {{Index out of bounds in std::tuple_element>}} +} diff --git a/test/std/utilities/variant/variant.helpers/variant_alternative.fail.cpp b/test/std/utilities/variant/variant.helpers/variant_alternative.fail.cpp new file mode 100644 index 000000000000..cbaa2a568d5b --- /dev/null +++ b/test/std/utilities/variant/variant.helpers/variant_alternative.fail.cpp @@ -0,0 +1,32 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++98, c++03, c++11, c++14 + +// + +// template struct variant_alternative; // undefined +// template struct variant_alternative; +// template struct variant_alternative; +// template struct variant_alternative; +// template +// using variant_alternative_t = typename variant_alternative::type; +// +// template +// struct variant_alternative>; + +#include +#include +#include + +int main() { + using V = std::variant; + typename std::variant_alternative<4, V>::type foo; // expected-error@variant:* {{Index out of bounds in std::variant_alternative<>}} +} diff --git a/utils/libcxx/test/target_info.py b/utils/libcxx/test/target_info.py index 2104432f03c0..b3bbc0088542 100644 --- a/utils/libcxx/test/target_info.py +++ b/utils/libcxx/test/target_info.py @@ -8,11 +8,11 @@ #===----------------------------------------------------------------------===// import importlib -import lit.util # pylint: disable=import-error,no-name-in-module import locale import os import platform import re +import subprocess import sys class DefaultTargetInfo(object): @@ -73,12 +73,13 @@ def __init__(self, full_config): super(DarwinLocalTI, self).__init__(full_config) def is_host_macosx(self): - name = lit.util.capture(['sw_vers', '-productName']).strip() + name = subprocess.check_output(['sw_vers', '-productName']).strip() return name == "Mac OS X" def get_macosx_version(self): assert self.is_host_macosx() - version = lit.util.capture(['sw_vers', '-productVersion']).strip() + version = subprocess.check_output( + ['sw_vers', '-productVersion']).strip() version = re.sub(r'([0-9]+\.[0-9]+)(\..*)?', r'\1', version) return version @@ -86,7 +87,7 @@ def get_sdk_version(self, name): assert self.is_host_macosx() cmd = ['xcrun', '--sdk', name, '--show-sdk-path'] try: - out = lit.util.capture(cmd).strip() + out = subprocess.check_output(cmd).strip() except OSError: pass @@ -127,7 +128,7 @@ def add_cxx_compile_flags(self, flags): else: cmd = ['xcrun', '--show-sdk-path'] try: - out = lit.util.capture(cmd).strip() + out = subprocess.check_output(cmd).strip() res = 0 except OSError: res = -1 diff --git a/www/upcoming_meeting.html b/www/upcoming_meeting.html index 839eea825c5c..c7bd83fb39ca 100644 --- a/www/upcoming_meeting.html +++ b/www/upcoming_meeting.html @@ -90,10 +90,10 @@
  • 2954 - I don't think there's anything to do here.
  • 2961 - We haven't implemented the PMR stuff yet.
  • 2966 - Wording cleanup; no code or test changes needed.
-  • 2974 - I have some code lying around that does this.
+  • 2974 - I did this in r305196. Tests added in 306580

-Last Updated: 25-Jun-2017
+Last Updated: 28-Jun-2017

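For context, the two *.fail.cpp tests added in the patch above expect a compile-time diagnostic when tuple_element or variant_alternative is instantiated with an out-of-range index (LWG 2974). A small sketch of the well-formed cases, assuming C++17 and standard headers only; the out-of-range instantiations are exactly what those tests verify fail to compile (this snippet is illustrative and not part of the patch):

#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>

int main() {
    using P = std::pair<int, double>;
    using V = std::variant<int, void *, const void *, long double>;

    // Valid indices: 0..1 for a pair, 0..3 for this variant.
    static_assert(std::is_same<std::tuple_element<1, P>::type, double>::value, "");
    static_assert(std::is_same<std::variant_alternative<3, V>::type, long double>::value, "");

    // std::tuple_element<2, P> or std::variant_alternative<4, V> would trigger
    // the "Index out of bounds" static_assert that the new tests check for.
    return 0;
}
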
    From 71f9bc205920600702ea736242c9b7a3e9b5ee94 Mon Sep 17 00:00:00 2001 From: dim Date: Thu, 13 Jul 2017 19:25:18 +0000 Subject: [PATCH 3/5] Vendor import of llvm trunk r307894: https://llvm.org/svn/llvm-project/llvm/trunk@307894 --- CMakeLists.txt | 4 + cmake/modules/AddLLVM.cmake | 26 +- cmake/modules/HandleLLVMOptions.cmake | 4 +- cmake/modules/LLVMExternalProjectUtils.cmake | 10 +- docs/AMDGPUUsage.rst | 4 +- docs/CMake.rst | 5 + docs/CMakePrimer.rst | 27 - docs/CommandGuide/lit.rst | 7 + docs/CommandGuide/llvm-cov.rst | 6 + docs/CommandGuide/llvm-profdata.rst | 6 + docs/Coroutines.rst | 2 +- docs/Docker.rst | 8 +- docs/Dummy.html | 0 docs/HowToAddABuilder.rst | 3 + docs/LangRef.rst | 236 +- docs/LibFuzzer.rst | 2 +- docs/tutorial/BuildingAJIT1.rst | 8 +- docs/tutorial/BuildingAJIT2.rst | 4 +- docs/tutorial/LangImpl02.rst | 2 +- docs/tutorial/LangImpl03.rst | 6 +- docs/tutorial/LangImpl04.rst | 2 +- docs/tutorial/LangImpl05.rst | 4 +- docs/tutorial/LangImpl06.rst | 4 +- docs/tutorial/OCamlLangImpl5.rst | 2 +- .../BuildingAJIT/Chapter1/KaleidoscopeJIT.h | 8 +- .../BuildingAJIT/Chapter1/toy.cpp | 2 +- .../BuildingAJIT/Chapter2/KaleidoscopeJIT.h | 8 +- .../BuildingAJIT/Chapter2/toy.cpp | 2 +- .../BuildingAJIT/Chapter3/KaleidoscopeJIT.h | 7 +- .../BuildingAJIT/Chapter3/toy.cpp | 2 +- .../BuildingAJIT/Chapter4/KaleidoscopeJIT.h | 10 +- .../BuildingAJIT/Chapter4/toy.cpp | 2 +- .../BuildingAJIT/Chapter5/KaleidoscopeJIT.h | 25 +- .../BuildingAJIT/Chapter5/toy.cpp | 2 +- examples/Kaleidoscope/Chapter4/toy.cpp | 2 +- examples/Kaleidoscope/Chapter5/toy.cpp | 2 +- examples/Kaleidoscope/Chapter6/toy.cpp | 2 +- examples/Kaleidoscope/Chapter7/toy.cpp | 2 +- .../Kaleidoscope/include/KaleidoscopeJIT.h | 10 +- include/llvm-c/OrcBindings.h | 28 +- include/llvm/ADT/APInt.h | 6 +- include/llvm/ADT/STLExtras.h | 4 + include/llvm/ADT/SmallPtrSet.h | 11 +- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 2 +- include/llvm/Analysis/CGSCCPassManager.h | 17 +- include/llvm/Analysis/InlineCost.h | 2 +- include/llvm/Analysis/LazyCallGraph.h | 18 +- include/llvm/Analysis/MemoryBuiltins.h | 3 + include/llvm/Analysis/RegionInfoImpl.h | 8 +- include/llvm/Analysis/TargetTransformInfo.h | 41 + .../llvm/Analysis/TargetTransformInfoImpl.h | 14 + include/llvm/Analysis/ValueTracking.h | 3 +- include/llvm/BinaryFormat/Wasm.h | 4 +- include/llvm/Bitcode/LLVMBitCodes.h | 12 +- include/llvm/CodeGen/AsmPrinter.h | 4 +- include/llvm/CodeGen/BasicTTIImpl.h | 2 +- .../CodeGen/GlobalISel/InstructionSelector.h | 158 + .../GlobalISel/InstructionSelectorImpl.h | 337 + .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 8 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 53 + include/llvm/CodeGen/LiveRegUnits.h | 10 +- include/llvm/CodeGen/MachineFunction.h | 2 +- include/llvm/CodeGen/MachineMemOperand.h | 15 +- include/llvm/CodeGen/RuntimeLibcalls.h | 23 + include/llvm/CodeGen/ScheduleDAG.h | 8 +- include/llvm/CodeGen/SelectionDAG.h | 4 +- include/llvm/CodeGen/SelectionDAGNodes.h | 8 +- .../llvm/DebugInfo/CodeView/SymbolRecord.h | 4 + include/llvm/DebugInfo/CodeView/TypeIndex.h | 21 +- include/llvm/DebugInfo/DIContext.h | 24 +- include/llvm/DebugInfo/DWARF/DWARFContext.h | 25 +- .../PDB/Native/DbiModuleDescriptorBuilder.h | 6 + include/llvm/DebugInfo/PDB/Native/DbiStream.h | 2 + .../DebugInfo/PDB/Native/DbiStreamBuilder.h | 12 +- .../DebugInfo/PDB/Native/NamedStreamMap.h | 2 +- .../PDB/Native/NativeBuiltinSymbol.h | 49 + .../PDB/Native/NativeCompilandSymbol.h | 2 +- .../DebugInfo/PDB/Native/NativeExeSymbol.h | 2 +- 
.../DebugInfo/PDB/Native/NativeRawSymbol.h | 6 +- .../llvm/DebugInfo/PDB/Native/NativeSession.h | 7 + .../DebugInfo/PDB/Native/PDBFileBuilder.h | 4 + .../DebugInfo/PDB/Native/PDBStringTable.h | 1 - .../llvm/DebugInfo/PDB/Native/PublicsStream.h | 4 +- .../PDB/Native/PublicsStreamBuilder.h | 54 + include/llvm/DebugInfo/PDB/Native/RawTypes.h | 13 + include/llvm/ExecutionEngine/JITSymbol.h | 88 +- .../Orc/CompileOnDemandLayer.h | 171 +- .../llvm/ExecutionEngine/Orc/ExecutionUtils.h | 25 +- .../ExecutionEngine/Orc/GlobalMappingLayer.h | 36 +- .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 18 +- .../ExecutionEngine/Orc/IRTransformLayer.h | 16 +- .../llvm/ExecutionEngine/Orc/LambdaResolver.h | 2 +- .../ExecutionEngine/Orc/LazyEmittingLayer.h | 88 +- .../Orc/ObjectTransformLayer.h | 17 +- include/llvm/ExecutionEngine/Orc/OrcError.h | 15 +- .../Orc/RTDyldObjectLinkingLayer.h | 31 +- include/llvm/ExecutionEngine/RuntimeDyld.h | 15 - include/llvm/IR/Constants.h | 10 - include/llvm/IR/IRBuilder.h | 20 +- include/llvm/IR/Instructions.h | 210 +- include/llvm/IR/IntrinsicInst.h | 169 + include/llvm/IR/Intrinsics.td | 16 + include/llvm/IR/LLVMContext.h | 28 + include/llvm/IR/Module.h | 2 +- include/llvm/IR/ModuleSummaryIndex.h | 10 +- include/llvm/IR/PassManager.h | 31 +- include/llvm/IR/PatternMatch.h | 73 +- include/llvm/IR/SafepointIRVerifier.h | 35 + include/llvm/IR/Type.h | 6 + include/llvm/InitializePasses.h | 2 + include/llvm/MC/MCAsmBackend.h | 2 +- .../llvm/MC/MCDisassembler/MCDisassembler.h | 1 + include/llvm/MC/MCMachObjectWriter.h | 3 +- include/llvm/MC/MCObjectWriter.h | 2 +- include/llvm/MC/MCSymbolWasm.h | 14 +- include/llvm/Object/COFF.h | 3 + include/llvm/Object/Wasm.h | 14 +- include/llvm/ObjectYAML/WasmYAML.h | 3 +- include/llvm/Option/OptTable.h | 3 +- include/llvm/Passes/PassBuilder.h | 294 +- include/llvm/ProfileData/InstrProf.h | 54 +- include/llvm/ProfileData/InstrProfReader.h | 37 +- include/llvm/ProfileData/InstrProfWriter.h | 16 +- include/llvm/ProfileData/ProfileCommon.h | 3 +- include/llvm/Support/BlockFrequency.h | 4 + include/llvm/Support/Compiler.h | 10 + include/llvm/Support/DynamicLibrary.h | 16 + include/llvm/Support/ErrorHandling.h | 48 +- .../llvm/Support/GenericDomTreeConstruction.h | 169 +- include/llvm/Support/ReverseIteration.h | 17 + include/llvm/Support/UnicodeCharRanges.h | 7 +- .../Target/GlobalISel/SelectionDAGCompat.td | 1 + include/llvm/Target/TargetInstrInfo.h | 10 + include/llvm/Target/TargetLowering.h | 30 +- include/llvm/Transforms/Scalar/GVN.h | 30 +- .../Transforms/Utils/LowerMemIntrinsics.h | 26 +- .../llvm/Transforms/Utils/SSAUpdaterImpl.h | 10 +- include/llvm/module.modulemap | 1 + lib/Analysis/BasicAliasAnalysis.cpp | 2 +- lib/Analysis/BranchProbabilityInfo.cpp | 2 +- lib/Analysis/CGSCCPassManager.cpp | 206 +- lib/Analysis/CaptureTracking.cpp | 4 +- lib/Analysis/DemandedBits.cpp | 15 +- lib/Analysis/DependenceAnalysis.cpp | 7 +- lib/Analysis/InstructionSimplify.cpp | 15 +- lib/Analysis/LazyCallGraph.cpp | 20 +- lib/Analysis/Lint.cpp | 2 +- lib/Analysis/LoopInfo.cpp | 6 +- lib/Analysis/MemoryBuiltins.cpp | 41 +- lib/Analysis/ModuleSummaryAnalysis.cpp | 2 +- lib/Analysis/ScalarEvolution.cpp | 20 +- lib/Analysis/TargetTransformInfo.cpp | 25 + lib/Analysis/ValueTracking.cpp | 55 +- lib/Analysis/VectorUtils.cpp | 2 +- lib/AsmParser/LLLexer.cpp | 2 +- lib/AsmParser/LLParser.cpp | 74 +- lib/AsmParser/LLParser.h | 3 +- lib/AsmParser/LLToken.h | 2 +- lib/Bitcode/Reader/BitcodeReader.cpp | 92 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 64 +- 
lib/CodeGen/AtomicExpandPass.cpp | 8 +- lib/CodeGen/CodeGen.cpp | 1 + lib/CodeGen/CodeGenPrepare.cpp | 37 +- lib/CodeGen/GlobalISel/IRTranslator.cpp | 4 +- .../GlobalISel/InstructionSelector.cpp | 7 + lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 36 +- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 42 +- lib/CodeGen/LiveRegUnits.cpp | 2 +- lib/CodeGen/MIRParser/MILexer.cpp | 10 + lib/CodeGen/MIRParser/MILexer.h | 6 +- lib/CodeGen/MIRParser/MIParser.cpp | 85 +- lib/CodeGen/MIRPrinter.cpp | 54 +- lib/CodeGen/MachineBlockPlacement.cpp | 37 +- lib/CodeGen/MachineFunction.cpp | 12 +- lib/CodeGen/MachineInstr.cpp | 20 +- lib/CodeGen/MachineVerifier.cpp | 8 + lib/CodeGen/MacroFusion.cpp | 2 +- lib/CodeGen/PostRAHazardRecognizer.cpp | 2 +- lib/CodeGen/RegAllocFast.cpp | 13 +- lib/CodeGen/RegAllocGreedy.cpp | 2 +- lib/CodeGen/RegAllocPBQP.cpp | 2 - lib/CodeGen/RegisterCoalescer.cpp | 28 + lib/CodeGen/RegisterScavenging.cpp | 15 +- lib/CodeGen/ScheduleDAG.cpp | 91 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 2 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 197 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 20 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 93 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 16 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 20 +- .../SelectionDAG/LegalizeTypesGeneric.cpp | 2 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 4 + lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 53 +- .../SelectionDAGAddressAnalysis.cpp | 28 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 134 +- .../SelectionDAG/SelectionDAGBuilder.h | 6 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 9 +- lib/CodeGen/SjLjEHPrepare.cpp | 7 +- lib/CodeGen/SplitKit.cpp | 8 +- lib/CodeGen/TargetLoweringBase.cpp | 54 + lib/DebugInfo/CodeView/SymbolDumper.cpp | 91 +- lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp | 12 + lib/DebugInfo/DWARF/DWARFContext.cpp | 130 +- lib/DebugInfo/DWARF/DWARFDie.cpp | 2 +- lib/DebugInfo/PDB/CMakeLists.txt | 2 + .../PDB/Native/DbiModuleDescriptorBuilder.cpp | 6 +- lib/DebugInfo/PDB/Native/DbiStream.cpp | 7 + lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp | 26 +- lib/DebugInfo/PDB/Native/NamedStreamMap.cpp | 6 +- .../PDB/Native/NativeBuiltinSymbol.cpp | 48 + .../PDB/Native/NativeCompilandSymbol.cpp | 2 +- lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp | 2 +- lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp | 2 +- lib/DebugInfo/PDB/Native/NativeSession.cpp | 60 +- lib/DebugInfo/PDB/Native/PDBFile.cpp | 9 +- lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 27 +- lib/DebugInfo/PDB/Native/PDBStringTable.cpp | 2 +- lib/DebugInfo/PDB/Native/PublicsStream.cpp | 16 +- .../PDB/Native/PublicsStreamBuilder.cpp | 89 + lib/ExecutionEngine/MCJIT/MCJIT.cpp | 19 +- lib/ExecutionEngine/Orc/OrcCBindings.cpp | 35 +- lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 104 +- lib/ExecutionEngine/Orc/OrcError.cpp | 21 + lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 23 +- .../RuntimeDyld/RuntimeDyld.cpp | 33 +- .../RuntimeDyld/RuntimeDyldCOFF.cpp | 7 +- .../RuntimeDyld/RuntimeDyldChecker.cpp | 2 +- .../RuntimeDyld/RuntimeDyldELF.cpp | 3 +- .../RuntimeDyld/RuntimeDyldImpl.h | 2 +- .../RuntimeDyld/RuntimeDyldMachO.cpp | 3 +- lib/Fuzzer/CMakeLists.txt | 2 +- lib/Fuzzer/FuzzerCorpus.h | 81 +- lib/Fuzzer/FuzzerDriver.cpp | 6 +- lib/Fuzzer/FuzzerExtFunctionsWeak.cpp | 3 +- lib/Fuzzer/FuzzerFlags.def | 4 +- lib/Fuzzer/FuzzerIOWindows.cpp | 4 +- lib/Fuzzer/FuzzerInternal.h | 8 +- lib/Fuzzer/FuzzerLoop.cpp | 66 +- lib/Fuzzer/FuzzerOptions.h | 1 + lib/Fuzzer/FuzzerUtilDarwin.cpp | 13 +- 
lib/Fuzzer/test/CMakeLists.txt | 3 +- lib/Fuzzer/test/FuzzerUnittest.cpp | 5 +- .../test/ShrinkControlFlowSimpleTest.cpp | 19 + lib/Fuzzer/test/reduce_inputs.test | 13 + lib/IR/AsmWriter.cpp | 66 +- lib/IR/CMakeLists.txt | 1 + lib/IR/ConstantFold.cpp | 38 +- lib/IR/Constants.cpp | 73 +- lib/IR/Core.cpp | 22 +- lib/IR/Instruction.cpp | 11 +- lib/IR/Instructions.cpp | 74 +- lib/IR/LLVMContext.cpp | 20 + lib/IR/LLVMContextImpl.cpp | 14 + lib/IR/LLVMContextImpl.h | 14 + lib/IR/Module.cpp | 4 +- lib/IR/SafepointIRVerifier.cpp | 437 + lib/IR/Type.cpp | 2 +- lib/IR/Verifier.cpp | 96 +- lib/LTO/LTO.cpp | 11 +- lib/Linker/IRMover.cpp | 18 +- lib/MC/ELFObjectWriter.cpp | 10 +- lib/MC/MCAssembler.cpp | 16 +- lib/MC/MachObjectWriter.cpp | 2 +- lib/MC/WasmObjectWriter.cpp | 159 +- lib/MC/WinCOFFObjectWriter.cpp | 11 +- lib/Object/WasmObjectFile.cpp | 40 +- lib/Object/WindowsResource.cpp | 4 +- lib/ObjectYAML/WasmYAML.cpp | 3 +- lib/Option/OptTable.cpp | 8 +- lib/Passes/PassBuilder.cpp | 262 +- lib/ProfileData/InstrProf.cpp | 46 +- lib/ProfileData/InstrProfReader.cpp | 20 +- lib/ProfileData/InstrProfWriter.cpp | 43 +- lib/Support/CommandLine.cpp | 2 +- lib/Support/DynamicLibrary.cpp | 43 +- lib/Support/ErrorHandling.cpp | 62 +- lib/Support/Host.cpp | 405 +- lib/Support/Mutex.cpp | 5 + lib/Support/Unix/DynamicLibrary.inc | 3 + lib/Support/Unix/Host.inc | 25 +- lib/Support/Unix/Program.inc | 3 - lib/Support/Windows/DynamicLibrary.inc | 2 + lib/Support/Windows/Host.inc | 4 + .../AArch64/AArch64A57FPLoadBalancing.cpp | 2 +- .../AArch64CleanupLocalDynamicTLSPass.cpp | 2 +- lib/Target/AArch64/AArch64CondBrTuning.cpp | 2 + lib/Target/AArch64/AArch64FastISel.cpp | 2 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 8 +- lib/Target/AArch64/AArch64InstrInfo.cpp | 32 +- lib/Target/AArch64/AArch64InstrInfo.h | 6 +- lib/Target/AArch64/AArch64InstrInfo.td | 11 +- .../AArch64/AArch64InstructionSelector.cpp | 13 +- lib/Target/AArch64/AArch64LegalizerInfo.cpp | 5 +- .../AArch64RedundantCopyElimination.cpp | 1 + lib/Target/AArch64/AArch64Subtarget.cpp | 5 +- lib/Target/AArch64/AArch64Subtarget.h | 7 + lib/Target/AArch64/AArch64TargetMachine.cpp | 6 +- .../Disassembler/AArch64Disassembler.cpp | 4 +- .../MCTargetDesc/AArch64AsmBackend.cpp | 4 +- .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 2 + .../AMDGPU/AMDGPUAnnotateUniformValues.cpp | 9 +- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 4 +- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 7 +- lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp | 13 +- lib/Target/AMDGPU/AMDGPUMacroFusion.cpp | 64 + lib/Target/AMDGPU/AMDGPUMacroFusion.h | 19 + lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 58 +- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 47 +- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 166 +- lib/Target/AMDGPU/CMakeLists.txt | 1 + lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 2 +- lib/Target/AMDGPU/GCNMinRegStrategy.cpp | 2 +- lib/Target/AMDGPU/GCNRegPressure.cpp | 2 +- lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +- lib/Target/AMDGPU/GCNSchedStrategy.h | 2 +- .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 4 +- lib/Target/AMDGPU/MIMGInstructions.td | 1 + .../AMDGPU/R600ControlFlowFinalizer.cpp | 2 +- lib/Target/AMDGPU/R600ISelLowering.cpp | 3 +- lib/Target/AMDGPU/R600ISelLowering.h | 3 +- lib/Target/AMDGPU/R600MachineScheduler.cpp | 2 +- lib/Target/AMDGPU/SIFoldOperands.cpp | 1 + lib/Target/AMDGPU/SIISelLowering.cpp | 124 +- lib/Target/AMDGPU/SIISelLowering.h | 3 +- lib/Target/AMDGPU/SIInstrInfo.cpp | 20 + lib/Target/AMDGPU/SIInstrInfo.h | 8 + lib/Target/AMDGPU/SIInstrInfo.td | 2 + 
lib/Target/AMDGPU/SIMachineScheduler.cpp | 2 +- lib/Target/AMDGPU/SIShrinkInstructions.cpp | 76 +- .../AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp | 2 +- lib/Target/AMDGPU/VOP3PInstructions.td | 28 +- lib/Target/AMDGPU/VOPInstructions.td | 18 +- lib/Target/ARM/ARMAsmPrinter.cpp | 1 + lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 + lib/Target/ARM/ARMBaseRegisterInfo.cpp | 19 +- lib/Target/ARM/ARMCallLowering.cpp | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 20 +- lib/Target/ARM/ARMISelLowering.h | 3 +- lib/Target/ARM/ARMInstrThumb2.td | 2 +- lib/Target/ARM/ARMInstructionSelector.cpp | 316 +- lib/Target/ARM/ARMLegalizerInfo.cpp | 207 +- lib/Target/ARM/ARMLegalizerInfo.h | 33 + lib/Target/ARM/ARMRegisterBankInfo.cpp | 32 +- lib/Target/ARM/ARMTargetTransformInfo.cpp | 18 + lib/Target/ARM/ARMTargetTransformInfo.h | 36 + lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 11 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 15 +- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 8 +- lib/Target/ARM/Thumb1FrameLowering.cpp | 2 +- lib/Target/AVR/AVRAsmPrinter.cpp | 5 +- lib/Target/AVR/AVRDevices.td | 23 +- lib/Target/AVR/AVRInstrInfo.cpp | 72 +- lib/Target/AVR/AVRInstrInfo.h | 4 + lib/Target/AVR/AVRInstrInfo.td | 32 +- lib/Target/AVR/AVRMCInstLower.cpp | 16 +- lib/Target/AVR/AVRRegisterInfo.cpp | 11 +- lib/Target/AVR/AVRRegisterInfo.td | 7 +- lib/Target/AVR/AVRTargetMachine.cpp | 6 + lib/Target/AVR/AsmParser/AVRAsmParser.cpp | 1 + lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp | 2 +- .../AVR/MCTargetDesc/AVRELFStreamer.cpp | 2 +- lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 4 +- .../Hexagon/AsmParser/HexagonAsmParser.cpp | 1 + lib/Target/Hexagon/HexagonBitSimplify.cpp | 2 + lib/Target/Hexagon/HexagonBitTracker.cpp | 1 + .../Hexagon/HexagonConstPropagation.cpp | 1 + lib/Target/Hexagon/HexagonFrameLowering.cpp | 14 +- lib/Target/Hexagon/HexagonGenPredicate.cpp | 1 + lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 46 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 56 +- lib/Target/Hexagon/HexagonISelLowering.h | 27 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 74 +- lib/Target/Hexagon/HexagonInstrInfo.h | 21 + .../Hexagon/HexagonMachineScheduler.cpp | 2 +- lib/Target/Hexagon/HexagonPatterns.td | 169 +- lib/Target/Hexagon/HexagonPseudo.td | 10 + lib/Target/Hexagon/HexagonSplitDouble.cpp | 2 + lib/Target/Hexagon/HexagonTargetMachine.cpp | 10 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 12 +- .../MCTargetDesc/HexagonAsmBackend.cpp | 7 +- .../Hexagon/MCTargetDesc/HexagonBaseInfo.h | 13 +- .../Hexagon/MCTargetDesc/HexagonShuffler.cpp | 2 + lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 1 + .../Lanai/MCTargetDesc/LanaiAsmBackend.cpp | 4 +- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 282 + .../Mips/MCTargetDesc/MipsABIFlagsSection.h | 2 + .../Mips/MCTargetDesc/MipsAsmBackend.cpp | 2 +- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 2 +- .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 32 + lib/Target/Mips/Mips.td | 2 + lib/Target/Mips/MipsInstrInfo.td | 11 +- lib/Target/Mips/MipsMTInstrFormats.td | 99 + lib/Target/Mips/MipsMTInstrInfo.td | 208 + lib/Target/Mips/MipsSchedule.td | 18 +- lib/Target/Mips/MipsScheduleGeneric.td | 14 +- lib/Target/Mips/MipsScheduleP5600.td | 2 +- lib/Target/Mips/MipsSubtarget.cpp | 3 +- lib/Target/Mips/MipsSubtarget.h | 4 + lib/Target/Mips/MipsTargetStreamer.h | 9 + lib/Target/NVPTX/NVPTXISelLowering.cpp | 3 +- lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 33 +- .../PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 2 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 5 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 36 +- 
lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 43 +- lib/Target/PowerPC/PPCISelLowering.cpp | 177 +- lib/Target/PowerPC/PPCISelLowering.h | 7 + lib/Target/PowerPC/PPCInstrInfo.td | 191 + lib/Target/PowerPC/PPCInstrVSX.td | 190 +- lib/Target/PowerPC/PPCScheduleP9.td | 4 +- lib/Target/PowerPC/PPCSubtarget.h | 7 + lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 4 +- .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 4 +- .../Sparc/MCTargetDesc/SparcAsmBackend.cpp | 3 +- .../MCTargetDesc/SystemZMCAsmBackend.cpp | 4 +- .../SystemZ/SystemZHazardRecognizer.cpp | 2 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 97 +- lib/Target/SystemZ/SystemZISelLowering.h | 15 +- lib/Target/SystemZ/SystemZInstrInfo.td | 74 +- lib/Target/SystemZ/SystemZLDCleanup.cpp | 2 +- .../SystemZ/SystemZMachineScheduler.cpp | 2 +- lib/Target/SystemZ/SystemZOperators.td | 18 +- lib/Target/SystemZ/SystemZScheduleZ13.td | 214 +- lib/Target/SystemZ/SystemZScheduleZ196.td | 4 +- lib/Target/SystemZ/SystemZScheduleZEC12.td | 4 +- .../SystemZ/SystemZTargetTransformInfo.cpp | 5 +- .../WebAssemblyTargetStreamer.cpp | 28 +- .../MCTargetDesc/WebAssemblyTargetStreamer.h | 8 +- .../WebAssembly/WebAssemblyAsmPrinter.cpp | 7 +- lib/Target/WebAssembly/WebAssemblyCFGSort.cpp | 2 +- .../WebAssembly/WebAssemblyMCInstLower.cpp | 2 - .../WebAssemblyRuntimeLibcallSignatures.cpp | 41 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 4 +- .../X86/InstPrinter/X86InstComments.cpp | 4 +- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 2 +- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 56 +- lib/Target/X86/Utils/X86ShuffleDecode.h | 8 +- lib/Target/X86/X86.td | 1 + lib/Target/X86/X86CallLowering.cpp | 47 +- lib/Target/X86/X86CallLowering.h | 2 +- lib/Target/X86/X86CallingConv.td | 10 +- lib/Target/X86/X86FastISel.cpp | 3 + lib/Target/X86/X86FrameLowering.cpp | 5 +- lib/Target/X86/X86ISelLowering.cpp | 539 +- lib/Target/X86/X86ISelLowering.h | 13 + lib/Target/X86/X86InstrInfo.cpp | 2 +- lib/Target/X86/X86InstructionSelector.cpp | 155 +- lib/Target/X86/X86LegalizerInfo.cpp | 12 +- lib/Target/X86/X86MCInstLower.cpp | 195 +- lib/Target/X86/X86SchedSandyBridge.td | 2472 +- lib/Target/X86/X86ScheduleBtVer2.td | 77 + lib/Target/X86/X86TargetTransformInfo.cpp | 13 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 4 + lib/Transforms/IPO/FunctionImport.cpp | 25 +- lib/Transforms/IPO/GlobalOpt.cpp | 10 +- lib/Transforms/IPO/Inliner.cpp | 10 +- lib/Transforms/IPO/LowerTypeTests.cpp | 11 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 18 +- lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 3 +- .../InstCombine/InstCombineAddSub.cpp | 78 +- .../InstCombine/InstCombineAndOrXor.cpp | 363 +- .../InstCombine/InstCombineCalls.cpp | 259 +- .../InstCombine/InstCombineCasts.cpp | 172 +- .../InstCombine/InstCombineCompares.cpp | 309 +- .../InstCombine/InstCombineInternal.h | 12 +- .../InstCombineLoadStoreAlloca.cpp | 81 +- .../InstCombine/InstCombineMulDivRem.cpp | 93 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 10 +- .../InstCombine/InstCombineSelect.cpp | 190 +- .../InstCombine/InstCombineShifts.cpp | 59 +- .../InstCombineSimplifyDemanded.cpp | 14 +- .../InstCombine/InstCombineVectorOps.cpp | 48 +- .../InstCombine/InstructionCombining.cpp | 164 +- .../Instrumentation/AddressSanitizer.cpp | 2 +- lib/Transforms/Instrumentation/CFGMST.h | 12 +- .../Instrumentation/InstrProfiling.cpp | 157 +- .../Instrumentation/MaximumSpanningTree.h | 6 +- .../Instrumentation/MemorySanitizer.cpp | 7 +- .../Instrumentation/PGOInstrumentation.cpp | 2 +- .../Instrumentation/ThreadSanitizer.cpp | 7 +- 
lib/Transforms/Scalar/ConstantHoisting.cpp | 57 +- lib/Transforms/Scalar/EarlyCSE.cpp | 2 +- lib/Transforms/Scalar/GVN.cpp | 202 +- lib/Transforms/Scalar/InferAddressSpaces.cpp | 5 +- lib/Transforms/Scalar/JumpThreading.cpp | 2 +- lib/Transforms/Scalar/LoopDeletion.cpp | 39 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 2 +- lib/Transforms/Scalar/LoopInterchange.cpp | 44 +- lib/Transforms/Scalar/LoopRotation.cpp | 20 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 110 +- .../Scalar/MergedLoadStoreMotion.cpp | 2 +- lib/Transforms/Scalar/NewGVN.cpp | 16 +- lib/Transforms/Scalar/Reassociate.cpp | 2 +- .../Scalar/RewriteStatepointsForGC.cpp | 2 +- lib/Transforms/Scalar/SCCP.cpp | 2 +- lib/Transforms/Scalar/SROA.cpp | 12 +- lib/Transforms/Scalar/StructurizeCFG.cpp | 2 +- lib/Transforms/Utils/CloneFunction.cpp | 20 +- lib/Transforms/Utils/CmpInstAnalysis.cpp | 2 +- lib/Transforms/Utils/CodeExtractor.cpp | 6 - lib/Transforms/Utils/Evaluator.cpp | 2 +- lib/Transforms/Utils/FunctionComparator.cpp | 18 +- lib/Transforms/Utils/Local.cpp | 18 +- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 143 +- lib/Transforms/Utils/LowerMemIntrinsics.cpp | 288 +- lib/Transforms/Utils/SimplifyCFG.cpp | 4 +- lib/Transforms/Utils/SimplifyIndVar.cpp | 47 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 4 +- lib/Transforms/Utils/VNCoercion.cpp | 15 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 9 +- lib/Transforms/Vectorize/SLPVectorizer.cpp | 45 +- runtimes/CMakeLists.txt | 200 +- runtimes/Components.cmake.in | 1 + test/Analysis/BasicAA/unreachable-block.ll | 2 +- .../Analysis/CostModel/X86/slm-arith-costs.ll | 28 +- .../Analysis/DependenceAnalysis/BasePtrBug.ll | 80 + test/Analysis/ScalarEvolution/guards.ll | 6 +- test/Assembler/2003-11-11-ImplicitRename.ll | 3 +- .../Assembler/2007-11-26-AttributeOverload.ll | 2 +- test/Assembler/atomic.ll | 26 +- test/Bitcode/Inputs/module-hash-strtab1.ll | 10 + test/Bitcode/Inputs/module-hash-strtab2.ll | 10 + test/Bitcode/atomic-no-syncscope.ll | 17 + test/Bitcode/atomic-no-syncscope.ll.bc | Bin 0 -> 1000 bytes test/Bitcode/atomic.ll | 4 +- test/Bitcode/compatibility-3.6.ll | 24 +- test/Bitcode/compatibility-3.7.ll | 24 +- test/Bitcode/compatibility-3.8.ll | 24 +- test/Bitcode/compatibility-3.9.ll | 24 +- test/Bitcode/compatibility-4.0.ll | 24 +- test/Bitcode/compatibility.ll | 24 +- test/Bitcode/memInstructions.3.2.ll | 104 +- test/Bitcode/module-hash-strtab.ll | 15 + test/Bitcode/module_hash.ll | 8 +- ...ction-summary-callgraph-profile-summary.ll | 2 +- ...ummary-callgraph-sample-profile-summary.ll | 2 +- .../AArch64/GlobalISel/arm64-irtranslator.ll | 8 +- .../GlobalISel/select-implicit-def.mir | 30 + .../select-intrinsic-aarch64-sdiv.mir | 38 + test/CodeGen/AArch64/arm64-csldst-mmo.ll | 6 +- .../AArch64/arm64-misched-forwarding-A53.ll | 4 +- .../AArch64/arm64-misched-memdep-bug.ll | 6 +- test/CodeGen/AArch64/fence-singlethread.ll | 2 +- .../AArch64/preferred-function-alignment.ll | 26 + .../CodeGen/AArch64/tailcall_misched_graph.ll | 4 +- test/CodeGen/AMDGPU/add.i16.ll | 10 +- test/CodeGen/AMDGPU/add.ll | 18 +- test/CodeGen/AMDGPU/add.v2i16.ll | 4 +- test/CodeGen/AMDGPU/add_i128.ll | 16 +- test/CodeGen/AMDGPU/add_i64.ll | 8 +- test/CodeGen/AMDGPU/addrspacecast.ll | 33 +- test/CodeGen/AMDGPU/alignbit-pat.ll | 2 +- .../AMDGPU/amdgpu-codegenprepare-fdiv.ll | 38 +- test/CodeGen/AMDGPU/and-gcn.ll | 3 +- test/CodeGen/AMDGPU/and.ll | 55 +- .../CodeGen/AMDGPU/any_extend_vector_inreg.ll | 6 +- test/CodeGen/AMDGPU/bitreverse.ll | 20 +- test/CodeGen/AMDGPU/bswap.ll | 2 +- 
.../AMDGPU/cgp-addressing-modes-flat.ll | 8 +- test/CodeGen/AMDGPU/cgp-addressing-modes.ll | 6 +- .../AMDGPU/clamp-omod-special-case.mir | 46 +- test/CodeGen/AMDGPU/coalescer_remat.ll | 2 +- .../AMDGPU/constant-fold-imm-immreg.mir | 187 +- .../AMDGPU/constant-fold-mi-operands.ll | 2 +- test/CodeGen/AMDGPU/copy-illegal-type.ll | 62 +- test/CodeGen/AMDGPU/ctlz.ll | 75 +- test/CodeGen/AMDGPU/ctlz_zero_undef.ll | 78 +- test/CodeGen/AMDGPU/ctpop.ll | 91 +- test/CodeGen/AMDGPU/ctpop64.ll | 29 +- test/CodeGen/AMDGPU/cttz_zero_undef.ll | 19 +- test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 88 +- test/CodeGen/AMDGPU/detect-dead-lanes.mir | 10 - test/CodeGen/AMDGPU/ds_read2.ll | 4 +- test/CodeGen/AMDGPU/ds_read2_superreg.ll | 10 +- test/CodeGen/AMDGPU/ds_read2st64.ll | 6 +- test/CodeGen/AMDGPU/early-if-convert-cost.ll | 2 +- test/CodeGen/AMDGPU/early-if-convert.ll | 2 +- .../AMDGPU/enable-no-signed-zeros-fp-math.ll | 11 +- test/CodeGen/AMDGPU/extractelt-to-trunc.ll | 14 +- test/CodeGen/AMDGPU/fabs.f16.ll | 14 +- test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll | 50 +- test/CodeGen/AMDGPU/fadd.f16.ll | 58 +- test/CodeGen/AMDGPU/fadd64.ll | 12 +- .../AMDGPU/fcanonicalize-elimination.ll | 487 + test/CodeGen/AMDGPU/fcanonicalize.f16.ll | 18 +- test/CodeGen/AMDGPU/fcanonicalize.ll | 2 +- test/CodeGen/AMDGPU/fcmp.f16.ll | 312 +- test/CodeGen/AMDGPU/fcmp64.ll | 12 +- test/CodeGen/AMDGPU/fconst64.ll | 9 +- test/CodeGen/AMDGPU/fcopysign.f16.ll | 91 +- test/CodeGen/AMDGPU/fdiv.f16.ll | 6 +- test/CodeGen/AMDGPU/fdiv.ll | 41 +- test/CodeGen/AMDGPU/fma-combine.ll | 34 +- test/CodeGen/AMDGPU/fma.f64.ll | 4 +- test/CodeGen/AMDGPU/fma.ll | 4 +- test/CodeGen/AMDGPU/fmax_legacy.ll | 10 +- test/CodeGen/AMDGPU/fmed3.ll | 4 +- test/CodeGen/AMDGPU/fmin_legacy.ll | 10 +- test/CodeGen/AMDGPU/fmul.f16.ll | 22 +- test/CodeGen/AMDGPU/fmul64.ll | 4 +- test/CodeGen/AMDGPU/fmuladd.f16.ll | 28 +- test/CodeGen/AMDGPU/fmuladd.f32.ll | 82 +- test/CodeGen/AMDGPU/fmuladd.f64.ll | 12 +- test/CodeGen/AMDGPU/fmuladd.v2f16.ll | 16 +- test/CodeGen/AMDGPU/fneg-combines.ll | 62 +- test/CodeGen/AMDGPU/fneg-fabs.f16.ll | 4 +- test/CodeGen/AMDGPU/fneg-fabs.ll | 6 +- test/CodeGen/AMDGPU/fneg.f16.ll | 8 +- .../AMDGPU/fold-immediate-output-mods.mir | 53 +- test/CodeGen/AMDGPU/fold-operands-order.mir | 6 - test/CodeGen/AMDGPU/fp32_to_fp16.ll | 6 +- test/CodeGen/AMDGPU/fpext.f16.ll | 8 +- test/CodeGen/AMDGPU/fptosi.f16.ll | 6 +- test/CodeGen/AMDGPU/fptoui.f16.ll | 6 +- test/CodeGen/AMDGPU/fptrunc.f16.ll | 12 +- test/CodeGen/AMDGPU/fract.f64.ll | 10 +- test/CodeGen/AMDGPU/fract.ll | 12 +- test/CodeGen/AMDGPU/frem.ll | 8 +- test/CodeGen/AMDGPU/fsqrt.f64.ll | 4 +- test/CodeGen/AMDGPU/fsqrt.ll | 6 +- test/CodeGen/AMDGPU/fsub.f16.ll | 28 +- test/CodeGen/AMDGPU/fsub.ll | 24 +- test/CodeGen/AMDGPU/fsub64.ll | 4 +- test/CodeGen/AMDGPU/ftrunc.f64.ll | 6 +- test/CodeGen/AMDGPU/global-extload-i16.ll | 4 +- test/CodeGen/AMDGPU/global-smrd-unknown.ll | 20 + test/CodeGen/AMDGPU/half.ll | 10 +- test/CodeGen/AMDGPU/imm.ll | 4 +- test/CodeGen/AMDGPU/immv216.ll | 8 +- test/CodeGen/AMDGPU/indirect-addressing-si.ll | 8 +- test/CodeGen/AMDGPU/inline-asm.ll | 4 +- .../AMDGPU/invariant-load-no-alias-store.ll | 2 +- test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll | 2 +- test/CodeGen/AMDGPU/llvm.amdgcn.class.ll | 2 +- test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll | 2 +- test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll | 2 +- test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll | 4 +- test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll | 4 +- test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll | 4 +- 
test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll | 4 +- test/CodeGen/AMDGPU/llvm.ceil.f16.ll | 8 +- test/CodeGen/AMDGPU/llvm.cos.f16.ll | 12 +- test/CodeGen/AMDGPU/llvm.exp2.f16.ll | 8 +- test/CodeGen/AMDGPU/llvm.floor.f16.ll | 8 +- test/CodeGen/AMDGPU/llvm.fma.f16.ll | 12 +- test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll | 24 +- test/CodeGen/AMDGPU/llvm.log2.f16.ll | 8 +- test/CodeGen/AMDGPU/llvm.maxnum.f16.ll | 22 +- test/CodeGen/AMDGPU/llvm.minnum.f16.ll | 22 +- test/CodeGen/AMDGPU/llvm.rint.f16.ll | 10 +- test/CodeGen/AMDGPU/llvm.round.ll | 4 +- test/CodeGen/AMDGPU/llvm.sin.f16.ll | 12 +- test/CodeGen/AMDGPU/llvm.sqrt.f16.ll | 8 +- test/CodeGen/AMDGPU/llvm.trunc.f16.ll | 8 +- test/CodeGen/AMDGPU/load-global-f32.ll | 10 +- test/CodeGen/AMDGPU/load-global-f64.ll | 6 +- test/CodeGen/AMDGPU/load-global-i16.ll | 10 +- test/CodeGen/AMDGPU/load-global-i32.ll | 8 +- test/CodeGen/AMDGPU/load-global-i64.ll | 10 +- test/CodeGen/AMDGPU/load-global-i8.ll | 10 +- test/CodeGen/AMDGPU/load-weird-sizes.ll | 10 +- test/CodeGen/AMDGPU/lower-mem-intrinsics.ll | 12 + .../AMDGPU/macro-fusion-cluster-vcc-uses.mir | 227 + test/CodeGen/AMDGPU/mad-combine.ll | 106 +- test/CodeGen/AMDGPU/madak.ll | 6 +- test/CodeGen/AMDGPU/madmk.ll | 4 +- test/CodeGen/AMDGPU/max.ll | 4 +- test/CodeGen/AMDGPU/merge-stores.ll | 4 +- test/CodeGen/AMDGPU/mubuf.ll | 2 +- test/CodeGen/AMDGPU/mul.ll | 6 +- .../AMDGPU/multi-divergent-exit-region.ll | 4 +- test/CodeGen/AMDGPU/no-shrink-extloads.ll | 2 +- test/CodeGen/AMDGPU/or.ll | 6 +- .../promote-alloca-invariant-markers.ll | 2 +- .../AMDGPU/reduce-load-width-alignment.ll | 6 +- test/CodeGen/AMDGPU/regcoal-subrange-join.mir | 162 + test/CodeGen/AMDGPU/reorder-stores.ll | 4 +- test/CodeGen/AMDGPU/rotl.i64.ll | 4 +- test/CodeGen/AMDGPU/rotr.i64.ll | 4 +- test/CodeGen/AMDGPU/rsq.ll | 8 +- test/CodeGen/AMDGPU/s_movk_i32.ll | 4 +- test/CodeGen/AMDGPU/sad.ll | 4 +- test/CodeGen/AMDGPU/saddo.ll | 6 +- test/CodeGen/AMDGPU/salu-to-valu.ll | 6 +- test/CodeGen/AMDGPU/scalar_to_vector.ll | 6 +- test/CodeGen/AMDGPU/schedule-global-loads.ll | 2 +- test/CodeGen/AMDGPU/scratch-buffer.ll | 4 +- test/CodeGen/AMDGPU/scratch-simple.ll | 6 +- test/CodeGen/AMDGPU/sdiv.ll | 6 +- test/CodeGen/AMDGPU/sdwa-peephole.ll | 24 +- .../AMDGPU/select-fabs-fneg-extract.ll | 54 +- test/CodeGen/AMDGPU/select-vectors.ll | 6 +- test/CodeGen/AMDGPU/select.f16.ll | 63 +- test/CodeGen/AMDGPU/setcc-fneg-constant.ll | 6 +- test/CodeGen/AMDGPU/setcc.ll | 10 +- test/CodeGen/AMDGPU/sext-in-reg.ll | 8 +- .../AMDGPU/sgpr-copy-duplicate-operand.ll | 4 +- test/CodeGen/AMDGPU/sgpr-copy.ll | 4 +- test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll | 4 +- test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll | 2 +- test/CodeGen/AMDGPU/shift-i64-opts.ll | 4 +- test/CodeGen/AMDGPU/shl.ll | 4 +- test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir | 161 +- .../AMDGPU/si-triv-disjoint-mem-access.ll | 2 +- test/CodeGen/AMDGPU/sign_extend.ll | 4 +- test/CodeGen/AMDGPU/sitofp.f16.ll | 4 +- test/CodeGen/AMDGPU/sminmax.ll | 26 +- test/CodeGen/AMDGPU/sminmax.v2i16.ll | 6 +- test/CodeGen/AMDGPU/spill-cfg-position.ll | 2 +- test/CodeGen/AMDGPU/sra.ll | 6 +- test/CodeGen/AMDGPU/srem.ll | 6 +- test/CodeGen/AMDGPU/srl.ll | 4 +- test/CodeGen/AMDGPU/ssubo.ll | 6 +- test/CodeGen/AMDGPU/sub.i16.ll | 10 +- test/CodeGen/AMDGPU/sub.ll | 4 +- test/CodeGen/AMDGPU/sub.v2i16.ll | 16 +- test/CodeGen/AMDGPU/syncscopes.ll | 19 + test/CodeGen/AMDGPU/trunc-bitcast-vector.ll | 4 +- test/CodeGen/AMDGPU/trunc.ll | 6 +- test/CodeGen/AMDGPU/uaddo.ll | 10 +- test/CodeGen/AMDGPU/udiv.ll | 8 +- 
test/CodeGen/AMDGPU/uitofp.f16.ll | 4 +- test/CodeGen/AMDGPU/urem.ll | 6 +- test/CodeGen/AMDGPU/usubo.ll | 12 +- test/CodeGen/AMDGPU/v_cndmask.ll | 12 +- test/CodeGen/AMDGPU/v_mac.ll | 10 +- test/CodeGen/AMDGPU/v_mac_f16.ll | 38 +- test/CodeGen/AMDGPU/vectorize-global-local.ll | 2 +- .../CodeGen/AMDGPU/vop-shrink-frame-index.mir | 161 + test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir | 40 + test/CodeGen/AMDGPU/vselect.ll | 25 +- test/CodeGen/AMDGPU/waitcnt-permute.mir | 12 - test/CodeGen/AMDGPU/xor.ll | 8 +- test/CodeGen/AMDGPU/zext-i64-bit-operand.ll | 4 +- .../CodeGen/ARM/2012-06-12-SchedMemLatency.ll | 24 +- .../GlobalISel/arm-instruction-select-cmp.mir | 1252 +- test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll | 30 + .../ARM/GlobalISel/arm-legalize-divmod.mir | 20 + .../ARM/GlobalISel/arm-legalize-fp.mir | 1612 + test/CodeGen/ARM/GlobalISel/arm-legalizer.mir | 33 + .../ARM/GlobalISel/arm-regbankselect.mir | 58 + test/CodeGen/ARM/arguments-nosplit-double.ll | 1 + test/CodeGen/ARM/arguments-nosplit-i64.ll | 1 + .../ARM/cortex-a57-misched-ldm-wrback.ll | 8 +- test/CodeGen/ARM/cortex-a57-misched-ldm.ll | 4 +- .../ARM/cortex-a57-misched-stm-wrback.ll | 2 +- test/CodeGen/ARM/cortex-a57-misched-vfma.ll | 28 +- .../ARM/cortex-a57-misched-vldm-wrback.ll | 10 +- test/CodeGen/ARM/cortex-a57-misched-vldm.ll | 6 +- .../ARM/cortex-a57-misched-vstm-wrback.ll | 2 +- test/CodeGen/ARM/fence-singlethread.ll | 2 +- test/CodeGen/ARM/ror.ll | 33 + test/CodeGen/ARM/scavenging.mir | 66 + test/CodeGen/AVR/branch-relaxation.ll | 96 + test/CodeGen/AVR/ctlz.ll | 5 +- test/CodeGen/AVR/cttz.ll | 4 +- test/CodeGen/AVR/frmidx-iterator-bug.ll | 33 + .../icall-func-pointer-correct-addr-space.ll | 15 + test/CodeGen/AVR/pseudo/ANDIWRdK.mir | 6 +- test/CodeGen/AVR/pseudo/COMWRd.mir | 2 +- test/CodeGen/AVR/pseudo/ORIWRdK.mir | 2 +- test/CodeGen/AVR/pseudo/SBCIWRdK.mir | 2 +- test/CodeGen/AVR/pseudo/SUBIWRdK.mir | 2 +- test/CodeGen/AVR/select-mbb-placement-bug.ll | 6 +- test/CodeGen/BPF/undef.ll | 58 +- test/CodeGen/Generic/pr33094.ll | 18 + test/CodeGen/Hexagon/convertdptoint.ll | 8 +- test/CodeGen/Hexagon/convertdptoll.ll | 4 +- test/CodeGen/Hexagon/convertsptoint.ll | 4 +- test/CodeGen/Hexagon/convertsptoll.ll | 4 +- test/CodeGen/Hexagon/dadd.ll | 8 +- test/CodeGen/Hexagon/dmul.ll | 8 +- .../Hexagon/doubleconvert-ieee-rnd-near.ll | 8 +- test/CodeGen/Hexagon/dsub.ll | 8 +- test/CodeGen/Hexagon/fadd.ll | 8 +- test/CodeGen/Hexagon/fmul.ll | 8 +- test/CodeGen/Hexagon/fsub.ll | 8 +- test/CodeGen/Hexagon/hasfp-crash1.ll | 82 + test/CodeGen/Hexagon/hasfp-crash2.ll | 83 + test/CodeGen/Hexagon/hvx-nontemporal.ll | 28 + test/CodeGen/Hexagon/target-flag-ext.mir | 24 + .../MIR/AArch64/atomic-memoperands.mir | 4 +- .../AArch64/invalid-target-memoperands.mir | 19 + .../MIR/AArch64/target-memoperands.mir | 22 + test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir | 20 +- test/CodeGen/MIR/AMDGPU/syncscopes.mir | 98 + test/CodeGen/MIR/AMDGPU/target-flags.mir | 29 + test/CodeGen/MIR/Generic/runPass.mir | 2 + test/CodeGen/MIR/Hexagon/target-flags.mir | 36 + .../MIR/X86/tied-physical-regs-match.mir | 22 + test/CodeGen/MSP430/Inst16mm.ll | 4 +- test/CodeGen/NVPTX/lower-aggr-copies.ll | 61 + test/CodeGen/PowerPC/PR33636.ll | 702 + test/CodeGen/PowerPC/atomics-regression.ll | 528 +- test/CodeGen/PowerPC/bitreverse.ll | 23 - test/CodeGen/PowerPC/build-vector-tests.ll | 4 +- test/CodeGen/PowerPC/ppc-ctr-dead-code.ll | 38 + .../PowerPC/ppc-redzone-alignment-bug.ll | 32 + test/CodeGen/PowerPC/ppc64le-smallarg.ll | 4 +- test/CodeGen/PowerPC/pr33093.ll | 165 + 
test/CodeGen/PowerPC/select-addrRegRegOnly.ll | 37 + test/CodeGen/PowerPC/svr4-redzone.ll | 6 +- test/CodeGen/PowerPC/tailcall1-64.ll | 7 +- test/CodeGen/PowerPC/testBitReverse.ll | 105 + test/CodeGen/PowerPC/vec_extract_p9.ll | 167 + test/CodeGen/PowerPC/vec_int_ext.ll | 251 +- .../vsx-partword-int-loads-and-stores.ll | 16 +- .../regalloc-fast-invalid-kill-flag.mir | 34 + .../Thumb2/ifcvt-no-branch-predictor.ll | 22 +- test/CodeGen/WebAssembly/umulo-i64.ll | 21 + test/CodeGen/X86/2012-08-16-setcc.ll | 42 +- test/CodeGen/X86/GC/badreadproto.ll | 2 +- test/CodeGen/X86/GC/badrootproto.ll | 2 +- test/CodeGen/X86/GC/badwriteproto.ll | 2 +- test/CodeGen/X86/GC/fat.ll | 2 +- test/CodeGen/X86/GC/outside.ll | 2 +- test/CodeGen/X86/GlobalISel/GV.ll | 63 + test/CodeGen/X86/GlobalISel/add-vec.ll | 173 +- test/CodeGen/X86/GlobalISel/constant.ll | 9 + test/CodeGen/X86/GlobalISel/ext-x86-64.ll | 2 +- test/CodeGen/X86/GlobalISel/ext.ll | 36 + test/CodeGen/X86/GlobalISel/legalize-GV.mir | 31 + test/CodeGen/X86/GlobalISel/legalize-ext.mir | 171 +- .../X86/GlobalISel/legalize-memop-scalar.mir | 110 + .../X86/GlobalISel/memop-scalar-x32.ll | 22 + test/CodeGen/X86/GlobalISel/memop-scalar.ll | 20 + .../X86/GlobalISel/regbankselect-X86_64.mir | 27 + test/CodeGen/X86/GlobalISel/select-GV.mir | 99 + .../X86/GlobalISel/select-constant.mir | 31 + test/CodeGen/X86/GlobalISel/select-ext.mir | 64 + .../X86/GlobalISel/select-unmerge-vec256.mir | 53 + .../X86/GlobalISel/select-unmerge-vec512.mir | 74 + .../CodeGen/X86/GlobalISel/x86_64-fallback.ll | 18 + test/CodeGen/X86/avg.ll | 6 +- test/CodeGen/X86/avx-cmp.ll | 193 +- test/CodeGen/X86/avx-load-store.ll | 275 +- test/CodeGen/X86/avx-schedule.ll | 648 +- test/CodeGen/X86/avx-unpack.ll | 166 +- test/CodeGen/X86/avx-vinsertf128.ll | 118 +- test/CodeGen/X86/avx2-vbroadcast.ll | 12 +- test/CodeGen/X86/avx512-cmp.ll | 2 + test/CodeGen/X86/avx512-insert-extract.ll | 26 +- test/CodeGen/X86/avx512-vec-cmp.ll | 53 + test/CodeGen/X86/avx512vl-vec-cmp.ll | 925 +- test/CodeGen/X86/avx512vl-vec-masked-cmp.ll | 50906 ++++++++++++++-- test/CodeGen/X86/bitcast-and-setcc-128.ll | 156 +- test/CodeGen/X86/bitcast-and-setcc-256.ll | 104 +- test/CodeGen/X86/bitcast-and-setcc-512.ll | 1868 + .../X86/bitcast-int-to-vector-bool-sext.ll | 3483 ++ .../X86/bitcast-int-to-vector-bool-zext.ll | 3279 + .../CodeGen/X86/bitcast-int-to-vector-bool.ll | 685 + test/CodeGen/X86/bitcast-setcc-128.ll | 156 +- test/CodeGen/X86/bitcast-setcc-256.ll | 419 +- test/CodeGen/X86/bitcast-setcc-512.ll | 1377 + test/CodeGen/X86/block-placement.ll | 101 +- test/CodeGen/X86/bool-simplify.ll | 129 +- .../X86/broadcast-elm-cross-splat-vec.ll | 2065 +- test/CodeGen/X86/bswap-wide-int.ll | 4 +- test/CodeGen/X86/build-vector-128.ll | 23 +- test/CodeGen/X86/build-vector-256.ll | 29 +- test/CodeGen/X86/build-vector-512.ll | 20 +- test/CodeGen/X86/cast-vsel.ll | 2 +- .../X86/clear_upper_vector_element_bits.ll | 240 +- test/CodeGen/X86/cmov.ll | 207 +- .../X86/code_placement_cold_loop_blocks.ll | 5 +- test/CodeGen/X86/combine-avx-intrinsics.ll | 47 +- test/CodeGen/X86/combine-avx2-intrinsics.ll | 69 +- test/CodeGen/X86/combine-rotates.ll | 80 + test/CodeGen/X86/combine-sse41-intrinsics.ll | 72 +- test/CodeGen/X86/constant-hoisting-bfi.ll | 52 +- .../element-wise-atomic-memory-intrinsics.ll | 124 + test/CodeGen/X86/extract-store.ll | 2 +- ...ractelement-legalization-store-ordering.ll | 51 +- test/CodeGen/X86/fast-isel-abort-warm.ll | 19 +- test/CodeGen/X86/fast-isel-gc-intrinsics.ll | 57 + test/CodeGen/X86/fastisel-softfloat.ll 
| 15 + test/CodeGen/X86/fp128-i128.ll | 2 +- test/CodeGen/X86/gather-addresses.ll | 16 +- test/CodeGen/X86/half.ll | 1043 +- .../CodeGen/X86/illegal-bitfield-loadstore.ll | 251 +- test/CodeGen/X86/optimize-max-1.ll | 51 +- test/CodeGen/X86/optimize-max-2.ll | 26 +- test/CodeGen/X86/pr15309.ll | 50 +- test/CodeGen/X86/pr23603.ll | 27 +- test/CodeGen/X86/pr33715.ll | 16 + test/CodeGen/X86/rdrand-x86_64.ll | 19 + test/CodeGen/X86/rdrand.ll | 119 +- test/CodeGen/X86/rdseed-x86_64.ll | 19 + test/CodeGen/X86/rdseed.ll | 66 +- test/CodeGen/X86/recip-fastmath.ll | 116 +- test/CodeGen/X86/recip-fastmath2.ll | 162 +- .../X86/regalloc-reconcile-broken-hints.ll | 2 +- test/CodeGen/X86/rotate4.ll | 104 +- test/CodeGen/X86/sbb.ll | 46 +- test/CodeGen/X86/select_const.ll | 113 +- test/CodeGen/X86/shift-codegen.ll | 42 +- test/CodeGen/X86/shift-folding.ll | 57 +- test/CodeGen/X86/shuffle-vs-trunc-256.ll | 313 +- test/CodeGen/X86/shuffle-vs-trunc-512.ll | 422 +- test/CodeGen/X86/sink-blockfreq.ll | 2 +- test/CodeGen/X86/sink-gep-before-mem-inst.ll | 25 + test/CodeGen/X86/soft-fp-legal-in-HW-reg.ll | 55 + test/CodeGen/X86/sse-schedule.ll | 248 +- test/CodeGen/X86/sse2-schedule.ll | 598 +- test/CodeGen/X86/sse3-schedule.ll | 48 +- test/CodeGen/X86/sse41-schedule.ll | 222 +- test/CodeGen/X86/sse42-schedule.ll | 38 +- test/CodeGen/X86/sse4a-schedule.ll | 95 + test/CodeGen/X86/ssse3-schedule.ll | 74 +- test/CodeGen/X86/swizzle-avx2.ll | 73 +- test/CodeGen/X86/tbm_patterns.ll | 410 +- test/CodeGen/X86/vec-copysign.ll | 2 +- test/CodeGen/X86/vec_return.ll | 17 +- test/CodeGen/X86/vec_shift6.ll | 9 +- test/CodeGen/X86/vec_unsafe-fp-math.ll | 15 +- test/CodeGen/X86/vector-popcnt-128.ll | 93 +- test/CodeGen/X86/vector-popcnt-256.ll | 14 +- test/CodeGen/X86/vector-popcnt-512.ll | 120 +- .../X86/vector-shuffle-combining-sse4a.ll | 86 + .../X86/vector-shuffle-combining-ssse3.ll | 15 + test/CodeGen/X86/vector-shuffle-sse4a.ll | 129 +- test/CodeGen/X86/vector-truncate-combine.ll | 10 +- test/CodeGen/X86/vector-tzcnt-128.ll | 54 +- test/CodeGen/X86/vector-tzcnt-256.ll | 28 +- test/CodeGen/X86/vector-tzcnt-512.ll | 124 +- test/CodeGen/X86/wide-integer-cmp.ll | 2 +- test/CodeGen/X86/x32-lea-1.ll | 10 +- test/CodeGen/X86/x86-interleaved-access.ll | 229 +- test/CodeGen/X86/zext-shl.ll | 39 +- test/CodeGen/X86/zext-trunc.ll | 9 +- test/DebugInfo/COFF/asm.ll | 6 +- test/DebugInfo/COFF/cpp-mangling.ll | 4 +- test/DebugInfo/COFF/fp-stack.ll | 2 +- test/DebugInfo/COFF/globals.ll | 6 +- test/DebugInfo/COFF/inlining-files.ll | 4 +- test/DebugInfo/COFF/inlining-header.ll | 8 +- test/DebugInfo/COFF/inlining-levels.ll | 8 +- test/DebugInfo/COFF/inlining-same-name.ll | 6 +- test/DebugInfo/COFF/inlining.ll | 6 +- test/DebugInfo/COFF/int8-char-type.ll | 4 +- test/DebugInfo/COFF/local-constant.ll | 5 +- test/DebugInfo/COFF/local-variable-gap.ll | 7 +- test/DebugInfo/COFF/local-variables.ll | 26 +- test/DebugInfo/COFF/long-name.ll | 2 +- test/DebugInfo/COFF/multifile.ll | 8 +- test/DebugInfo/COFF/multifunction.ll | 12 +- test/DebugInfo/COFF/pieces.ll | 34 +- test/DebugInfo/COFF/register-variables.ll | 30 +- test/DebugInfo/COFF/simple.ll | 8 +- test/DebugInfo/COFF/typedef.ll | 4 +- test/DebugInfo/COFF/types-array.ll | 6 +- test/DebugInfo/COFF/types-basic.ll | 46 +- test/DebugInfo/COFF/udts.ll | 22 +- .../Inputs/dwarfdump-str-offsets-macho.o | Bin 0 -> 1584 bytes .../Inputs/dwarfdump-str-offsets-macho.s | 201 + ...space => dwarfdump-test3.elf-x86-64-space} | Bin test/DebugInfo/PDB/Inputs/every-type.cpp | 63 + 
test/DebugInfo/PDB/Inputs/every-type.pdb | Bin 0 -> 102400 bytes test/DebugInfo/PDB/Inputs/every-type.yaml | 272 + test/DebugInfo/PDB/every-type.test | 261 + test/DebugInfo/PDB/pdbdump-headers.test | 116 +- .../PDB/pdbdump-merge-ids-and-types.test | 3 +- test/DebugInfo/PDB/pdbdump-mergetypes.test | 6 +- test/DebugInfo/X86/dbg-declare-inalloca.ll | 14 +- test/DebugInfo/dwarfdump-str-offsets.test | 148 +- test/DebugInfo/invalid-relocations.test | 35 + test/DebugInfo/llvm-symbolizer.test | 7 +- .../MemorySanitizer/unsized_type.ll | 22 + .../Instrumentation/ThreadSanitizer/atomic.ll | 8 +- test/LTO/Resolution/X86/linker-redef-thin.ll | 16 + test/Linker/Inputs/syncscope-1.ll | 6 + test/Linker/Inputs/syncscope-2.ll | 6 + test/Linker/Inputs/thumb-module-inline-asm.ll | 3 + .../link-arm-and-thumb-module-inline-asm.ll | 20 + test/Linker/syncscopes.ll | 11 + test/MC/AArch64/label-arithmetic-diags-elf.s | 51 +- test/MC/AMDGPU/gfx9_asm_all.s | 459 + test/MC/AMDGPU/vop3p-err.s | 41 - test/MC/AMDGPU/vop3p.s | 63 +- test/MC/ARM/elf-movt.s | 24 + test/MC/ARM/invalid-instructions-spellcheck.s | 68 + test/MC/ARM/ldr-pseudo-unpredictable.s | 16 +- test/MC/COFF/bad-expr.s | 3 +- test/MC/COFF/cv-def-range-gap.s | 16 +- test/MC/COFF/cv-def-range.s | 10 +- test/MC/COFF/cv-inline-linetable-infloop.s | 2 +- test/MC/COFF/cv-inline-linetable-unlikely.s | 4 +- .../MC/COFF/cv-inline-linetable-unreachable.s | 2 +- test/MC/COFF/cv-inline-linetable.s | 4 +- test/MC/Disassembler/Mips/mt/valid-r2-el.txt | 32 + test/MC/Disassembler/Mips/mt/valid-r2.txt | 32 + test/MC/ELF/bad-expr3.s | 3 +- test/MC/Mips/addend.s | 21 + test/MC/Mips/mt/abiflag.s | 10 + test/MC/Mips/mt/invalid-wrong-error.s | 3 + test/MC/Mips/mt/invalid.s | 27 + .../mftr-mttr-aliases-invalid-wrong-error.s | 18 + test/MC/Mips/mt/mftr-mttr-aliases-invalid.s | 23 + test/MC/Mips/mt/mftr-mttr-aliases.s | 47 + test/MC/Mips/mt/mftr-mttr-reserved-valid.s | 8 + test/MC/Mips/mt/module-directive-invalid.s | 6 + test/MC/Mips/mt/module-directive.s | 16 + test/MC/Mips/mt/set-directive.s | 14 + test/MC/Mips/mt/valid.s | 33 + test/MC/WebAssembly/array-fill.ll | 14 + test/MC/WebAssembly/external-data.ll | 3 +- test/MC/WebAssembly/external-func-address.ll | 49 +- test/MC/WebAssembly/unnamed-data.ll | 3 +- test/MC/WebAssembly/weak-alias.ll | 37 +- test/Object/Inputs/trivial-object-test.wasm | Bin 0 -> 303 bytes test/Object/Inputs/trivial.ll | 3 + test/Object/nm-trivial-object.test | 7 + test/Object/obj2yaml.test | 25 +- test/Object/objdump-relocations.test | 7 + test/ObjectYAML/wasm/data_section.yaml | 5 +- test/Other/2002-01-31-CallGraph.ll | 2 +- test/Other/new-pm-defaults.ll | 40 + test/Other/new-pm-lto-defaults.ll | 11 +- test/Other/pass-pipelines.ll | 2 +- .../basic-use-after-reloc.ll | 23 + test/SafepointIRVerifier/compares.ll | 85 + test/SafepointIRVerifier/constant-bases.ll | 70 + .../unrecorded-live-at-sp.ll | 71 + test/SafepointIRVerifier/uses-in-phi-nodes.ll | 78 + test/TableGen/AsmVariant.td | 1 + test/TableGen/GlobalISelEmitter.td | 931 +- test/TableGen/UnterminatedComment.td | 2 +- .../pr33641_remove_arg_dbgvalue.ll | 38 + test/Transforms/CodeGenPrepare/X86/memcmp.ll | 77 +- .../CodeGenPrepare/X86/sink-addrmode.ll | 24 + .../CodeGenPrepare/crash-on-large-allocas.ll | 16 + .../ConstantHoisting/ARM/bad-cases.ll | 31 + .../ConstantHoisting/ARM/insertvalue.ll | 31 + test/Transforms/ConstantHoisting/X86/ehpad.ll | 5 +- test/Transforms/GVN/PRE/atomic.ll | 6 +- test/Transforms/GVN/PRE/phi-translate-2.ll | 131 - test/Transforms/GVN/PRE/pre-gep-load.ll | 2 +- 
test/Transforms/GVN/PRE/pre-load.ll | 6 +- .../IndVarSimplify/canonicalize-cmp.ll | 98 + .../IndVarSimplify/eliminate-comparison.ll | 4 +- .../IndVarSimplify/strengthen-overflow.ll | 84 + .../IndVarSimplify/widen-loop-comp.ll | 2 +- .../InferAddressSpaces/AMDGPU/basic.ll | 12 + .../Inline/ARM/inline-target-attr.ll | 60 + test/Transforms/Inline/ARM/lit.local.cfg | 2 + .../Inline/cgscc-incremental-invalidate.ll | 105 +- .../InstCombine/2017-07-07-UMul-ZExt.ll | 29 + test/Transforms/InstCombine/and-or-not.ll | 24 +- test/Transforms/InstCombine/bswap-fold.ll | 161 +- test/Transforms/InstCombine/cmp-intrinsic.ll | 123 + .../InstCombine/consecutive-fences.ll | 12 +- test/Transforms/InstCombine/icmp.ll | 16 +- test/Transforms/InstCombine/intrinsics.ll | 60 - test/Transforms/InstCombine/or-xor.ll | 24 +- .../InstCombine/pr33689_same_bitwidth.ll | 53 + test/Transforms/InstCombine/select-implied.ll | 77 + test/Transforms/InstCombine/select.ll | 7 + .../ARM/interleaved-accesses.ll | 29 + test/Transforms/LoopRotate/pr33701.ll | 27 + .../2013-01-14-ReuseCast.ll | 4 +- .../X86/lsr-filtering-scaledreg.ll | 60 + .../LoopUnroll/runtime-loop-multiple-exits.ll | 353 +- test/Transforms/LoopUnroll/runtime-loop.ll | 68 + .../LoopVectorize/X86/slm-no-vectorize.ll | 49 + .../LoopVectorize/if-conversion-nest.ll | 95 +- test/Transforms/LoopVectorize/pr33706.ll | 61 + .../LowerTypeTests/Inputs/import-icall.yaml | 1 + .../Transforms/LowerTypeTests/import-icall.ll | 7 + test/Transforms/NewGVN/pr33720.ll | 91 + .../PGOProfile/counter_promo_exit_merge.ll | 4 +- .../PGOProfile/counter_promo_mexits.ll | 4 +- .../PGOProfile/counter_promo_nest.ll | 165 + test/Transforms/SimplifyCFG/implied-and-or.ll | 183 + .../SimplifyCFG/sink-common-code.ll | 24 + test/Transforms/Sink/fence.ll | 8 +- .../ThinLTOBitcodeWriter/pr33536.ll | 37 + test/Unit/lit.cfg | 5 +- .../2004-05-21-SwitchConstantMismatch.ll | 2 +- test/Verifier/2007-12-21-InvokeParamAttrs.ll | 2 +- test/Verifier/2008-01-11-VarargAttrs.ll | 2 +- test/Verifier/2009-05-29-InvokeResult1.ll | 2 +- test/Verifier/2009-05-29-InvokeResult2.ll | 2 +- test/Verifier/2009-05-29-InvokeResult3.ll | 2 +- test/Verifier/byval-1.ll | 2 +- .../element-wise-atomic-memory-intrinsics.ll | 42 + test/Verifier/gcread-ptrptr.ll | 2 +- test/Verifier/gcroot-alloca.ll | 2 +- test/Verifier/gcroot-meta.ll | 2 +- test/Verifier/gcroot-ptrptr.ll | 2 +- test/Verifier/gcwrite-ptrptr.ll | 2 +- test/lit.cfg | 5 +- test/tools/llvm-cov/threads.c | 11 + test/tools/llvm-cov/zeroFunctionFile.c | 2 +- .../ARM/Inputs/reloc-half.obj.macho-arm | Bin 0 -> 360 bytes .../llvm-objdump/ARM/macho-reloc-half.test | 4 + test/tools/llvm-objdump/Inputs/test.wasm | Bin 181 -> 0 bytes test/tools/llvm-objdump/Inputs/trivial.ll | 19 + .../llvm-objdump/Inputs/trivial.obj.wasm | Bin 0 -> 303 bytes .../WebAssembly/symbol-table.test | 17 +- test/tools/llvm-objdump/wasm.txt | 35 +- .../llvm-pdbdump/partial-type-stream.test | 3 +- test/tools/llvm-profdata/c-general.test | 4 + test/tools/llvm-readobj/Inputs/trivial.ll | 14 +- .../llvm-readobj/Inputs/trivial.obj.wasm | Bin 221 -> 285 bytes .../llvm-readobj/codeview-linetables.test | 20 +- test/tools/llvm-readobj/file-headers.test | 3 - test/tools/llvm-readobj/relocations.test | 19 +- test/tools/llvm-readobj/sections.test | 131 +- test/tools/llvm-readobj/symbols.test | 22 +- tools/gold/gold-plugin.cpp | 2 +- tools/lli/OrcLazyJIT.cpp | 23 +- tools/lli/OrcLazyJIT.h | 29 +- tools/lli/RemoteJITUtils.h | 4 +- tools/lli/lli.cpp | 2 +- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 8 +- 
tools/llvm-c-test/echo.cpp | 8 +- tools/llvm-cov/CodeCoverage.cpp | 19 +- tools/llvm-lto/llvm-lto.cpp | 6 +- tools/llvm-objdump/llvm-objdump.cpp | 2 +- tools/llvm-pdbutil/CMakeLists.txt | 1 + tools/llvm-pdbutil/Diff.cpp | 688 +- tools/llvm-pdbutil/DiffPrinter.cpp | 147 + tools/llvm-pdbutil/DiffPrinter.h | 172 + tools/llvm-pdbutil/DumpOutputStyle.cpp | 7 + tools/llvm-pdbutil/FormatUtil.cpp | 52 + tools/llvm-pdbutil/FormatUtil.h | 10 +- tools/llvm-pdbutil/MinimalTypeDumper.cpp | 6 +- tools/llvm-pdbutil/StreamUtil.cpp | 85 +- tools/llvm-pdbutil/StreamUtil.h | 5 + tools/llvm-pdbutil/llvm-pdbutil.cpp | 44 +- tools/llvm-pdbutil/llvm-pdbutil.h | 7 + tools/llvm-profdata/llvm-profdata.cpp | 74 +- tools/llvm-readobj/COFFDumper.cpp | 6 +- tools/llvm-readobj/WasmDumper.cpp | 6 + tools/llvm-shlib/CMakeLists.txt | 2 +- tools/llvm-stress/llvm-stress.cpp | 16 +- tools/obj2yaml/wasm2yaml.cpp | 7 +- tools/opt-viewer/CMakeLists.txt | 13 + {utils => tools}/opt-viewer/opt-diff.py | 0 {utils => tools}/opt-viewer/opt-stats.py | 0 {utils => tools}/opt-viewer/opt-viewer.py | 5 +- {utils => tools}/opt-viewer/optpmap.py | 0 {utils => tools}/opt-viewer/optrecord.py | 0 {utils => tools}/opt-viewer/style.css | 0 tools/opt/NewPMDriver.cpp | 101 +- tools/sanstats/sanstats.cpp | 5 +- tools/yaml2obj/yaml2wasm.cpp | 2 +- unittests/ADT/APFloatTest.cpp | 16 +- unittests/ADT/FunctionRefTest.cpp | 14 + unittests/Analysis/AliasAnalysisTest.cpp | 5 +- unittests/Analysis/CGSCCPassManagerTest.cpp | 198 +- unittests/Analysis/LazyCallGraphTest.cpp | 33 +- .../Orc/CompileOnDemandLayerTest.cpp | 5 +- .../Orc/GlobalMappingLayerTest.cpp | 8 +- .../Orc/LazyEmittingLayerTest.cpp | 2 +- .../Orc/ObjectTransformLayerTest.cpp | 95 +- unittests/ExecutionEngine/Orc/OrcCAPITest.cpp | 32 +- unittests/ExecutionEngine/Orc/OrcTestCommon.h | 29 +- .../Orc/RTDyldObjectLinkingLayerTest.cpp | 60 +- unittests/IR/CMakeLists.txt | 2 + unittests/IR/ModuleTest.cpp | 2 +- unittests/IR/PassBuilderCallbacksTest.cpp | 520 + unittests/IR/PassManagerTest.cpp | 7 + unittests/ProfileData/CMakeLists.txt | 2 + unittests/ProfileData/CoverageMappingTest.cpp | 97 +- unittests/ProfileData/InstrProfTest.cpp | 268 +- .../DynamicLibrary/DynamicLibraryTest.cpp | 10 + unittests/Support/ErrorTest.cpp | 2 +- unittests/Support/Host.cpp | 61 + unittests/Support/MathExtrasTest.cpp | 2 + unittests/Transforms/Utils/Cloning.cpp | 13 +- utils/TableGen/AsmMatcherEmitter.cpp | 47 +- utils/TableGen/AsmWriterEmitter.cpp | 4 +- utils/TableGen/CodeEmitterGen.cpp | 14 +- utils/TableGen/CodeGenInstruction.h | 2 +- utils/TableGen/CodeGenMapTable.cpp | 4 +- utils/TableGen/CodeGenTarget.cpp | 2 +- utils/TableGen/CodeGenTarget.h | 2 +- utils/TableGen/DAGISelMatcherGen.cpp | 2 +- utils/TableGen/FastISelEmitter.cpp | 18 +- utils/TableGen/FixedLenDecoderEmitter.cpp | 4 +- utils/TableGen/GlobalISelEmitter.cpp | 775 +- utils/TableGen/InstrInfoEmitter.cpp | 12 +- utils/TableGen/RegisterBankEmitter.cpp | 2 +- utils/TableGen/SearchableTableEmitter.cpp | 10 +- utils/TableGen/SubtargetEmitter.cpp | 9 +- utils/TableGen/X86DisassemblerTables.cpp | 2 +- utils/TableGen/X86DisassemblerTables.h | 2 +- utils/TableGen/X86ModRMFilters.h | 2 +- utils/TableGen/X86RecognizableInstr.cpp | 4 +- utils/TableGen/X86RecognizableInstr.h | 2 +- utils/docker/build_docker_image.sh | 52 +- utils/docker/scripts/build_install_llvm.sh | 12 +- utils/lit/lit/TestRunner.py | 241 +- utils/lit/lit/formats/googletest.py | 5 +- utils/lit/lit/main.py | 3 +- utils/lit/tests/selecting.py | 5 + utils/vim/syntax/llvm.vim | 6 +- 1199 files 
changed, 98409 insertions(+), 18934 deletions(-) delete mode 100644 docs/Dummy.html create mode 100644 include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h create mode 100644 include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h create mode 100644 include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h create mode 100644 include/llvm/IR/SafepointIRVerifier.h create mode 100644 include/llvm/Support/ReverseIteration.h create mode 100644 lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp create mode 100644 lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp create mode 100644 lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp create mode 100644 lib/Fuzzer/test/reduce_inputs.test create mode 100644 lib/IR/SafepointIRVerifier.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUMacroFusion.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUMacroFusion.h create mode 100644 lib/Target/Mips/MipsMTInstrFormats.td create mode 100644 lib/Target/Mips/MipsMTInstrInfo.td create mode 100644 test/Analysis/DependenceAnalysis/BasePtrBug.ll create mode 100644 test/Bitcode/Inputs/module-hash-strtab1.ll create mode 100644 test/Bitcode/Inputs/module-hash-strtab2.ll create mode 100644 test/Bitcode/atomic-no-syncscope.ll create mode 100644 test/Bitcode/atomic-no-syncscope.ll.bc create mode 100644 test/Bitcode/module-hash-strtab.ll create mode 100644 test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir create mode 100644 test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir create mode 100644 test/CodeGen/AArch64/preferred-function-alignment.ll create mode 100644 test/CodeGen/AMDGPU/fcanonicalize-elimination.ll create mode 100644 test/CodeGen/AMDGPU/global-smrd-unknown.ll create mode 100644 test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir create mode 100644 test/CodeGen/AMDGPU/regcoal-subrange-join.mir create mode 100644 test/CodeGen/AMDGPU/syncscopes.ll create mode 100644 test/CodeGen/AMDGPU/vop-shrink-frame-index.mir create mode 100644 test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir create mode 100644 test/CodeGen/ARM/ror.ll create mode 100644 test/CodeGen/ARM/scavenging.mir create mode 100644 test/CodeGen/AVR/branch-relaxation.ll create mode 100644 test/CodeGen/AVR/frmidx-iterator-bug.ll create mode 100644 test/CodeGen/AVR/icall-func-pointer-correct-addr-space.ll create mode 100644 test/CodeGen/Generic/pr33094.ll create mode 100644 test/CodeGen/Hexagon/hasfp-crash1.ll create mode 100644 test/CodeGen/Hexagon/hasfp-crash2.ll create mode 100644 test/CodeGen/Hexagon/hvx-nontemporal.ll create mode 100644 test/CodeGen/Hexagon/target-flag-ext.mir create mode 100644 test/CodeGen/MIR/AArch64/invalid-target-memoperands.mir create mode 100644 test/CodeGen/MIR/AArch64/target-memoperands.mir create mode 100644 test/CodeGen/MIR/AMDGPU/syncscopes.mir create mode 100644 test/CodeGen/MIR/AMDGPU/target-flags.mir create mode 100644 test/CodeGen/MIR/Hexagon/target-flags.mir create mode 100644 test/CodeGen/MIR/X86/tied-physical-regs-match.mir create mode 100644 test/CodeGen/PowerPC/PR33636.ll delete mode 100644 test/CodeGen/PowerPC/bitreverse.ll create mode 100644 test/CodeGen/PowerPC/ppc-ctr-dead-code.ll create mode 100644 test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll create mode 100644 test/CodeGen/PowerPC/pr33093.ll create mode 100644 test/CodeGen/PowerPC/select-addrRegRegOnly.ll create mode 100644 test/CodeGen/PowerPC/testBitReverse.ll create mode 100644 test/CodeGen/PowerPC/vec_extract_p9.ll create mode 100644 test/CodeGen/SystemZ/regalloc-fast-invalid-kill-flag.mir create mode 100644 
test/CodeGen/WebAssembly/umulo-i64.ll create mode 100644 test/CodeGen/X86/GlobalISel/GV.ll create mode 100644 test/CodeGen/X86/GlobalISel/legalize-GV.mir create mode 100644 test/CodeGen/X86/GlobalISel/legalize-memop-scalar.mir create mode 100644 test/CodeGen/X86/GlobalISel/select-GV.mir create mode 100644 test/CodeGen/X86/GlobalISel/select-unmerge-vec256.mir create mode 100644 test/CodeGen/X86/GlobalISel/select-unmerge-vec512.mir create mode 100644 test/CodeGen/X86/GlobalISel/x86_64-fallback.ll create mode 100644 test/CodeGen/X86/bitcast-and-setcc-512.ll create mode 100644 test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll create mode 100644 test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll create mode 100644 test/CodeGen/X86/bitcast-int-to-vector-bool.ll create mode 100644 test/CodeGen/X86/bitcast-setcc-512.ll create mode 100644 test/CodeGen/X86/combine-rotates.ll create mode 100644 test/CodeGen/X86/fast-isel-gc-intrinsics.ll create mode 100644 test/CodeGen/X86/fastisel-softfloat.ll create mode 100644 test/CodeGen/X86/pr33715.ll create mode 100644 test/CodeGen/X86/rdrand-x86_64.ll create mode 100644 test/CodeGen/X86/rdseed-x86_64.ll create mode 100644 test/CodeGen/X86/sink-gep-before-mem-inst.ll create mode 100644 test/CodeGen/X86/soft-fp-legal-in-HW-reg.ll create mode 100644 test/CodeGen/X86/sse4a-schedule.ll create mode 100644 test/CodeGen/X86/vector-shuffle-combining-sse4a.ll create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-macho.o create mode 100644 test/DebugInfo/Inputs/dwarfdump-str-offsets-macho.s rename test/DebugInfo/Inputs/{dwarfdump-test3.elf-x86-64 space => dwarfdump-test3.elf-x86-64-space} (100%) create mode 100644 test/DebugInfo/PDB/Inputs/every-type.cpp create mode 100644 test/DebugInfo/PDB/Inputs/every-type.pdb create mode 100644 test/DebugInfo/PDB/Inputs/every-type.yaml create mode 100644 test/DebugInfo/PDB/every-type.test create mode 100644 test/DebugInfo/invalid-relocations.test create mode 100644 test/Instrumentation/MemorySanitizer/unsized_type.ll create mode 100644 test/LTO/Resolution/X86/linker-redef-thin.ll create mode 100644 test/Linker/Inputs/syncscope-1.ll create mode 100644 test/Linker/Inputs/syncscope-2.ll create mode 100644 test/Linker/Inputs/thumb-module-inline-asm.ll create mode 100644 test/Linker/link-arm-and-thumb-module-inline-asm.ll create mode 100644 test/Linker/syncscopes.ll create mode 100644 test/MC/ARM/invalid-instructions-spellcheck.s create mode 100644 test/MC/Disassembler/Mips/mt/valid-r2-el.txt create mode 100644 test/MC/Disassembler/Mips/mt/valid-r2.txt create mode 100644 test/MC/Mips/addend.s create mode 100644 test/MC/Mips/mt/abiflag.s create mode 100644 test/MC/Mips/mt/invalid-wrong-error.s create mode 100644 test/MC/Mips/mt/invalid.s create mode 100644 test/MC/Mips/mt/mftr-mttr-aliases-invalid-wrong-error.s create mode 100644 test/MC/Mips/mt/mftr-mttr-aliases-invalid.s create mode 100644 test/MC/Mips/mt/mftr-mttr-aliases.s create mode 100644 test/MC/Mips/mt/mftr-mttr-reserved-valid.s create mode 100644 test/MC/Mips/mt/module-directive-invalid.s create mode 100644 test/MC/Mips/mt/module-directive.s create mode 100644 test/MC/Mips/mt/set-directive.s create mode 100644 test/MC/Mips/mt/valid.s create mode 100644 test/MC/WebAssembly/array-fill.ll create mode 100644 test/Object/Inputs/trivial-object-test.wasm create mode 100644 test/SafepointIRVerifier/basic-use-after-reloc.ll create mode 100644 test/SafepointIRVerifier/compares.ll create mode 100644 test/SafepointIRVerifier/constant-bases.ll create mode 100644 
test/SafepointIRVerifier/unrecorded-live-at-sp.ll create mode 100644 test/SafepointIRVerifier/uses-in-phi-nodes.ll create mode 100644 test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll create mode 100644 test/Transforms/CodeGenPrepare/crash-on-large-allocas.ll create mode 100644 test/Transforms/ConstantHoisting/ARM/insertvalue.ll delete mode 100644 test/Transforms/GVN/PRE/phi-translate-2.ll create mode 100644 test/Transforms/IndVarSimplify/canonicalize-cmp.ll create mode 100644 test/Transforms/Inline/ARM/inline-target-attr.ll create mode 100644 test/Transforms/Inline/ARM/lit.local.cfg create mode 100644 test/Transforms/InstCombine/2017-07-07-UMul-ZExt.ll create mode 100644 test/Transforms/InstCombine/cmp-intrinsic.ll create mode 100644 test/Transforms/InstCombine/pr33689_same_bitwidth.ll create mode 100644 test/Transforms/LoopRotate/pr33701.ll create mode 100644 test/Transforms/LoopStrengthReduce/X86/lsr-filtering-scaledreg.ll create mode 100644 test/Transforms/LoopVectorize/X86/slm-no-vectorize.ll create mode 100644 test/Transforms/LoopVectorize/pr33706.ll create mode 100644 test/Transforms/NewGVN/pr33720.ll create mode 100644 test/Transforms/PGOProfile/counter_promo_nest.ll create mode 100644 test/Transforms/SimplifyCFG/implied-and-or.ll create mode 100644 test/Transforms/ThinLTOBitcodeWriter/pr33536.ll create mode 100644 test/tools/llvm-cov/threads.c create mode 100644 test/tools/llvm-objdump/ARM/Inputs/reloc-half.obj.macho-arm create mode 100644 test/tools/llvm-objdump/ARM/macho-reloc-half.test delete mode 100644 test/tools/llvm-objdump/Inputs/test.wasm create mode 100644 test/tools/llvm-objdump/Inputs/trivial.ll create mode 100644 test/tools/llvm-objdump/Inputs/trivial.obj.wasm create mode 100644 tools/llvm-pdbutil/DiffPrinter.cpp create mode 100644 tools/llvm-pdbutil/DiffPrinter.h create mode 100644 tools/opt-viewer/CMakeLists.txt rename {utils => tools}/opt-viewer/opt-diff.py (100%) rename {utils => tools}/opt-viewer/opt-stats.py (100%) rename {utils => tools}/opt-viewer/opt-viewer.py (98%) rename {utils => tools}/opt-viewer/optpmap.py (100%) rename {utils => tools}/opt-viewer/optrecord.py (100%) rename {utils => tools}/opt-viewer/style.css (100%) create mode 100644 unittests/IR/PassBuilderCallbacksTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index fc05f30e4cdb..61ecfdf970d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -288,6 +288,10 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name set(LLVM_TOOLS_INSTALL_DIR "bin" CACHE STRING "Path for binary subdirectory (defaults to 'bin')") mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) +set(LLVM_UTILS_INSTALL_DIR "bin" CACHE STRING + "Path to install LLVM utilities (enabled by LLVM_INSTALL_UTILS=ON) (defaults to LLVM_TOOLS_INSTALL_DIR)") +mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) + # They are used as destination of target generators. 
set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX}) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 2b54bdbf2900..1c922651b133 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -91,7 +91,7 @@ function(add_llvm_symbol_exports target_name export_file) DEPENDS ${export_file} VERBATIM COMMENT "Creating export file for ${target_name}") - if (${CMAKE_SYSTEM_NAME} MATCHES "SunOS") + if (${LLVM_LINKER_IS_SOLARISLD}) set_property(TARGET ${target_name} APPEND_STRING PROPERTY LINK_FLAGS " -Wl,-M,${CMAKE_CURRENT_BINARY_DIR}/${native_export_file}") else() @@ -148,13 +148,28 @@ function(add_llvm_symbol_exports target_name export_file) endfunction(add_llvm_symbol_exports) if(NOT WIN32 AND NOT APPLE) + # Detect what linker we have here execute_process( COMMAND ${CMAKE_C_COMPILER} -Wl,--version OUTPUT_VARIABLE stdout - ERROR_QUIET + ERROR_VARIABLE stderr ) + set(LLVM_LINKER_DETECTED ON) if("${stdout}" MATCHES "GNU gold") set(LLVM_LINKER_IS_GOLD ON) + message(STATUS "Linker detection: GNU Gold") + elseif("${stdout}" MATCHES "^LLD") + set(LLVM_LINKER_IS_LLD ON) + message(STATUS "Linker detection: LLD") + elseif("${stdout}" MATCHES "GNU ld") + set(LLVM_LINKER_IS_GNULD ON) + message(STATUS "Linker detection: GNU ld") + elseif("${stderr}" MATCHES "Solaris Link Editors") + set(LLVM_LINKER_IS_SOLARISLD ON) + message(STATUS "Linker detection: Solaris ld") + else() + set(LLVM_LINKER_DETECTED OFF) + message(STATUS "Linker detection: unknown") endif() endif() @@ -865,7 +880,7 @@ macro(add_llvm_utility name) set_target_properties(${name} PROPERTIES FOLDER "Utils") if( LLVM_INSTALL_UTILS AND LLVM_BUILD_UTILS ) install (TARGETS ${name} - RUNTIME DESTINATION bin + RUNTIME DESTINATION ${LLVM_UTILS_INSTALL_DIR} COMPONENT ${name}) if (NOT CMAKE_CONFIGURATION_TYPES) add_custom_target(install-${name} @@ -1159,11 +1174,6 @@ function(add_lit_target target comment) list(APPEND LIT_ARGS --param build_mode=${CMAKE_CFG_INTDIR}) endif () if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) - # reset cache after erraneous r283029 - # TODO: remove this once all buildbots run - if (LIT_COMMAND STREQUAL "${PYTHON_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py") - unset(LIT_COMMAND CACHE) - endif() set (LIT_COMMAND "${PYTHON_EXECUTABLE};${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py" CACHE STRING "Command used to spawn llvm-lit") else() diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 98f58d7b197d..0676317acc68 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -686,8 +686,8 @@ endif() # lld doesn't print colored diagnostics when invoked from Ninja if (UNIX AND CMAKE_GENERATOR STREQUAL "Ninja") include(CheckLinkerFlag) - check_linker_flag("-Wl,-color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS) - append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,-color-diagnostics" + check_linker_flag("-Wl,--color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS) + append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,--color-diagnostics" CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) endif() diff --git a/cmake/modules/LLVMExternalProjectUtils.cmake b/cmake/modules/LLVMExternalProjectUtils.cmake index d457389f3ca3..c851eb8dbf08 100644 --- a/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/cmake/modules/LLVMExternalProjectUtils.cmake @@ -195,8 +195,16 @@ 
function(llvm_ExternalProject_Add name source_dir) # Add top-level targets foreach(target ${ARG_EXTRA_TARGETS}) + string(REPLACE ":" ";" target_list ${target}) + list(GET target_list 0 target) + list(LENGTH target_list target_list_len) + if(${target_list_len} GREATER 1) + list(GET target_list 1 target_name) + else() + set(target_name "${target}") + endif() llvm_ExternalProject_BuildCmd(build_runtime_cmd ${target} ${BINARY_DIR}) - add_custom_target(${target} + add_custom_target(${target_name} COMMAND ${build_runtime_cmd} DEPENDS ${name}-configure WORKING_DIRECTORY ${BINARY_DIR} diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index 57822ae9ab0a..41c7ecba527f 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -190,9 +190,7 @@ names from both the *Processor* and *Alternative Processor* can be used. gfx810 - stoney amdgcn APU **GCN GFX9** -------------------------------------------------------------------- - gfx900 amdgcn dGPU - FirePro W9500 - - FirePro S9500 - - FirePro S9500x2 + gfx900 amdgcn dGPU - Radeon Vega Frontier Edition gfx901 amdgcn dGPU ROCm Same as gfx900 except XNACK is enabled diff --git a/docs/CMake.rst b/docs/CMake.rst index aeebc8f6acf9..bf97e9173158 100644 --- a/docs/CMake.rst +++ b/docs/CMake.rst @@ -536,6 +536,11 @@ LLVM-specific variables during the build. Enabling this option can significantly speed up build times especially when building LLVM in Debug configurations. +**LLVM_REVERSE_ITERATION**:BOOL + If enabled, all supported unordered llvm containers would be iterated in + reverse order. This is useful for uncovering non-determinism caused by + iteration of unordered containers. + CMake Caches ============ diff --git a/docs/CMakePrimer.rst b/docs/CMakePrimer.rst index 1e3a09e4d98a..c29d627ee62c 100644 --- a/docs/CMakePrimer.rst +++ b/docs/CMakePrimer.rst @@ -112,33 +112,6 @@ In this example the ``extra_sources`` variable is only defined if you're targeting an Apple platform. For all other targets the ``extra_sources`` will be evaluated as empty before add_executable is given its arguments. -One big "Gotcha" with variable dereferencing is that ``if`` commands implicitly -dereference values. This has some unexpected results. For example: - -.. code-block:: cmake - - if("${SOME_VAR}" STREQUAL "MSVC") - -In this code sample MSVC will be implicitly dereferenced, which will result in -the if command comparing the value of the dereferenced variables ``SOME_VAR`` -and ``MSVC``. A common workaround to this solution is to prepend strings being -compared with an ``x``. - -.. code-block:: cmake - - if("x${SOME_VAR}" STREQUAL "xMSVC") - -This works because while ``MSVC`` is a defined variable, ``xMSVC`` is not. This -pattern is uncommon, but it does occur in LLVM's CMake scripts. - -.. note:: - - Once the LLVM project upgrades its minimum CMake version to 3.1 or later we - can prevent this behavior by setting CMP0054 to new. For more information on - CMake policies please see the cmake-policies manpage or the `cmake-policies - online documentation - `_. - Lists ----- diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst index b8299d44d48e..b4d15ef57b73 100644 --- a/docs/CommandGuide/lit.rst +++ b/docs/CommandGuide/lit.rst @@ -169,6 +169,13 @@ SELECTION OPTIONS must be in the range ``1..M``. The environment variable ``LIT_RUN_SHARD`` can also be used in place of this option. +.. option:: --filter=REGEXP + + Run only those tests whose name matches the regular expression specified in + ``REGEXP``. 
The environment variable ``LIT_FILTER`` can be also used in place + of this option, which is especially useful in environments where the call + to ``lit`` is issued indirectly. + ADDITIONAL OPTIONS ------------------ diff --git a/docs/CommandGuide/llvm-cov.rst b/docs/CommandGuide/llvm-cov.rst index ea2e625bc4d2..47db8d04e0b2 100644 --- a/docs/CommandGuide/llvm-cov.rst +++ b/docs/CommandGuide/llvm-cov.rst @@ -262,6 +262,12 @@ OPTIONS The demangler is expected to read a newline-separated list of symbols from stdin and write a newline-separated list of the same length to stdout. +.. option:: -num-threads=N, -j=N + + Use N threads to write file reports (only applicable when -output-dir is + specified). When N=0, llvm-cov auto-detects an appropriate number of threads to + use. This is the default. + .. option:: -line-coverage-gt= Show code coverage only for functions with line coverage greater than the diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst index f7aa8309485b..5b6330b5dc40 100644 --- a/docs/CommandGuide/llvm-profdata.rst +++ b/docs/CommandGuide/llvm-profdata.rst @@ -192,6 +192,12 @@ OPTIONS information is dumped in a more human readable form (also in text) with annotations. +.. option:: -topn=n + + Instruct the profile dumper to show the top ``n`` functions with the + hottest basic blocks in the summary section. By default, the topn functions + are not dumped. + .. option:: -sample Specify that the input profile is a sample-based profile. diff --git a/docs/Coroutines.rst b/docs/Coroutines.rst index f7a38577fe8e..1bea04ebdd2a 100644 --- a/docs/Coroutines.rst +++ b/docs/Coroutines.rst @@ -846,7 +846,7 @@ Overview: """"""""" The '``llvm.coro.alloc``' intrinsic returns `true` if dynamic allocation is -required to obtain a memory for the corutine frame and `false` otherwise. +required to obtain a memory for the coroutine frame and `false` otherwise. Arguments: """""""""" diff --git a/docs/Docker.rst b/docs/Docker.rst index d873e1ebeeb4..e606e1b71a2c 100644 --- a/docs/Docker.rst +++ b/docs/Docker.rst @@ -88,15 +88,11 @@ compiled by the system compiler in the debian8 image: ./llvm/utils/docker/build_docker_image.sh \ --source debian8 \ --docker-repository clang-debian8 --docker-tag "staging" \ - -- \ -p clang -i install-clang -i install-clang-headers \ -- \ -DCMAKE_BUILD_TYPE=Release -Note there are two levels of ``--`` indirection. First one separates -``build_docker_image.sh`` arguments from ``llvm/utils/build_install_llvm.sh`` -arguments. Second one separates CMake arguments from ``build_install_llvm.sh`` -arguments. Note that build like that doesn't use a 2-stage build process that +Note that a build like that doesn't use a 2-stage build process that you probably want for clang. 
Running a 2-stage build is a little more intricate, this command will do that: @@ -108,7 +104,6 @@ this command will do that: ./build_docker_image.sh \ --source debian8 \ --docker-repository clang-debian8 --docker-tag "staging" \ - -- \ -p clang -i stage2-install-clang -i stage2-install-clang-headers \ -- \ -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \ @@ -178,7 +173,6 @@ debian8-based image using the latest ``google/stable`` sources for you: ./llvm/utils/docker/build_docker_image.sh \ -s debian8 --d clang-debian8 -t "staging" \ - -- \ --branch branches/google/stable \ -p clang -i install-clang -i install-clang-headers \ -- \ diff --git a/docs/Dummy.html b/docs/Dummy.html deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/docs/HowToAddABuilder.rst b/docs/HowToAddABuilder.rst index 08cbecdc2a57..201c71b21391 100644 --- a/docs/HowToAddABuilder.rst +++ b/docs/HowToAddABuilder.rst @@ -62,6 +62,9 @@ Here are the steps you can follow to do so: lab.llvm.org:9990 \ + To point a slave to silent master please use lab.llvm.org:9994 instead + of lab.llvm.org:9990. + #. Fill the buildslave description and admin name/e-mail. Here is an example of the buildslave description:: diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 2a0812ab930f..44efc1498060 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -2209,12 +2209,21 @@ For a simpler introduction to the ordering constraints, see the same address in this global order. This corresponds to the C++0x/C1x ``memory_order_seq_cst`` and Java volatile. -.. _singlethread: +.. _syncscope: -If an atomic operation is marked ``singlethread``, it only *synchronizes -with* or participates in modification and seq\_cst total orderings with -other operations running in the same thread (for example, in signal -handlers). +If an atomic operation is marked ``syncscope("singlethread")``, it only +*synchronizes with* and only participates in the seq\_cst total orderings of +other operations running in the same thread (for example, in signal handlers). + +If an atomic operation is marked ``syncscope("")``, where +```` is a target specific synchronization scope, then it is target +dependent if it *synchronizes with* and participates in the seq\_cst total +orderings of other operations. + +Otherwise, an atomic operation that is not marked ``syncscope("singlethread")`` +or ``syncscope("")`` *synchronizes with* and participates in the +seq\_cst total orderings of other operations that are not marked +``syncscope("singlethread")`` or ``syncscope("")``. .. _fastmath: @@ -5034,7 +5043,7 @@ which is the string ``llvm.loop.licm_versioning.disable``. For example: Loop distribution allows splitting a loop into multiple loops. Currently, this is only performed if the entire loop cannot be vectorized due to unsafe -memory dependencies. The transformation will atempt to isolate the unsafe +memory dependencies. The transformation will attempt to isolate the unsafe dependencies into their own loop. This metadata can be used to selectively enable or disable distribution of the @@ -7380,7 +7389,7 @@ Syntax: :: = load [volatile] , * [, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !] - = load atomic [volatile] , * [singlethread] , align [, !invariant.group !] + = load atomic [volatile] , * [syncscope("")] , align [, !invariant.group !] ! = !{ i32 1 } ! = !{i64 } ! 
= !{ i64 } @@ -7401,14 +7410,14 @@ modify the number or order of execution of this ``load`` with other :ref:`volatile operations `. If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering -` and optional ``singlethread`` argument. The ``release`` and -``acq_rel`` orderings are not valid on ``load`` instructions. Atomic loads -produce :ref:`defined ` results when they may see multiple atomic -stores. The type of the pointee must be an integer, pointer, or floating-point -type whose bit width is a power of two greater than or equal to eight and less -than or equal to a target-specific size limit. ``align`` must be explicitly -specified on atomic loads, and the load has undefined behavior if the alignment -is not set to a value which is at least the size in bytes of the +` and optional ``syncscope("")`` argument. The +``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions. +Atomic loads produce :ref:`defined ` results when they may see +multiple atomic stores. The type of the pointee must be an integer, pointer, or +floating-point type whose bit width is a power of two greater than or equal to +eight and less than or equal to a target-specific size limit. ``align`` must be +explicitly specified on atomic loads, and the load has undefined behavior if the +alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic loads. The optional constant ``align`` argument specifies the alignment of the @@ -7509,7 +7518,7 @@ Syntax: :: store [volatile] , * [, align ][, !nontemporal !][, !invariant.group !] ; yields void - store atomic [volatile] , * [singlethread] , align [, !invariant.group !] ; yields void + store atomic [volatile] , * [syncscope("")] , align [, !invariant.group !] ; yields void Overview: """"""""" @@ -7529,14 +7538,14 @@ allowed to modify the number or order of execution of this ``store`` with other structural type `) can be stored. If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering -` and optional ``singlethread`` argument. The ``acquire`` and -``acq_rel`` orderings aren't valid on ``store`` instructions. Atomic loads -produce :ref:`defined ` results when they may see multiple atomic -stores. The type of the pointee must be an integer, pointer, or floating-point -type whose bit width is a power of two greater than or equal to eight and less -than or equal to a target-specific size limit. ``align`` must be explicitly -specified on atomic stores, and the store has undefined behavior if the -alignment is not set to a value which is at least the size in bytes of the +` and optional ``syncscope("")`` argument. The +``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions. +Atomic loads produce :ref:`defined ` results when they may see +multiple atomic stores. The type of the pointee must be an integer, pointer, or +floating-point type whose bit width is a power of two greater than or equal to +eight and less than or equal to a target-specific size limit. ``align`` must be +explicitly specified on atomic stores, and the store has undefined behavior if +the alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic stores. 
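As a minimal sketch of the atomic load/store, ``atomicrmw``, and ``fence`` forms described above (the function and value names are illustrative, and ``"agent"`` is assumed here only as an example of a target-specific scope name; only ``"singlethread"`` has a target-independent meaning):

.. code-block:: llvm

    define i32 @example(i32* %p, i32 %v) {
    entry:
      ; load/store that only synchronize with code running in the same thread
      %old = load atomic i32, i32* %p syncscope("singlethread") seq_cst, align 4
      store atomic i32 %v, i32* %p syncscope("singlethread") seq_cst, align 4
      ; read-modify-write and fence using a target-specific scope name
      %sum = atomicrmw add i32* %p, i32 %v syncscope("agent") seq_cst
      fence syncscope("agent") seq_cst
      ret i32 %old
    }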
The optional constant ``align`` argument specifies the alignment of the @@ -7597,7 +7606,7 @@ Syntax: :: - fence [singlethread] ; yields void + fence [syncscope("")] ; yields void Overview: """"""""" @@ -7631,17 +7640,17 @@ A ``fence`` which has ``seq_cst`` ordering, in addition to having both ``acquire`` and ``release`` semantics specified above, participates in the global program order of other ``seq_cst`` operations and/or fences. -The optional ":ref:`singlethread `" argument specifies -that the fence only synchronizes with other fences in the same thread. -(This is useful for interacting with signal handlers.) +A ``fence`` instruction can also take an optional +":ref:`syncscope `" argument. Example: """""""" .. code-block:: llvm - fence acquire ; yields void - fence singlethread seq_cst ; yields void + fence acquire ; yields void + fence syncscope("singlethread") seq_cst ; yields void + fence syncscope("agent") seq_cst ; yields void .. _i_cmpxchg: @@ -7653,7 +7662,7 @@ Syntax: :: - cmpxchg [weak] [volatile] * , , [singlethread] ; yields { ty, i1 } + cmpxchg [weak] [volatile] * , , [syncscope("")] ; yields { ty, i1 } Overview: """"""""" @@ -7682,10 +7691,8 @@ must be at least ``monotonic``, the ordering constraint on failure must be no stronger than that on success, and the failure ordering cannot be either ``release`` or ``acq_rel``. -The optional "``singlethread``" argument declares that the ``cmpxchg`` -is only atomic with respect to code (usually signal handlers) running in -the same thread as the ``cmpxchg``. Otherwise the cmpxchg is atomic with -respect to all other code in the system. +A ``cmpxchg`` instruction can also take an optional +":ref:`syncscope `" argument. The pointer passed into cmpxchg must have alignment greater than or equal to the size in memory of the operand. @@ -7739,7 +7746,7 @@ Syntax: :: - atomicrmw [volatile] * , [singlethread] ; yields ty + atomicrmw [volatile] * , [syncscope("")] ; yields ty Overview: """"""""" @@ -7773,6 +7780,9 @@ be a pointer to that type. If the ``atomicrmw`` is marked as order of execution of this ``atomicrmw`` with other :ref:`volatile operations `. +A ``atomicrmw`` instruction can also take an optional +":ref:`syncscope `" argument. + Semantics: """""""""" @@ -10272,6 +10282,8 @@ overlap. It copies "len" bytes of memory over. If the argument is known to be aligned to some boundary, this can be specified as the fourth argument, otherwise it should be set to 0 or 1 (both meaning no alignment). +.. _int_memmove: + '``llvm.memmove``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10327,6 +10339,8 @@ copies "len" bytes of memory over. If the argument is known to be aligned to some boundary, this can be specified as the fourth argument, otherwise it should be set to 0 or 1 (both meaning no alignment). +.. _int_memset: + '``llvm.memset.*``' Intrinsics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -14168,4 +14182,154 @@ In the most general case call to the '``llvm.memcpy.element.unordered.atomic.*`` lowered to a call to the symbol ``__llvm_memcpy_element_unordered_atomic_*``. Where '*' is replaced with an actual element size. +Optimizer is allowed to inline memory copy when it's profitable to do so. + +'``llvm.memmove.element.unordered.atomic``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use +``llvm.memmove.element.unordered.atomic`` on any integer bit width and for +different address spaces. Not all targets support all bit widths however. 
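A minimal call-site sketch for the i32 overload declared just below (the function name, pointer names, and constants are illustrative): ``len`` must be a positive multiple of ``element_size``, and each pointer argument must carry an ``align`` parameter attribute no smaller than ``element_size``.

.. code-block:: llvm

    declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8*, i8*, i32, i32)

    define void @copy16(i8* %dst, i8* %src) {
    entry:
      ; copy 16 bytes as four unordered-atomic 4-byte elements
      call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dst, i8* align 4 %src, i32 16, i32 4)
      ret void
    }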
+ +:: + + declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* , + i8* , + i32 , + i32 ) + declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* , + i8* , + i64 , + i32 ) + +Overview: +""""""""" + +The '``llvm.memmove.element.unordered.atomic.*``' intrinsic is a specialization +of the '``llvm.memmove.*``' intrinsic. It differs in that the ``dest`` and +``src`` are treated as arrays with elements that are exactly ``element_size`` +bytes, and the copy between buffers uses a sequence of +:ref:`unordered atomic ` load/store operations that are a positive +integer multiple of the ``element_size`` in size. + +Arguments: +"""""""""" + +The first three arguments are the same as they are in the +:ref:`@llvm.memmove ` intrinsic, with the added constraint that +``len`` is required to be a positive integer multiple of the ``element_size``. +If ``len`` is not a positive integer multiple of ``element_size``, then the +behaviour of the intrinsic is undefined. + +``element_size`` must be a compile-time constant positive power of two no +greater than a target-specific atomic access size limit. + +For each of the input pointers the ``align`` parameter attribute must be +specified. It must be a power of two no less than the ``element_size``. Caller +guarantees that both the source and destination pointers are aligned to that +boundary. + +Semantics: +"""""""""" + +The '``llvm.memmove.element.unordered.atomic.*``' intrinsic copies ``len`` bytes +of memory from the source location to the destination location. These locations +are allowed to overlap. The memory copy is performed as a sequence of load/store +operations where each access is guaranteed to be a multiple of ``element_size`` +bytes wide and aligned at an ``element_size`` boundary. + +The order of the copy is unspecified. The same value may be read from the source +buffer many times, but only one write is issued to the destination buffer per +element. It is well defined to have concurrent reads and writes to both source +and destination provided those reads and writes are unordered atomic when +specified. + +This intrinsic does not provide any additional ordering guarantees over those +provided by a set of unordered loads from the source location and stores to the +destination. + +Lowering: +""""""""" + +In the most general case call to the +'``llvm.memmove.element.unordered.atomic.*``' is lowered to a call to the symbol +``__llvm_memmove_element_unordered_atomic_*``. Where '*' is replaced with an +actual element size. + The optimizer is allowed to inline the memory copy when it's profitable to do so. + +.. _int_memset_element_unordered_atomic: + +'``llvm.memset.element.unordered.atomic``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.memset.element.unordered.atomic`` on +any integer bit width and for different address spaces. Not all targets +support all bit widths however. + +:: + + declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* , + i8 , + i32 , + i32 ) + declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* , + i8 , + i64 , + i32 ) + +Overview: +""""""""" + +The '``llvm.memset.element.unordered.atomic.*``' intrinsic is a specialization of the +'``llvm.memset.*``' intrinsic. 
It differs in that the ``dest`` is treated as an array +with elements that are exactly ``element_size`` bytes, and the assignment to that array +uses uses a sequence of :ref:`unordered atomic ` store operations +that are a positive integer multiple of the ``element_size`` in size. + +Arguments: +"""""""""" + +The first three arguments are the same as they are in the :ref:`@llvm.memset ` +intrinsic, with the added constraint that ``len`` is required to be a positive integer +multiple of the ``element_size``. If ``len`` is not a positive integer multiple of +``element_size``, then the behaviour of the intrinsic is undefined. + +``element_size`` must be a compile-time constant positive power of two no greater than +target-specific atomic access size limit. + +The ``dest`` input pointer must have the ``align`` parameter attribute specified. It +must be a power of two no less than the ``element_size``. Caller guarantees that +the destination pointer is aligned to that boundary. + +Semantics: +"""""""""" + +The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of +memory starting at the destination location to the given ``value``. The memory is +set with a sequence of store operations where each access is guaranteed to be a +multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary. + +The order of the assignment is unspecified. Only one write is issued to the +destination buffer per element. It is well defined to have concurrent reads and +writes to the destination provided those reads and writes are unordered atomic +when specified. + +This intrinsic does not provide any additional ordering guarantees over those +provided by a set of unordered stores to the destination. + +Lowering: +""""""""" + +In the most general case call to the '``llvm.memset.element.unordered.atomic.*``' is +lowered to a call to the symbol ``__llvm_memset_element_unordered_atomic_*``. Where '*' +is replaced with an actual element size. + +The optimizer is allowed to inline the memory assignment when it's profitable to do so. + diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst index 5acfa04ce1f4..0f0b0e2e6fbd 100644 --- a/docs/LibFuzzer.rst +++ b/docs/LibFuzzer.rst @@ -587,7 +587,7 @@ The simplest way is to have a statically initialized global object inside Alternatively, you may define an optional init function and it will receive the program arguments that you can read and modify. Do this **only** if you -realy need to access ``argv``/``argc``. +really need to access ``argv``/``argc``. .. code-block:: c++ diff --git a/docs/tutorial/BuildingAJIT1.rst b/docs/tutorial/BuildingAJIT1.rst index 625cbbba1a5c..88f7aa5abbc7 100644 --- a/docs/tutorial/BuildingAJIT1.rst +++ b/docs/tutorial/BuildingAJIT1.rst @@ -12,7 +12,7 @@ Welcome to Chapter 1 of the "Building an ORC-based JIT in LLVM" tutorial. This tutorial runs through the implementation of a JIT compiler using LLVM's On-Request-Compilation (ORC) APIs. It begins with a simplified version of the KaleidoscopeJIT class used in the -`Implementing a language with LLVM `_ tutorials and then +`Implementing a language with LLVM `_ tutorials and then introduces new features like optimization, lazy compilation and remote execution. @@ -41,7 +41,7 @@ The structure of the tutorial is: a remote process with reduced privileges using the JIT Remote APIs. 
To provide input for our JIT we will use the Kaleidoscope REPL from -`Chapter 7 `_ of the "Implementing a language in LLVM tutorial", +`Chapter 7 `_ of the "Implementing a language in LLVM tutorial", with one minor modification: We will remove the FunctionPassManager from the code for that chapter and replace it with optimization support in our JIT class in Chapter #2. @@ -91,8 +91,8 @@ KaleidoscopeJIT In the previous section we described our API, now we examine a simple implementation of it: The KaleidoscopeJIT class [1]_ that was used in the -`Implementing a language with LLVM `_ tutorials. We will use -the REPL code from `Chapter 7 `_ of that tutorial to supply the +`Implementing a language with LLVM `_ tutorials. We will use +the REPL code from `Chapter 7 `_ of that tutorial to supply the input for our JIT: Each time the user enters an expression the REPL will add a new IR module containing the code for that expression to the JIT. If the expression is a top-level expression like '1+1' or 'sin(x)', the REPL will also diff --git a/docs/tutorial/BuildingAJIT2.rst b/docs/tutorial/BuildingAJIT2.rst index 839875266a24..2f22bdad6c14 100644 --- a/docs/tutorial/BuildingAJIT2.rst +++ b/docs/tutorial/BuildingAJIT2.rst @@ -25,7 +25,7 @@ IRTransformLayer, to add IR optimization support to KaleidoscopeJIT. Optimizing Modules using the IRTransformLayer ============================================= -In `Chapter 4 `_ of the "Implementing a language with LLVM" +In `Chapter 4 `_ of the "Implementing a language with LLVM" tutorial series the llvm *FunctionPassManager* is introduced as a means for optimizing LLVM IR. Interested readers may read that chapter for details, but in short: to optimize a Module we create an llvm::FunctionPassManager @@ -148,7 +148,7 @@ At the bottom of our JIT we add a private method to do the actual optimization: *optimizeModule*. This function sets up a FunctionPassManager, adds some passes to it, runs it over every function in the module, and then returns the mutated module. The specific optimizations are the same ones used in -`Chapter 4 `_ of the "Implementing a language with LLVM" +`Chapter 4 `_ of the "Implementing a language with LLVM" tutorial series. Readers may visit that chapter for a more in-depth discussion of these, and of IR optimization in general. diff --git a/docs/tutorial/LangImpl02.rst b/docs/tutorial/LangImpl02.rst index 4be447eb5ba3..d72c8dc9add4 100644 --- a/docs/tutorial/LangImpl02.rst +++ b/docs/tutorial/LangImpl02.rst @@ -10,7 +10,7 @@ Chapter 2 Introduction Welcome to Chapter 2 of the "`Implementing a language with LLVM `_" tutorial. This chapter shows you how to use the -lexer, built in `Chapter 1 `_, to build a full +lexer, built in `Chapter 1 `_, to build a full `parser `_ for our Kaleidoscope language. Once we have a parser, we'll define and build an `Abstract Syntax Tree `_ (AST). diff --git a/docs/tutorial/LangImpl03.rst b/docs/tutorial/LangImpl03.rst index 1dfe10175c74..fab2ddaf8829 100644 --- a/docs/tutorial/LangImpl03.rst +++ b/docs/tutorial/LangImpl03.rst @@ -10,7 +10,7 @@ Chapter 3 Introduction Welcome to Chapter 3 of the "`Implementing a language with LLVM `_" tutorial. This chapter shows you how to transform -the `Abstract Syntax Tree `_, built in Chapter 2, into +the `Abstract Syntax Tree `_, built in Chapter 2, into LLVM IR. This will teach you a little bit about how LLVM does things, as well as demonstrate how easy it is to use. It's much more work to build a lexer and parser than it is to generate LLVM IR code. 
:) @@ -362,7 +362,7 @@ end of the new basic block. Basic blocks in LLVM are an important part of functions that define the `Control Flow Graph `_. Since we don't have any control flow, our functions will only contain one block -at this point. We'll fix this in `Chapter 5 `_ :). +at this point. We'll fix this in `Chapter 5 `_ :). Next we add the function arguments to the NamedValues map (after first clearing it out) so that they're accessible to ``VariableExprAST`` nodes. @@ -540,7 +540,7 @@ functions referencing each other. This wraps up the third chapter of the Kaleidoscope tutorial. Up next, we'll describe how to `add JIT codegen and optimizer -support `_ to this so we can actually start running +support `_ to this so we can actually start running code! Full Code Listing diff --git a/docs/tutorial/LangImpl04.rst b/docs/tutorial/LangImpl04.rst index 16d7164ae15e..921c4dcc21ad 100644 --- a/docs/tutorial/LangImpl04.rst +++ b/docs/tutorial/LangImpl04.rst @@ -622,7 +622,7 @@ This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At this point, we can compile a non-Turing-complete programming language, optimize and JIT compile it in a user-driven way. Next up we'll look into `extending the language with control flow -constructs `_, tackling some interesting LLVM IR issues +constructs `_, tackling some interesting LLVM IR issues along the way. Full Code Listing diff --git a/docs/tutorial/LangImpl05.rst b/docs/tutorial/LangImpl05.rst index dcf45bcbf8d2..8650892e8f8b 100644 --- a/docs/tutorial/LangImpl05.rst +++ b/docs/tutorial/LangImpl05.rst @@ -269,7 +269,7 @@ Phi nodes: #. Values that are implicit in the structure of your AST, such as the Phi node in this case. -In `Chapter 7 `_ of this tutorial ("mutable variables"), +In `Chapter 7 `_ of this tutorial ("mutable variables"), we'll talk about #1 in depth. For now, just believe me that you don't need SSA construction to handle this case. For #2, you have the choice of using the techniques that we will describe for #1, or you can insert @@ -790,7 +790,7 @@ of the tutorial. In this chapter we added two control flow constructs, and used them to motivate a couple of aspects of the LLVM IR that are important for front-end implementors to know. In the next chapter of our saga, we will get a bit crazier and add `user-defined -operators `_ to our poor innocent language. +operators `_ to our poor innocent language. Full Code Listing ================= diff --git a/docs/tutorial/LangImpl06.rst b/docs/tutorial/LangImpl06.rst index c1035bce8559..cb8ec766bb26 100644 --- a/docs/tutorial/LangImpl06.rst +++ b/docs/tutorial/LangImpl06.rst @@ -41,7 +41,7 @@ The point of going into user-defined operators in a tutorial like this is to show the power and flexibility of using a hand-written parser. Thus far, the parser we have been implementing uses recursive descent for most parts of the grammar and operator precedence parsing for the -expressions. See `Chapter 2 `_ for details. By +expressions. See `Chapter 2 `_ for details. By using operator precedence parsing, it is very easy to allow the programmer to introduce new operators into the grammar: the grammar is dynamically extensible as the JIT runs. @@ -734,7 +734,7 @@ side-effects, but it can't actually define and mutate a variable itself. 
Strikingly, variable mutation is an important feature of some languages, and it is not at all obvious how to `add support for mutable -variables `_ without having to add an "SSA construction" +variables `_ without having to add an "SSA construction" phase to your front-end. In the next chapter, we will describe how you can add variable mutation without building SSA in your front-end. diff --git a/docs/tutorial/OCamlLangImpl5.rst b/docs/tutorial/OCamlLangImpl5.rst index 6e17de4b2bde..d06bf6ec252a 100644 --- a/docs/tutorial/OCamlLangImpl5.rst +++ b/docs/tutorial/OCamlLangImpl5.rst @@ -258,7 +258,7 @@ a truth value as a 1-bit (bool) value. let then_bb = append_block context "then" the_function in position_at_end then_bb builder; -As opposed to the `C++ tutorial `_, we have to build our +As opposed to the `C++ tutorial `_, we have to build our basic blocks bottom up since we can't have dangling BasicBlocks. We start off by saving a pointer to the first block (which might not be the entry block), which we'll need to build a conditional branch later. We diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h index f99722f60e91..5a2148a14a14 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h @@ -48,6 +48,7 @@ class KaleidoscopeJIT { KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); } @@ -74,9 +75,8 @@ class KaleidoscopeJIT { // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. - return CompileLayer.addModule(std::move(M), - make_unique(), - std::move(Resolver)); + return cantFail(CompileLayer.addModule(std::move(M), + std::move(Resolver))); } JITSymbol findSymbol(const std::string Name) { @@ -87,7 +87,7 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - CompileLayer.removeModule(H); + cantFail(CompileLayer.removeModule(H)); } }; diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp index 163caa6872d7..2471344c6d65 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter1/toy.cpp @@ -1150,7 +1150,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. 
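[editorial note] A recurring change in the KaleidoscopeJIT hunks above is that addModule() now returns an Expected<ModuleHandle> rather than a plain handle, which the examples unwrap with cantFail(). A hedged sketch of handling the error explicitly instead (CompileLayer, M and Resolver are assumed to exist as in the example JIT; not code from this patch):

    auto HandleOrErr = CompileLayer.addModule(std::move(M), std::move(Resolver));
    if (!HandleOrErr) {
      // Report and bail out instead of asserting via cantFail().
      logAllUnhandledErrors(HandleOrErr.takeError(), errs(),
                            "Failed to add module: ");
      exit(1);
    }
    ModuleHandle H = *HandleOrErr;   // usable with findSymbol()/removeModule()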
diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h index 2cd4ed79aafa..9a295f1566cb 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h @@ -57,6 +57,7 @@ class KaleidoscopeJIT { KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), OptimizeLayer(CompileLayer, [this](std::shared_ptr M) { @@ -87,9 +88,8 @@ class KaleidoscopeJIT { // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. - return OptimizeLayer.addModule(std::move(M), - make_unique(), - std::move(Resolver)); + return cantFail(OptimizeLayer.addModule(std::move(M), + std::move(Resolver))); } JITSymbol findSymbol(const std::string Name) { @@ -100,7 +100,7 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - OptimizeLayer.removeModule(H); + cantFail(OptimizeLayer.removeModule(H)); } private: diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp index 163caa6872d7..2471344c6d65 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter2/toy.cpp @@ -1150,7 +1150,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h index f6fb3071d526..a03f5ce5e238 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h @@ -63,6 +63,7 @@ class KaleidoscopeJIT { KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), OptimizeLayer(CompileLayer, [this](std::shared_ptr M) { @@ -100,9 +101,7 @@ class KaleidoscopeJIT { // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. - return CODLayer.addModule(std::move(M), - make_unique(), - std::move(Resolver)); + return cantFail(CODLayer.addModule(std::move(M), std::move(Resolver))); } JITSymbol findSymbol(const std::string Name) { @@ -113,7 +112,7 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - CODLayer.removeModule(H); + cantFail(CODLayer.removeModule(H)); } private: diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp index 163caa6872d7..2471344c6d65 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter3/toy.cpp @@ -1150,7 +1150,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. 
- double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h index d45874e9a693..d10e4748f1a1 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter4/KaleidoscopeJIT.h @@ -90,6 +90,7 @@ class KaleidoscopeJIT { KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), OptimizeLayer(CompileLayer, [this](std::shared_ptr M) { @@ -127,9 +128,8 @@ class KaleidoscopeJIT { // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. - return OptimizeLayer.addModule(std::move(M), - make_unique(), - std::move(Resolver)); + return cantFail(OptimizeLayer.addModule(std::move(M), + std::move(Resolver))); } Error addFunctionAST(std::unique_ptr FnAST) { @@ -175,7 +175,7 @@ class KaleidoscopeJIT { addModule(std::move(M)); auto Sym = findSymbol(SharedFnAST->getName() + "$impl"); assert(Sym && "Couldn't find compiled function?"); - JITTargetAddress SymAddr = Sym.getAddress(); + JITTargetAddress SymAddr = cantFail(Sym.getAddress()); if (auto Err = IndirectStubsMgr->updatePointer(mangle(SharedFnAST->getName()), SymAddr)) { @@ -195,7 +195,7 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - OptimizeLayer.removeModule(H); + cantFail(OptimizeLayer.removeModule(H)); } private: diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp index ff4b5220105b..ed8ae31ba0fd 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter4/toy.cpp @@ -1153,7 +1153,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. 
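[editorial note] JITSymbol::getAddress() likewise now returns Expected<JITTargetAddress>, which the toy interpreters above unwrap with cantFail(). An equivalent explicit-handling sketch (ExprSymbol is assumed to come from TheJIT->findSymbol(...) as in the HandleTopLevelExpression() hunks; illustrative only):

    if (auto AddrOrErr = ExprSymbol.getAddress()) {
      // Cast the resolved address to the expected signature and call it.
      double (*FP)() = (double (*)())(intptr_t)*AddrOrErr;
      fprintf(stderr, "Evaluated to %f\n", FP());
    } else {
      logAllUnhandledErrors(AddrOrErr.takeError(), errs(), "Symbol lookup: ");
    }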
diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h b/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h index e889c6d34322..7ea535b3af53 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter5/KaleidoscopeJIT.h @@ -97,6 +97,15 @@ class KaleidoscopeJIT { : TM(EngineBuilder().selectTarget(Triple(Remote.getTargetTriple()), "", "", SmallVector())), DL(TM->createDataLayout()), + ObjectLayer([&Remote]() { + std::unique_ptr MemMgr; + if (auto Err = Remote.createRemoteMemoryManager(MemMgr)) { + logAllUnhandledErrors(std::move(Err), errs(), + "Error creating remote memory manager:"); + exit(1); + } + return MemMgr; + }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)), OptimizeLayer(CompileLayer, [this](std::shared_ptr M) { @@ -146,18 +155,10 @@ class KaleidoscopeJIT { return JITSymbol(nullptr); }); - std::unique_ptr MemMgr; - if (auto Err = Remote.createRemoteMemoryManager(MemMgr)) { - logAllUnhandledErrors(std::move(Err), errs(), - "Error creating remote memory manager:"); - exit(1); - } - // Add the set to the JIT with the resolver we created above and a newly // created SectionMemoryManager. - return OptimizeLayer.addModule(std::move(M), - std::move(MemMgr), - std::move(Resolver)); + return cantFail(OptimizeLayer.addModule(std::move(M), + std::move(Resolver))); } Error addFunctionAST(std::unique_ptr FnAST) { @@ -203,7 +204,7 @@ class KaleidoscopeJIT { addModule(std::move(M)); auto Sym = findSymbol(SharedFnAST->getName() + "$impl"); assert(Sym && "Couldn't find compiled function?"); - JITTargetAddress SymAddr = Sym.getAddress(); + JITTargetAddress SymAddr = cantFail(Sym.getAddress()); if (auto Err = IndirectStubsMgr->updatePointer(mangle(SharedFnAST->getName()), SymAddr)) { @@ -227,7 +228,7 @@ class KaleidoscopeJIT { } void removeModule(ModuleHandle H) { - OptimizeLayer.removeModule(H); + cantFail(OptimizeLayer.removeModule(H)); } private: diff --git a/examples/Kaleidoscope/BuildingAJIT/Chapter5/toy.cpp b/examples/Kaleidoscope/BuildingAJIT/Chapter5/toy.cpp index edd050959d6b..7bbc06a0958f 100644 --- a/examples/Kaleidoscope/BuildingAJIT/Chapter5/toy.cpp +++ b/examples/Kaleidoscope/BuildingAJIT/Chapter5/toy.cpp @@ -1177,7 +1177,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - ExitOnErr(TheJIT->executeRemoteExpr(ExprSymbol.getAddress())); + ExitOnErr(TheJIT->executeRemoteExpr(cantFail(ExprSymbol.getAddress()))); // Delete the anonymous expression module from the JIT. TheJIT->removeModule(H); diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp index cf7d6c2bee04..921fa8908040 100644 --- a/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -611,7 +611,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. 
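[editorial note] The other constructor change repeated above is that the object linking layer now takes a factory callback producing a memory manager per added module, instead of addModule() receiving one. Roughly, for the local case (a sketch under the same assumptions as the Chapter 1-4 JITs, not code from this patch):

    // One SectionMemoryManager per module; the layer invokes the factory
    // each time a module is added.
    RTDyldObjectLinkingLayer ObjectLayer(
        []() { return std::make_shared<SectionMemoryManager>(); });

In the Chapter 5 (remote) variant shown above, the factory instead asks the OrcRemoteTargetClient for a remote memory manager and exits on failure.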
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp index 6852973bae40..2d23bdb26c21 100644 --- a/examples/Kaleidoscope/Chapter5/toy.cpp +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -885,7 +885,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp index 0c2221735589..b5e4495539fc 100644 --- a/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -1004,7 +1004,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp index 79ac7b33d7a1..32f4a658c5d2 100644 --- a/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -1173,7 +1173,7 @@ static void HandleTopLevelExpression() { // Get the symbol's address and cast it to the right type (takes no // arguments, returns a double) so we can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); + double (*FP)() = (double (*)())(intptr_t)cantFail(ExprSymbol.getAddress()); fprintf(stderr, "Evaluated to %f\n", FP()); // Delete the anonymous expression module from the JIT. diff --git a/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/examples/Kaleidoscope/include/KaleidoscopeJIT.h index fe73d717976d..215ce03af99b 100644 --- a/examples/Kaleidoscope/include/KaleidoscopeJIT.h +++ b/examples/Kaleidoscope/include/KaleidoscopeJIT.h @@ -45,6 +45,7 @@ class KaleidoscopeJIT { KaleidoscopeJIT() : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + ObjectLayer([]() { return std::make_shared(); }), CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); } @@ -62,9 +63,8 @@ class KaleidoscopeJIT { return JITSymbol(nullptr); }, [](const std::string &S) { return nullptr; }); - auto H = CompileLayer.addModule(std::move(M), - make_unique(), - std::move(Resolver)); + auto H = cantFail(CompileLayer.addModule(std::move(M), + std::move(Resolver))); ModuleHandles.push_back(H); return H; @@ -72,7 +72,7 @@ class KaleidoscopeJIT { void removeModule(ModuleHandleT H) { ModuleHandles.erase(find(ModuleHandles, H)); - CompileLayer.removeModule(H); + cantFail(CompileLayer.removeModule(H)); } JITSymbol findSymbol(const std::string Name) { @@ -115,7 +115,7 @@ class KaleidoscopeJIT { return JITSymbol(SymAddr, JITSymbolFlags::Exported); #ifdef LLVM_ON_WIN32 - // For Windows retry without "_" at begining, as RTDyldMemoryManager uses + // For Windows retry without "_" at beginning, as RTDyldMemoryManager uses // GetProcAddress and standard libraries like msvcrt.dll use names // with and without "_" (for example "_itoa" but "sin"). 
if (Name.length() > 2 && Name[0] == '_') diff --git a/include/llvm-c/OrcBindings.h b/include/llvm-c/OrcBindings.h index d86ea8808889..7ee395431358 100644 --- a/include/llvm-c/OrcBindings.h +++ b/include/llvm-c/OrcBindings.h @@ -113,8 +113,9 @@ void LLVMOrcDisposeMangledSymbol(char *MangledSymbol); /** * Create a lazy compile callback. */ -LLVMOrcTargetAddress +LLVMOrcErrorCode LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx); @@ -135,8 +136,9 @@ LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, /** * Add module to be eagerly compiled. */ -LLVMOrcModuleHandle +LLVMOrcErrorCode LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx); @@ -144,8 +146,9 @@ LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, /** * Add module to be lazily compiled one function at a time. */ -LLVMOrcModuleHandle +LLVMOrcErrorCode LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx); @@ -153,10 +156,11 @@ LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, /** * Add an object file. */ -LLVMOrcModuleHandle LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, - LLVMSharedObjectBufferRef Obj, - LLVMOrcSymbolResolverFn SymbolResolver, - void *SymbolResolverCtx); +LLVMOrcErrorCode LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, + LLVMSharedObjectBufferRef Obj, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx); /** * Remove a module set from the JIT. @@ -164,18 +168,20 @@ LLVMOrcModuleHandle LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, * This works for all modules that can be added via OrcAdd*, including object * files. */ -void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H); +LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle H); /** * Get symbol address from JIT instance. */ -LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, - const char *SymbolName); +LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, + const char *SymbolName); /** * Dispose of an ORC JIT stack. */ -void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack); +LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack); #ifdef __cplusplus } diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index e5f0c35534ac..a1cce6e5fe17 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -401,7 +401,11 @@ class LLVM_NODISCARD APInt { /// \brief Determine if this is a value of 1. /// /// This checks to see if the value of this APInt is one. - bool isOneValue() const { return getActiveBits() == 1; } + bool isOneValue() const { + if (isSingleWord()) + return U.VAL == 1; + return countLeadingZerosSlowCase() == BitWidth - 1; + } /// \brief Determine if this is the largest unsigned value. 
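[editorial note] The OrcBindings.h changes above move every C-API entry point to returning an LLVMOrcErrorCode and passing results back through out-parameters. A hedged usage sketch (the JIT stack setup is assumed and the symbol name is illustrative; not code from this patch):

    LLVMOrcTargetAddress MainAddr = 0;
    if (LLVMOrcGetSymbolAddress(JITStack, &MainAddr, "main") != LLVMOrcErrSuccess) {
      // On failure the stack records a message retrievable via the C API.
      fprintf(stderr, "lookup failed: %s\n", LLVMOrcGetErrorMsg(JITStack));
      return;
    }
    // MainAddr now holds the resolved address (or 0 if the symbol is unknown).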
/// diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 8c28412bb607..83f289c42a23 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -100,6 +100,8 @@ class function_ref { } public: + function_ref() : callback(nullptr) {} + template function_ref(Callable &&callable, typename std::enable_if< @@ -110,6 +112,8 @@ class function_ref { Ret operator()(Params ...params) const { return callback(callable, std::forward(params)...); } + + operator bool() const { return callback; } }; // deleter - Very very very simple method that is used to invoke operator diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index a2ad74b1e04a..4e8a2490ee3c 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -15,9 +15,9 @@ #ifndef LLVM_ADT_SMALLPTRSET_H #define LLVM_ADT_SMALLPTRSET_H -#include "llvm/Config/abi-breaking.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include "llvm/Support/ReverseIteration.h" #include "llvm/Support/type_traits.h" #include #include @@ -29,15 +29,6 @@ namespace llvm { -#if LLVM_ENABLE_ABI_BREAKING_CHECKS -template struct ReverseIterate { static bool value; }; -#if LLVM_ENABLE_REVERSE_ITERATION -template bool ReverseIterate::value = true; -#else -template bool ReverseIterate::value = false; -#endif -#endif - /// SmallPtrSetImplBase - This is the common code shared among all the /// SmallPtrSet<>'s, which is almost everything. SmallPtrSet has two modes, one /// for small and one for large sets. diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 3e05e09900a5..5de3821242e0 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -1353,4 +1353,4 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits { #undef DEBUG_TYPE -#endif +#endif // LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index a15a9e18c815..32868cbecdcf 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -577,12 +577,17 @@ class CGSCCToFunctionPassAdaptor // analyses will eventually occur when the module pass completes. PA.intersect(std::move(PassPA)); - // Update the call graph based on this function pass. This may also - // update the current SCC to point to a smaller, more refined SCC. - CurrentC = &updateCGAndAnalysisManagerForFunctionPass( - CG, *CurrentC, *N, AM, UR, DebugLogging); - assert(CG.lookupSCC(*N) == CurrentC && - "Current SCC not updated to the SCC containing the current node!"); + // If the call graph hasn't been preserved, update it based on this + // function pass. This may also update the current SCC to point to + // a smaller, more refined SCC. + auto PAC = PA.getChecker(); + if (!PAC.preserved() && !PAC.preservedSet>()) { + CurrentC = &updateCGAndAnalysisManagerForFunctionPass( + CG, *CurrentC, *N, AM, UR, DebugLogging); + assert( + CG.lookupSCC(*N) == CurrentC && + "Current SCC not updated to the SCC containing the current node!"); + } } // By definition we preserve the proxy. 
And we preserve all analyses on diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index ce0b7895f253..f33a2de5a5f4 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -160,7 +160,7 @@ InlineParams getInlineParams(int Threshold); /// the -Oz flag. InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel); -/// Return the cost associated with a callsite, including paramater passing +/// Return the cost associated with a callsite, including parameter passing /// and the call/return instruction. int getCallsiteCost(CallSite CS, const DataLayout &DL); diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index ad7f5c80549f..3a052761ad7d 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -652,17 +652,23 @@ class LazyCallGraph { /// Make an existing internal ref edge into a call edge. /// /// This may form a larger cycle and thus collapse SCCs into TargetN's SCC. - /// If that happens, the deleted SCC pointers are returned. These SCCs are - /// not in a valid state any longer but the pointers will remain valid - /// until destruction of the parent graph instance for the purpose of - /// clearing cached information. + /// If that happens, the optional callback \p MergedCB will be invoked (if + /// provided) on the SCCs being merged away prior to actually performing + /// the merge. Note that this will never include the target SCC as that + /// will be the SCC functions are merged into to resolve the cycle. Once + /// this function returns, these merged SCCs are not in a valid state but + /// the pointers will remain valid until destruction of the parent graph + /// instance for the purpose of clearing cached information. This function + /// also returns 'true' if a cycle was formed and some SCCs merged away as + /// a convenience. /// /// After this operation, both SourceN's SCC and TargetN's SCC may move /// position within this RefSCC's postorder list. Any SCCs merged are /// merged into the TargetN's SCC in order to preserve reachability analyses /// which took place on that SCC. - SmallVector switchInternalEdgeToCall(Node &SourceN, - Node &TargetN); + bool switchInternalEdgeToCall( + Node &SourceN, Node &TargetN, + function_ref MergedSCCs)> MergeCB = {}); /// Make an existing internal call edge between separate SCCs into a ref /// edge. 
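[editorial note] Per the revised documentation above, switchInternalEdgeToCall() now reports merged SCCs through an optional callback and returns whether a cycle was formed, rather than returning the dead SCC pointers. A rough caller-side sketch (RC, SourceN, TargetN and the invalidation set are assumed context, not code from this patch):

    bool FormedCycle = RC.switchInternalEdgeToCall(
        SourceN, TargetN, [&](ArrayRef<LazyCallGraph::SCC *> MergedSCCs) {
          // Clear cached state keyed on SCCs that are about to be merged away.
          for (LazyCallGraph::SCC *MergedC : MergedSCCs)
            InvalidatedSCCs.insert(MergedC);
        });
    (void)FormedCycle; // true iff SourceN's and TargetN's SCCs were merged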
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 60dafccd84bd..23ab372703ee 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -224,6 +224,9 @@ class ObjectSizeOffsetVisitor SizeOffsetType visitSelectInst(SelectInst &I); SizeOffsetType visitUndefValue(UndefValue&); SizeOffsetType visitInstruction(Instruction &I); + +private: + bool CheckedZextOrTrunc(APInt &I); }; typedef std::pair SizeOffsetEvalType; diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h index c0337b6daf37..cd4ec0a03a9e 100644 --- a/include/llvm/Analysis/RegionInfoImpl.h +++ b/include/llvm/Analysis/RegionInfoImpl.h @@ -34,10 +34,10 @@ #include #include -namespace llvm { - #define DEBUG_TYPE "region" +namespace llvm { + //===----------------------------------------------------------------------===// /// RegionBase Implementation template @@ -901,8 +901,8 @@ void RegionInfoBase::calculate(FuncT &F) { buildRegionsTree(DT->getNode(BB), TopLevelRegion); } -#undef DEBUG_TYPE - } // end namespace llvm +#undef DEBUG_TYPE + #endif // LLVM_ANALYSIS_REGIONINFOIMPL_H diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 68fbf640994c..dfb525e3de7a 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -753,6 +753,28 @@ class TargetTransformInfo { Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const; + /// \returns The type to use in a loop expansion of a memcpy call. + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, unsigned DestAlign) const; + + /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. + /// \param RemainingBytes The number of bytes to copy. + /// + /// Calculates the operand types to use when copying \p RemainingBytes of + /// memory, where source and destination alignments are \p SrcAlign and + /// \p DestAlign respectively. + void getMemcpyLoopResidualLoweringType(SmallVectorImpl &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const; + + /// \returns True if we want to test the new memcpy lowering functionality in + /// Transform/Utils. + /// Temporary. Will be removed once we move to the new functionality and + /// remove the old. + bool useWideIRMemcpyLoopLowering() const; + /// \returns True if the two functions have compatible attributes for inlining /// purposes. 
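[editorial note] The new TargetTransformInfo hooks above let a memcpy-expansion pass ask the target which operand type to use for the main copy loop and which types to use for the residual bytes. A hedged caller sketch (TTI, DL, Ctx, LenValue and the byte counts are assumed; not code from this patch):

    Type *LoopOpTy =
        TTI.getMemcpyLoopLoweringType(Ctx, LenValue, SrcAlign, DstAlign);
    uint64_t LoopOpSize = DL.getTypeStoreSize(LoopOpTy);
    uint64_t BytesInLoop = (TotalBytes / LoopOpSize) * LoopOpSize;

    SmallVector<Type *, 4> ResidualTys;
    TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx,
                                          TotalBytes - BytesInLoop,
                                          SrcAlign, DstAlign);
    // Emit a widened load/store loop over LoopOpTy, then one load/store pair
    // per entry in ResidualTys for the tail. The default implementation shown
    // later in this patch returns i8 for the loop and one i8 per residual byte.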
bool areInlineCompatible(const Function *Caller, @@ -953,6 +975,12 @@ class TargetTransformInfo::Concept { virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) = 0; + virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const = 0; + virtual void getMemcpyLoopResidualLoweringType( + SmallVectorImpl &OpsOut, LLVMContext &Context, + unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0; virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const = 0; virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; @@ -1266,6 +1294,19 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { Type *ExpectedType) override { return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const override { + return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign); + } + void getMemcpyLoopResidualLoweringType(SmallVectorImpl &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const override { + Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, + SrcAlign, DestAlign); + } bool areInlineCompatible(const Function *Caller, const Function *Callee) const override { return Impl.areInlineCompatible(Caller, Callee); diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 0246fc1c02cc..8740ee92eed5 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -444,6 +444,20 @@ class TargetTransformInfoImplBase { return nullptr; } + Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, + unsigned SrcAlign, unsigned DestAlign) const { + return Type::getInt8Ty(Context); + } + + void getMemcpyLoopResidualLoweringType(SmallVectorImpl &OpsOut, + LLVMContext &Context, + unsigned RemainingBytes, + unsigned SrcAlign, + unsigned DestAlign) const { + for (unsigned i = 0; i != RemainingBytes; ++i) + OpsOut.push_back(Type::getInt8Ty(Context)); + } + bool areInlineCompatible(const Function *Caller, const Function *Callee) const { return (Caller->getFnAttribute("target-cpu") == diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index e953ec8ab6ab..f4c57d4289fc 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -523,8 +523,7 @@ template class ArrayRef; /// (A) Optional isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL, - bool InvertAPred = false, - unsigned Depth = 0, + bool LHSIsFalse = false, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h index eef473b20dde..23e30b7a868d 100644 --- a/include/llvm/BinaryFormat/Wasm.h +++ b/include/llvm/BinaryFormat/Wasm.h @@ -94,7 +94,7 @@ struct WasmFunction { }; struct WasmDataSegment { - uint32_t Index; + uint32_t MemoryIndex; WasmInitExpr Offset; ArrayRef Content; }; @@ -107,7 +107,7 @@ struct WasmElemSegment { struct WasmRelocation { uint32_t Type; // The type of the relocation. 
- int32_t Index; // Index into function to global index space. + uint32_t Index; // Index into function to global index space. uint64_t Offset; // Offset from the start of the section. int64_t Addend; // A value to add to the symbol. }; diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 5435e48ff424..3777f956cf27 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -59,6 +59,8 @@ enum BlockIDs { FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID, SYMTAB_BLOCK_ID, + + SYNC_SCOPE_NAMES_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, @@ -172,6 +174,10 @@ enum OperandBundleTagCode { OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N] }; +enum SyncScopeNameCode { + SYNC_SCOPE_NAME = 1, +}; + // Value symbol table codes. enum ValueSymtabCodes { VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] @@ -404,12 +410,6 @@ enum AtomicOrderingCodes { ORDERING_SEQCST = 6 }; -/// Encoded SynchronizationScope values. -enum AtomicSynchScopeCodes { - SYNCHSCOPE_SINGLETHREAD = 0, - SYNCHSCOPE_CROSSTHREAD = 1 -}; - /// Markers and flags for call instruction. enum CallMarkersFlags { CALL_TAIL = 0, diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index c898667f1474..60bbc9aaa5bd 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -608,8 +608,8 @@ class AsmPrinter : public MachineFunctionPass { // Internal Implementation Details //===------------------------------------------------------------------===// - /// This emits visibility information about symbol, if this is suported by the - /// target. + /// This emits visibility information about symbol, if this is supported by + /// the target. void EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition = true) const; diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index a740df96899d..b59fd60e8aed 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -428,7 +428,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); - bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); + bool IsFloat = Ty->isFPOrFPVectorTy(); // Assume that floating point arithmetic operations cost twice as much as // integer operations. unsigned OpCost = (IsFloat ? 
2 : 1); diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index ec60123e54b1..59a4073646eb 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -16,14 +16,17 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H +#include "llvm/ADT/SmallVector.h" #include #include #include #include #include +#include namespace llvm { +class LLT; class MachineInstr; class MachineInstrBuilder; class MachineOperand; @@ -58,6 +61,131 @@ class PredicateBitsetImpl : public std::bitset { } }; +enum { + /// Record the specified instruction + /// - NewInsnID - Instruction ID to define + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + GIM_RecordInsn, + + /// Check the feature bits + /// - Expected features + GIM_CheckFeatures, + + /// Check the opcode on the specified instruction + /// - InsnID - Instruction ID + /// - Expected opcode + GIM_CheckOpcode, + /// Check the instruction has the right number of operands + /// - InsnID - Instruction ID + /// - Expected number of operands + GIM_CheckNumOperands, + + /// Check the type for the specified operand + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected type + GIM_CheckType, + /// Check the register bank for the specified operand + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected register bank (specified as a register class) + GIM_CheckRegBankForClass, + /// Check the operand matches a complex predicate + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - RendererID - The renderer to hold the result + /// - Complex predicate ID + GIM_CheckComplexPattern, + /// Check the operand is a specific integer + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected integer + GIM_CheckConstantInt, + /// Check the operand is a specific literal integer (i.e. MO.isImm() or MO.isCImm() is true). + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected integer + GIM_CheckLiteralInt, + /// Check the operand is a specific intrinsic ID + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - Expected Intrinsic ID + GIM_CheckIntrinsicID, + /// Check the specified operand is an MBB + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + GIM_CheckIsMBB, + + /// Check if the specified operand is safe to fold into the current + /// instruction. 
+ /// - InsnID - Instruction ID + GIM_CheckIsSafeToFold, + + //=== Renderers === + + /// Mutate an instruction + /// - NewInsnID - Instruction ID to define + /// - OldInsnID - Instruction ID to mutate + /// - NewOpcode - The new opcode to use + GIR_MutateOpcode, + /// Build a new instruction + /// - InsnID - Instruction ID to define + /// - Opcode - The new opcode to use + GIR_BuildMI, + + /// Copy an operand to the specified instruction + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// - OpIdx - The operand to copy + GIR_Copy, + /// Copy an operand to the specified instruction + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// - OpIdx - The operand to copy + /// - SubRegIdx - The subregister to copy + GIR_CopySubReg, + /// Add an implicit register def to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RegNum - The register to add + GIR_AddImplicitDef, + /// Add an implicit register use to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RegNum - The register to add + GIR_AddImplicitUse, + /// Add an register to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RegNum - The register to add + GIR_AddRegister, + /// Add an immediate to the specified instruction + /// - InsnID - Instruction ID to modify + /// - Imm - The immediate to add + GIR_AddImm, + /// Render complex operands to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RendererID - The renderer to call + GIR_ComplexRenderer, + + /// Constrain an instruction operand to a register class. + /// - InsnID - Instruction ID to modify + /// - OpIdx - Operand index + /// - RCEnum - Register class enumeration value + GIR_ConstrainOperandRC, + /// Constrain an instructions operands according to the instruction + /// description. + /// - InsnID - Instruction ID to modify + GIR_ConstrainSelectedInstOperands, + /// Merge all memory operands into instruction. + /// - InsnID - Instruction ID to modify + GIR_MergeMemOperands, + /// Erase from parent. + /// - InsnID - Instruction ID to erase + GIR_EraseFromParent, + + /// A successful emission + GIR_Done, +}; + /// Provides the logic to select generic machine instructions. class InstructionSelector { public: @@ -78,9 +206,39 @@ class InstructionSelector { protected: using ComplexRendererFn = std::function; + using RecordedMIVector = SmallVector; + using NewMIVector = SmallVector; + struct MatcherState { + std::vector Renderers; + RecordedMIVector MIs; + + MatcherState(unsigned MaxRenderers); + }; + +public: + template + struct MatcherInfoTy { + const LLT *TypeObjects; + const PredicateBitset *FeatureBitsets; + const std::vector ComplexPredicates; + }; + +protected: InstructionSelector(); + /// Execute a given matcher table and return true if the match was successful + /// and false otherwise. + template + bool executeMatchTable( + TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, + const MatcherInfoTy &MatcherInfo, + const int64_t *MatchTable, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI, + const PredicateBitset &AvailableFeatures) const; + /// Constrain a register operand of an instruction \p I to a specified /// register class. This could involve inserting COPYs before (for uses) or /// after (for defs) and may replace the operand of \p I. 
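[editorial note] Each GIM_*/GIR_* opcode above is one entry in a flat int64_t match table that executeMatchTable() interprets. A purely hand-written, illustrative table (TableGen normally emits these; MyTarget::ADDrr is a hypothetical target opcode) might look like:

    const int64_t MatchTable[] = {
        GIM_CheckOpcode,      /*InsnID*/ 0, TargetOpcode::G_ADD,
        GIM_CheckNumOperands, /*InsnID*/ 0, /*Expected*/ 3,
        // Reuse MIs[0] in place: swap the opcode, then constrain the operands.
        GIR_MutateOpcode,     /*OldInsnID*/ 0, /*NewInsnID*/ 0, MyTarget::ADDrr,
        GIR_ConstrainSelectedInstOperands, /*InsnID*/ 0,
        GIR_Done,
    };
    // if (executeMatchTable(*this, OutMIs, State, MatcherInfo, MatchTable,
    //                       TII, MRI, TRI, RBI, AvailableFeatures))
    //   return true;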
diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h new file mode 100644 index 000000000000..98b6b859b9e2 --- /dev/null +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -0,0 +1,337 @@ +//==-- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h ---------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file declares the API for the instruction selector. +/// This class is responsible for selecting machine instructions. +/// It's implemented by the target. It's used by the InstructionSelect pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H +#define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H + +namespace llvm { +template +bool InstructionSelector::executeMatchTable( + TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State, + const MatcherInfoTy &MatcherInfo, + const int64_t *MatchTable, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI, + const PredicateBitset &AvailableFeatures) const { + const int64_t *Command = MatchTable; + while (true) { + switch (*Command++) { + case GIM_RecordInsn: { + int64_t NewInsnID = *Command++; + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + + // As an optimisation we require that MIs[0] is always the root. Refuse + // any attempt to modify it. + assert(NewInsnID != 0 && "Refusing to modify MIs[0]"); + (void)NewInsnID; + + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isReg()) { + DEBUG(dbgs() << "Rejected (not a register)\n"); + return false; + } + if (TRI.isPhysicalRegister(MO.getReg())) { + DEBUG(dbgs() << "Rejected (is a physical register)\n"); + return false; + } + + assert((size_t)NewInsnID == State.MIs.size() && + "Expected to store MIs in order"); + State.MIs.push_back(MRI.getVRegDef(MO.getReg())); + DEBUG(dbgs() << "MIs[" << NewInsnID << "] = GIM_RecordInsn(" << InsnID + << ", " << OpIdx << ")\n"); + break; + } + + case GIM_CheckFeatures: { + int64_t ExpectedBitsetID = *Command++; + DEBUG(dbgs() << "GIM_CheckFeatures(ExpectedBitsetID=" << ExpectedBitsetID + << ")\n"); + if ((AvailableFeatures & MatcherInfo.FeatureBitsets[ExpectedBitsetID]) != + MatcherInfo.FeatureBitsets[ExpectedBitsetID]) { + DEBUG(dbgs() << "Rejected\n"); + return false; + } + break; + } + + case GIM_CheckOpcode: { + int64_t InsnID = *Command++; + int64_t Expected = *Command++; + + unsigned Opcode = State.MIs[InsnID]->getOpcode(); + DEBUG(dbgs() << "GIM_CheckOpcode(MIs[" << InsnID << "], ExpectedOpcode=" + << Expected << ") // Got=" << Opcode << "\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (Opcode != Expected) + return false; + break; + } + case GIM_CheckNumOperands: { + int64_t InsnID = *Command++; + int64_t Expected = *Command++; + DEBUG(dbgs() << "GIM_CheckNumOperands(MIs[" << InsnID + << "], Expected=" << Expected << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (State.MIs[InsnID]->getNumOperands() != Expected) + return false; + break; + } + + case GIM_CheckType: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t TypeID = *Command++; + 
DEBUG(dbgs() << "GIM_CheckType(MIs[" << InsnID << "]->getOperand(" + << OpIdx << "), TypeID=" << TypeID << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()) != + MatcherInfo.TypeObjects[TypeID]) + return false; + break; + } + case GIM_CheckRegBankForClass: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t RCEnum = *Command++; + DEBUG(dbgs() << "GIM_CheckRegBankForClass(MIs[" << InsnID + << "]->getOperand(" << OpIdx << "), RCEnum=" << RCEnum + << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (&RBI.getRegBankFromRegClass(*TRI.getRegClass(RCEnum)) != + RBI.getRegBank(State.MIs[InsnID]->getOperand(OpIdx).getReg(), MRI, TRI)) + return false; + break; + } + case GIM_CheckComplexPattern: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t RendererID = *Command++; + int64_t ComplexPredicateID = *Command++; + DEBUG(dbgs() << "State.Renderers[" << RendererID + << "] = GIM_CheckComplexPattern(MIs[" << InsnID + << "]->getOperand(" << OpIdx + << "), ComplexPredicateID=" << ComplexPredicateID << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + // FIXME: Use std::invoke() when it's available. + if (!(State.Renderers[RendererID] = + (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( + State.MIs[InsnID]->getOperand(OpIdx)))) + return false; + break; + } + case GIM_CheckConstantInt: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t Value = *Command++; + DEBUG(dbgs() << "GIM_CheckConstantInt(MIs[" << InsnID << "]->getOperand(" + << OpIdx << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!isOperandImmEqual(State.MIs[InsnID]->getOperand(OpIdx), Value, MRI)) + return false; + break; + } + case GIM_CheckLiteralInt: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t Value = *Command++; + DEBUG(dbgs() << "GIM_CheckLiteralInt(MIs[" << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); + if (!OM.isCImm() || !OM.getCImm()->equalsInt(Value)) + return false; + break; + } + case GIM_CheckIntrinsicID: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t Value = *Command++; + DEBUG(dbgs() << "GIM_CheckIntrinsicID(MIs[" << InsnID << "]->getOperand(" << OpIdx + << "), Value=" << Value << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); + if (!OM.isIntrinsicID() || OM.getIntrinsicID() != Value) + return false; + break; + } + case GIM_CheckIsMBB: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + DEBUG(dbgs() << "GIM_CheckIsMBB(MIs[" << InsnID << "]->getOperand(" + << OpIdx << "))\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->getOperand(OpIdx).isMBB()) + return false; + break; + } + + case GIM_CheckIsSafeToFold: { + int64_t InsnID = *Command++; + DEBUG(dbgs() << "GIM_CheckIsSafeToFold(MIs[" << InsnID << "])\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + if (!isObviouslySafeToFold(*State.MIs[InsnID])) + return false; + break; + } + + case GIR_MutateOpcode: { + int64_t OldInsnID = *Command++; + int64_t NewInsnID = *Command++; + int64_t NewOpcode = *Command++; 
+ assert((size_t)NewInsnID == OutMIs.size() && + "Expected to store MIs in order"); + OutMIs.push_back( + MachineInstrBuilder(*State.MIs[OldInsnID]->getParent()->getParent(), + State.MIs[OldInsnID])); + OutMIs[NewInsnID]->setDesc(TII.get(NewOpcode)); + DEBUG(dbgs() << "GIR_MutateOpcode(OutMIs[" << NewInsnID << "], MIs[" + << OldInsnID << "], " << NewOpcode << ")\n"); + break; + } + case GIR_BuildMI: { + int64_t InsnID = *Command++; + int64_t Opcode = *Command++; + assert((size_t)InsnID == OutMIs.size() && + "Expected to store MIs in order"); + (void)InsnID; + OutMIs.push_back(BuildMI(*State.MIs[0]->getParent(), State.MIs[0], + State.MIs[0]->getDebugLoc(), TII.get(Opcode))); + DEBUG(dbgs() << "GIR_BuildMI(OutMIs[" << InsnID << "], " << Opcode + << ")\n"); + break; + } + + case GIR_Copy: { + int64_t NewInsnID = *Command++; + int64_t OldInsnID = *Command++; + int64_t OpIdx = *Command++; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + OutMIs[NewInsnID].add(State.MIs[OldInsnID]->getOperand(OpIdx)); + DEBUG(dbgs() << "GIR_Copy(OutMIs[" << NewInsnID << "], MIs[" << OldInsnID + << "], " << OpIdx << ")\n"); + break; + } + case GIR_CopySubReg: { + int64_t NewInsnID = *Command++; + int64_t OldInsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t SubRegIdx = *Command++; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + OutMIs[NewInsnID].addReg(State.MIs[OldInsnID]->getOperand(OpIdx).getReg(), + 0, SubRegIdx); + DEBUG(dbgs() << "GIR_CopySubReg(OutMIs[" << NewInsnID << "], MIs[" + << OldInsnID << "], " << OpIdx << ", " << SubRegIdx + << ")\n"); + break; + } + case GIR_AddImplicitDef: { + int64_t InsnID = *Command++; + int64_t RegNum = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addDef(RegNum, RegState::Implicit); + DEBUG(dbgs() << "GIR_AddImplicitDef(OutMIs[" << InsnID << "], " << RegNum + << ")\n"); + break; + } + case GIR_AddImplicitUse: { + int64_t InsnID = *Command++; + int64_t RegNum = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addUse(RegNum, RegState::Implicit); + DEBUG(dbgs() << "GIR_AddImplicitUse(OutMIs[" << InsnID << "], " << RegNum + << ")\n"); + break; + } + case GIR_AddRegister: { + int64_t InsnID = *Command++; + int64_t RegNum = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addReg(RegNum); + DEBUG(dbgs() << "GIR_AddRegister(OutMIs[" << InsnID << "], " << RegNum + << ")\n"); + break; + } + case GIR_AddImm: { + int64_t InsnID = *Command++; + int64_t Imm = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + OutMIs[InsnID].addImm(Imm); + DEBUG(dbgs() << "GIR_AddImm(OutMIs[" << InsnID << "], " << Imm << ")\n"); + break; + } + case GIR_ComplexRenderer: { + int64_t InsnID = *Command++; + int64_t RendererID = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + State.Renderers[RendererID](OutMIs[InsnID]); + DEBUG(dbgs() << "GIR_ComplexRenderer(OutMIs[" << InsnID << "], " + << RendererID << ")\n"); + break; + } + + case GIR_ConstrainOperandRC: { + int64_t InsnID = *Command++; + int64_t OpIdx = *Command++; + int64_t RCEnum = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + constrainOperandRegToRegClass(*OutMIs[InsnID].getInstr(), OpIdx, + *TRI.getRegClass(RCEnum), TII, TRI, RBI); + DEBUG(dbgs() << "GIR_ConstrainOperandRC(OutMIs[" << InsnID << "], " + << OpIdx 
<< ", " << RCEnum << ")\n"); + break; + } + case GIR_ConstrainSelectedInstOperands: { + int64_t InsnID = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + constrainSelectedInstRegOperands(*OutMIs[InsnID].getInstr(), TII, TRI, + RBI); + DEBUG(dbgs() << "GIR_ConstrainSelectedInstOperands(OutMIs[" << InsnID + << "])\n"); + break; + } + case GIR_MergeMemOperands: { + int64_t InsnID = *Command++; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + for (const auto *FromMI : State.MIs) + for (const auto &MMO : FromMI->memoperands()) + OutMIs[InsnID].addMemOperand(MMO); + DEBUG(dbgs() << "GIR_MergeMemOperands(OutMIs[" << InsnID << "])\n"); + break; + } + case GIR_EraseFromParent: { + int64_t InsnID = *Command++; + assert(State.MIs[InsnID] && + "Attempted to erase an undefined instruction"); + State.MIs[InsnID]->eraseFromParent(); + DEBUG(dbgs() << "GIR_EraseFromParent(MIs[" << InsnID << "])\n"); + break; + } + + case GIR_Done: + DEBUG(dbgs() << "GIR_Done"); + return true; + + default: + llvm_unreachable("Unexpected command"); + } + } +} + +} // end namespace llvm + +#endif // LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 5197ba869c0a..1fd45b52e3ac 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -101,11 +101,11 @@ class LegalizerHelper { const LegalizerInfo &LI; }; -/// Helper function that replaces \p MI with a libcall. +/// Helper function that creates the given libcall. LegalizerHelper::LegalizeResult -replaceWithLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, - RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, - ArrayRef Args); +createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, + ArrayRef Args); } // End namespace llvm. diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index c9327d50432e..85e6fef1f3c2 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" @@ -59,6 +60,21 @@ class MachineIRBuilder { } void validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend); + MachineInstrBuilder buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, unsigned Op1); + + unsigned getDestFromArg(unsigned Reg) { return Reg; } + unsigned getDestFromArg(LLT Ty) { + return getMF().getRegInfo().createGenericVirtualRegister(Ty); + } + unsigned getDestFromArg(const TargetRegisterClass *RC) { + return getMF().getRegInfo().createVirtualRegister(RC); + } + + unsigned getRegFromArg(unsigned Reg) { return Reg; } + + unsigned getRegFromArg(const MachineInstrBuilder &MIB) { + return MIB->getOperand(0).getReg(); + } public: /// Getter for the function we currently build. @@ -120,6 +136,22 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildInstr(unsigned Opcode); + /// DAG like Generic method for building arbitrary instructions as above. + /// \Opc opcode for the instruction. 
+ /// \Ty Either LLT/TargetRegisterClass/unsigned types for Dst + /// \Args Variadic list of uses of types(unsigned/MachineInstrBuilder) + /// Uses of type MachineInstrBuilder will perform + /// getOperand(0).getReg() to convert to register. + template + MachineInstrBuilder buildInstr(unsigned Opc, DstTy &&Ty, + UseArgsTy &&... Args) { + auto MIB = buildInstr(Opc).addDef(getDestFromArg(Ty)); + unsigned It[] = {(getRegFromArg(Args))...}; + for (const auto &i : It) + MIB.addUse(i); + return MIB; + } + /// Build but don't insert = \p Opcode . /// /// \pre setMF, setBasicBlock or setMI must have been called. @@ -188,6 +220,11 @@ class MachineIRBuilder { /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildAdd(unsigned Res, unsigned Op0, unsigned Op1); + template + MachineInstrBuilder buildAdd(DstTy &&Ty, UseArgsTy &&... UseArgs) { + unsigned Res = getDestFromArg(Ty); + return buildAdd(Res, (getRegFromArg(UseArgs))...); + } /// Build and insert \p Res = G_SUB \p Op0, \p Op1 /// @@ -295,6 +332,18 @@ class MachineIRBuilder { MachineInstrBuilder buildAnd(unsigned Res, unsigned Op0, unsigned Op1); + /// Build and insert \p Res = G_OR \p Op0, \p Op1 + /// + /// G_OR sets \p Res to the bitwise or of integer parameters \p Op0 and \p + /// Op1. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers + /// with the same (scalar or vector) type). + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildOr(unsigned Res, unsigned Op0, unsigned Op1); + /// Build and insert \p Res = G_ANYEXT \p Op0 /// /// G_ANYEXT produces a register of the specified width, with bits 0 to @@ -416,6 +465,10 @@ class MachineIRBuilder { /// \return The newly created instruction. MachineInstrBuilder buildConstant(unsigned Res, int64_t Val); + template + MachineInstrBuilder buildConstant(DstType &&Res, int64_t Val) { + return buildConstant(getDestFromArg(Res), Val); + } /// Build and insert \p Res = G_FCONSTANT \p Val /// /// G_FCONSTANT is a floating-point constant with the specified size and diff --git a/include/llvm/CodeGen/LiveRegUnits.h b/include/llvm/CodeGen/LiveRegUnits.h index fa1ec867ea3d..c28b1a06854f 100644 --- a/include/llvm/CodeGen/LiveRegUnits.h +++ b/include/llvm/CodeGen/LiveRegUnits.h @@ -93,12 +93,14 @@ class LiveRegUnits { } /// Updates liveness when stepping backwards over the instruction \p MI. + /// This removes all register units defined or clobbered in \p MI and then + /// adds the units used (as in use operands) in \p MI. void stepBackward(const MachineInstr &MI); - /// Mark all register units live during instruction \p MI. - /// This can be used to accumulate live/unoccupied registers over a range of - /// instructions. - void accumulateBackward(const MachineInstr &MI); + /// Adds all register units used, defined or clobbered in \p MI. + /// This is useful when walking over a range of instruction to find registers + /// unused over the whole range. + void accumulate(const MachineInstr &MI); /// Adds registers living out of block \p MBB. 
/// Live out registers are the union of the live-in registers of the successor diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index f67da7b01c54..19173fa39bdc 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -650,7 +650,7 @@ class MachineFunction { MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, AtomicOrdering Ordering = AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic); diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index 78adce507b8c..a9de0db05d72 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -114,6 +114,9 @@ class MachineMemOperand { MOInvariant = 1u << 5, // Reserved for use by target-specific passes. + // Targets may override getSerializableMachineMemOperandTargetFlags() to + // enable MIR serialization/parsing of these flags. If more of these flags + // are added, the MIR printing/parsing code will need to be updated as well. MOTargetFlag1 = 1u << 6, MOTargetFlag2 = 1u << 7, MOTargetFlag3 = 1u << 8, @@ -124,8 +127,8 @@ class MachineMemOperand { private: /// Atomic information for this memory operation. struct MachineAtomicInfo { - /// Synchronization scope for this memory operation. - unsigned SynchScope : 1; // enum SynchronizationScope + /// Synchronization scope ID for this memory operation. + unsigned SSID : 8; // SyncScope::ID /// Atomic ordering requirements for this memory operation. For cmpxchg /// atomic operations, atomic ordering requirements when store occurs. unsigned Ordering : 4; // enum AtomicOrdering @@ -152,7 +155,7 @@ class MachineMemOperand { unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, AtomicOrdering Ordering = AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic); @@ -202,9 +205,9 @@ class MachineMemOperand { /// Return the range tag for the memory reference. const MDNode *getRanges() const { return Ranges; } - /// Return the synchronization scope for this memory operation. - SynchronizationScope getSynchScope() const { - return static_cast(AtomicInfo.SynchScope); + /// Returns the synchronization scope ID for this memory operation. + SyncScope::ID getSyncScopeID() const { + return static_cast(AtomicInfo.SSID); } /// Return the atomic ordering requirements for this memory operation. 
For diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 8c3aacaa8efc..08151be11083 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -340,6 +340,18 @@ namespace RTLIB { MEMCPY_ELEMENT_UNORDERED_ATOMIC_8, MEMCPY_ELEMENT_UNORDERED_ATOMIC_16, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8, + MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16, + + MEMSET_ELEMENT_UNORDERED_ATOMIC_1, + MEMSET_ELEMENT_UNORDERED_ATOMIC_2, + MEMSET_ELEMENT_UNORDERED_ATOMIC_4, + MEMSET_ELEMENT_UNORDERED_ATOMIC_8, + MEMSET_ELEMENT_UNORDERED_ATOMIC_16, + // EXCEPTION HANDLING UNWIND_RESUME, @@ -515,6 +527,17 @@ namespace RTLIB { /// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or /// UNKNOW_LIBCALL if there is none. Libcall getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); + + /// getMEMMOVE_ELEMENT_UNORDERED_ATOMIC - Return + /// MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or + /// UNKNOW_LIBCALL if there is none. + Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); + + /// getMEMSET_ELEMENT_UNORDERED_ATOMIC - Return + /// MEMSET_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or + /// UNKNOW_LIBCALL if there is none. + Libcall getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize); + } } diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 4d72eda5c71a..25afc5b506df 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -235,6 +235,9 @@ class TargetRegisterInfo; "SDep::Output edge cannot use the zero register!"); Contents.Reg = Reg; } + + raw_ostream &print(raw_ostream &O, + const TargetRegisterInfo *TRI = nullptr) const; }; template <> @@ -458,7 +461,10 @@ class TargetRegisterInfo; void dump(const ScheduleDAG *G) const; void dumpAll(const ScheduleDAG *G) const; - void print(raw_ostream &O, const ScheduleDAG *G) const; + raw_ostream &print(raw_ostream &O, + const SUnit *N = nullptr, + const SUnit *X = nullptr) const; + raw_ostream &print(raw_ostream &O, const ScheduleDAG *G) const; private: void ComputeDepth(); diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index f3f3003b7e20..55a23c3cca9b 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -927,7 +927,7 @@ class SelectionDAG { SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO); @@ -937,7 +937,7 @@ class SelectionDAG { SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO); diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index d9f8af0e21d1..db42fb6c170c 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1213,8 +1213,8 @@ class MemSDNode : public 
SDNode { /// Returns the Ranges that describes the dereference. const MDNode *getRanges() const { return MMO->getRanges(); } - /// Return the synchronization scope for this memory operation. - SynchronizationScope getSynchScope() const { return MMO->getSynchScope(); } + /// Returns the synchronization scope ID for this memory operation. + SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); } /// Return the atomic ordering requirements for this memory operation. For /// cmpxchg atomic operations, return the atomic ordering requirements when @@ -1432,8 +1432,8 @@ class ConstantSDNode : public SDNode { int64_t getSExtValue() const { return Value->getSExtValue(); } bool isOne() const { return Value->isOne(); } - bool isNullValue() const { return Value->isNullValue(); } - bool isAllOnesValue() const { return Value->isAllOnesValue(); } + bool isNullValue() const { return Value->isZero(); } + bool isAllOnesValue() const { return Value->isMinusOne(); } bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; } diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 7941af8be8af..cdfc1745cea5 100644 --- a/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -735,6 +735,10 @@ class Compile3Sym : public SymbolRecord { uint16_t VersionBackendQFE; StringRef Version; + void setLanguage(SourceLanguage Lang) { + Flags = CompileSym3Flags((uint32_t(Flags) & 0xFFFFFF00) | uint32_t(Lang)); + } + uint8_t getLanguage() const { return static_cast(Flags) & 0xFF; } uint32_t getFlags() const { return static_cast(Flags) & ~0xFF; } diff --git a/include/llvm/DebugInfo/CodeView/TypeIndex.h b/include/llvm/DebugInfo/CodeView/TypeIndex.h index 10d51c2d6244..e0c2226bdbd7 100644 --- a/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -10,9 +10,11 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H #define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/Endian.h" #include #include +#include namespace llvm { @@ -265,6 +267,23 @@ struct TypeIndexOffset { void printTypeIndex(ScopedPrinter &Printer, StringRef FieldName, TypeIndex TI, TypeCollection &Types); } -} + +template <> struct DenseMapInfo { + static inline codeview::TypeIndex getEmptyKey() { + return codeview::TypeIndex{DenseMapInfo::getEmptyKey()}; + } + static inline codeview::TypeIndex getTombstoneKey() { + return codeview::TypeIndex{DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const codeview::TypeIndex &TI) { + return DenseMapInfo::getHashValue(TI.getIndex()); + } + static bool isEqual(const codeview::TypeIndex &LHS, + const codeview::TypeIndex &RHS) { + return LHS == RHS; + } +}; + +} // namespace llvm #endif diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index 4126e245ff13..936813dc6abc 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -204,7 +204,9 @@ class LoadedObjectInfo { /// need to be consistent with the addresses used to query the DIContext and /// the output of this function should be deterministic, i.e. repeated calls with /// the same Sec should give the same address. - virtual uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const = 0; + virtual uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const { + return 0; + } /// If conveniently available, return the content of the given Section. 
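The DenseMapInfo specialization above is what allows codeview::TypeIndex to be used directly as a DenseMap key. A minimal sketch (illustrative; the cache and its value type are hypothetical):

#include "llvm/ADT/DenseMap.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"

// Empty/tombstone keys and hashing come from the DenseMapInfo
// specialization declared in TypeIndex.h.
void recordTypeSymbol(
    llvm::DenseMap<llvm::codeview::TypeIndex, uint32_t> &Cache,
    llvm::codeview::TypeIndex TI, uint32_t SymId) {
  Cache.insert({TI, SymId});
}

uint32_t lookupTypeSymbol(
    const llvm::DenseMap<llvm::codeview::TypeIndex, uint32_t> &Cache,
    llvm::codeview::TypeIndex TI) {
  auto It = Cache.find(TI);
  return It == Cache.end() ? 0 : It->second;
}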
/// @@ -221,12 +223,28 @@ class LoadedObjectInfo { return false; } + // FIXME: This is untested and unused anywhere in the LLVM project, it's + // used/needed by Julia (an external project). It should have some coverage + // (at least tests, but ideally example functionality). /// Obtain a copy of this LoadedObjectInfo. - /// - /// The caller is responsible for deallocation once the copy is no longer required. virtual std::unique_ptr clone() const = 0; }; +template +struct LoadedObjectInfoHelper : Base { +protected: + LoadedObjectInfoHelper(const LoadedObjectInfoHelper &) = default; + LoadedObjectInfoHelper() = default; + +public: + template + LoadedObjectInfoHelper(Ts &&... Args) : Base(std::forward(Args)...) {} + + std::unique_ptr clone() const override { + return llvm::make_unique(static_cast(*this)); + } +}; + } // end namespace llvm #endif // LLVM_DEBUGINFO_DICONTEXT_H diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index 739aa1f9ee74..ee2e805050c0 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -226,11 +226,7 @@ class DWARFContext : public DIContext { virtual bool isLittleEndian() const = 0; virtual uint8_t getAddressSize() const = 0; virtual const DWARFSection &getInfoSection() = 0; - - using TypeSectionMap = MapVector>; - - virtual const TypeSectionMap &getTypesSections() = 0; + virtual void forEachTypesSections(function_ref F) = 0; virtual StringRef getAbbrevSection() = 0; virtual const DWARFSection &getLocSection() = 0; virtual StringRef getARangeSection() = 0; @@ -252,7 +248,8 @@ class DWARFContext : public DIContext { // Sections for DWARF5 split dwarf proposal. virtual const DWARFSection &getInfoDWOSection() = 0; - virtual const TypeSectionMap &getTypesDWOSections() = 0; + virtual void + forEachTypesDWOSections(function_ref F) = 0; virtual StringRef getAbbrevDWOSection() = 0; virtual const DWARFSection &getLineDWOSection() = 0; virtual const DWARFSection &getLocDWOSection() = 0; @@ -294,6 +291,9 @@ enum class ErrorPolicy { Halt, Continue }; class DWARFContextInMemory : public DWARFContext { virtual void anchor(); + using TypeSectionMap = MapVector>; + StringRef FileName; bool IsLittleEndian; uint8_t AddressSize; @@ -338,7 +338,8 @@ class DWARFContextInMemory : public DWARFContext { SmallVector, 4> UncompressedSections; - StringRef *MapSectionToMember(StringRef Name); + DWARFSection *mapNameToDWARFSection(StringRef Name); + StringRef *mapSectionToMember(StringRef Name); /// If Sec is compressed section, decompresses and updates its contents /// provided by Data. Otherwise leaves it unchanged. @@ -362,7 +363,10 @@ class DWARFContextInMemory : public DWARFContext { bool isLittleEndian() const override { return IsLittleEndian; } uint8_t getAddressSize() const override { return AddressSize; } const DWARFSection &getInfoSection() override { return InfoSection; } - const TypeSectionMap &getTypesSections() override { return TypesSections; } + void forEachTypesSections(function_ref F) override { + for (auto &P : TypesSections) + F(P.second); + } StringRef getAbbrevSection() override { return AbbrevSection; } const DWARFSection &getLocSection() override { return LocSection; } StringRef getARangeSection() override { return ARangeSection; } @@ -389,8 +393,9 @@ class DWARFContextInMemory : public DWARFContext { // Sections for DWARF5 split dwarf proposal. 
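The LoadedObjectInfoHelper mixin above exists so a LoadedObjectInfo subclass can inherit a correct clone() instead of writing one by hand. A minimal sketch, assuming the helper is parameterized on the derived class (its template arguments do not survive in the hunk above); the class name is hypothetical:

#include "llvm/DebugInfo/DIContext.h"
#include <memory>

// Relies on the default getSectionLoadAddress() above; clone() is supplied
// by the CRTP helper.
class TrivialLoadedObjectInfo final
    : public llvm::LoadedObjectInfoHelper<TrivialLoadedObjectInfo> {};

std::unique_ptr<llvm::LoadedObjectInfo>
copyInfo(const TrivialLoadedObjectInfo &Info) {
  return Info.clone(); // returns a fresh TrivialLoadedObjectInfo copy
}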
const DWARFSection &getInfoDWOSection() override { return InfoDWOSection; } - const TypeSectionMap &getTypesDWOSections() override { - return TypesDWOSections; + void forEachTypesDWOSections(function_ref F) override { + for (auto &P : TypesDWOSections) + F(P.second); } StringRef getAbbrevDWOSection() override { return AbbrevDWOSection; } diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h index e4cb1b24e30d..c918a5d5e976 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h @@ -47,6 +47,7 @@ class DbiModuleDescriptorBuilder { DbiModuleDescriptorBuilder & operator=(const DbiModuleDescriptorBuilder &) = delete; + void setPdbFilePathNI(uint32_t NI); void setObjFileName(StringRef Name); void addSymbol(codeview::CVSymbol Symbol); @@ -68,6 +69,10 @@ class DbiModuleDescriptorBuilder { uint32_t calculateSerializedLength() const; + /// Return the offset within the module symbol stream of the next symbol + /// record passed to addSymbol. Add four to account for the signature. + uint32_t getNextSymbolOffset() const { return SymbolByteSize + 4; } + void finalize(); Error finalizeMsfLayout(); @@ -81,6 +86,7 @@ class DbiModuleDescriptorBuilder { msf::MSFBuilder &MSF; uint32_t SymbolByteSize = 0; + uint32_t PdbFilePathNI = 0; std::string ModuleName; std::string ObjFileName; std::vector SourceFiles; diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStream.h b/include/llvm/DebugInfo/PDB/Native/DbiStream.h index 3bf790726656..4be113f28d6f 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiStream.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiStream.h @@ -83,6 +83,8 @@ class DbiStream { FixedStreamArray getSectionMap() const; void visitSectionContributions(ISectionContribVisitor &Visitor) const; + Expected getECName(uint32_t NI) const; + private: Error initializeSectionContributionData(); Error initializeSectionHeadersData(); diff --git a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h index 744411854181..63eb34f0326a 100644 --- a/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h @@ -15,6 +15,7 @@ #include "llvm/Support/Error.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/BinaryByteStream.h" @@ -54,8 +55,13 @@ class DbiStreamBuilder { // Add given bytes as a new stream. 
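getNextSymbolOffset() above reports where the next record handed to addSymbol() will land in the module's symbol stream; the +4 skips the 4-byte stream signature that precedes the records. A small illustrative sketch (the helper name is hypothetical):

#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h"

// Capture the offset Sym will occupy (e.g. so another record can refer to
// it later), then append it to the module.
uint32_t addAndRememberOffset(llvm::pdb::DbiModuleDescriptorBuilder &Mod,
                              llvm::codeview::CVSymbol Sym) {
  uint32_t Offset = Mod.getNextSymbolOffset(); // SymbolByteSize + 4
  Mod.addSymbol(Sym);
  return Offset;
}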
Error addDbgStream(pdb::DbgHeaderType Type, ArrayRef Data); + uint32_t addECName(StringRef Name); + uint32_t calculateSerializedLength() const; + void setPublicsStreamIndex(uint32_t Index); + void setSymbolRecordStreamIndex(uint32_t Index); + Expected addModuleInfo(StringRef ModuleName); Error addModuleSourceFile(StringRef Module, StringRef File); Error addModuleSourceFile(DbiModuleDescriptorBuilder &Module, StringRef File); @@ -75,7 +81,7 @@ class DbiStreamBuilder { private: struct DebugStream { ArrayRef Data; - uint16_t StreamNumber = 0; + uint16_t StreamNumber = kInvalidStreamIndex; }; Error finalize(); @@ -87,7 +93,6 @@ class DbiStreamBuilder { uint32_t calculateNamesBufferSize() const; uint32_t calculateDbgStreamsSize() const; - Error generateModiSubstream(); Error generateFileInfoSubstream(); msf::MSFBuilder &Msf; @@ -100,6 +105,8 @@ class DbiStreamBuilder { uint16_t PdbDllRbld; uint16_t Flags; PDB_Machine MachineType; + uint32_t PublicsStreamIndex = kInvalidStreamIndex; + uint32_t SymRecordStreamIndex = kInvalidStreamIndex; const DbiStreamHeader *Header; @@ -108,6 +115,7 @@ class DbiStreamBuilder { StringMap SourceFileNames; + PDBStringTableBuilder ECNamesBuilder; WritableBinaryStreamRef NamesBuffer; MutableBinaryByteStream FileInfoBuffer; std::vector SectionContribs; diff --git a/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h index 25f66240a6a2..17a82b7ce12d 100644 --- a/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h +++ b/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h @@ -44,7 +44,7 @@ class NamedStreamMap { bool get(StringRef Stream, uint32_t &StreamNo) const; void set(StringRef Stream, uint32_t StreamNo); void remove(StringRef Stream); - + const StringMap &getStringMap() const { return Mapping; } iterator_range> entries() const; private: diff --git a/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h new file mode 100644 index 000000000000..4f532c6e3829 --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h @@ -0,0 +1,49 @@ +//===- NativeBuiltinSymbol.h -------------------------------------- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEBUILTINSYMBOL_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEBUILTINSYMBOL_H + +#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" + +#include "llvm/DebugInfo/PDB/PDBTypes.h" + +namespace llvm { +namespace pdb { + +class NativeSession; + +class NativeBuiltinSymbol : public NativeRawSymbol { +public: + NativeBuiltinSymbol(NativeSession &PDBSession, SymIndexId Id, + PDB_BuiltinType T, uint64_t L); + ~NativeBuiltinSymbol() override; + + virtual std::unique_ptr clone() const override; + + void dump(raw_ostream &OS, int Indent) const override; + + PDB_SymType getSymTag() const override; + + PDB_BuiltinType getBuiltinType() const override; + bool isConstType() const override; + uint64_t getLength() const override; + bool isUnalignedType() const override; + bool isVolatileType() const override; + +protected: + NativeSession &Session; + PDB_BuiltinType Type; + uint64_t Length; +}; + +} // namespace pdb +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h index 1687737f0e7f..bd5c09e5ff76 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h @@ -18,7 +18,7 @@ namespace pdb { class NativeCompilandSymbol : public NativeRawSymbol { public: - NativeCompilandSymbol(NativeSession &Session, uint32_t SymbolId, + NativeCompilandSymbol(NativeSession &Session, SymIndexId SymbolId, DbiModuleDescriptor MI); std::unique_ptr clone() const override; diff --git a/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h index 15bac78df191..ddb7f811da38 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h @@ -18,7 +18,7 @@ namespace pdb { class NativeExeSymbol : public NativeRawSymbol { public: - NativeExeSymbol(NativeSession &Session, uint32_t SymbolId); + NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId); std::unique_ptr clone() const override; diff --git a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h index a24a972879d2..66a9eae28e23 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeRawSymbol.h @@ -19,9 +19,11 @@ namespace pdb { class NativeSession; +typedef uint32_t SymIndexId; + class NativeRawSymbol : public IPDBRawSymbol { public: - NativeRawSymbol(NativeSession &PDBSession, uint32_t SymbolId); + NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId); virtual std::unique_ptr clone() const = 0; @@ -205,7 +207,7 @@ class NativeRawSymbol : public IPDBRawSymbol { protected: NativeSession &Session; - uint32_t SymbolId; + SymIndexId SymbolId; }; } // end namespace pdb diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/include/llvm/DebugInfo/PDB/Native/NativeSession.h index dd40874dc5f2..b16ce231c349 100644 --- a/include/llvm/DebugInfo/PDB/Native/NativeSession.h +++ b/include/llvm/DebugInfo/PDB/Native/NativeSession.h @@ -10,9 +10,13 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVESESSION_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h" #include "llvm/DebugInfo/PDB/IPDBSession.h" 
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h" +#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" @@ -35,6 +39,8 @@ class NativeSession : public IPDBSession { std::unique_ptr createCompilandSymbol(DbiModuleDescriptor MI); + SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI); + uint64_t getLoadAddress() const override; void setLoadAddress(uint64_t Address) override; std::unique_ptr getGlobalScope() override; @@ -77,6 +83,7 @@ class NativeSession : public IPDBSession { std::unique_ptr Pdb; std::unique_ptr Allocator; std::vector> SymbolCache; + DenseMap TypeIndexToSymbolId; }; } } diff --git a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h index cd7d3b063793..2dc23f819d3b 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -31,11 +31,13 @@ class MSFBuilder; namespace pdb { class DbiStreamBuilder; class InfoStreamBuilder; +class PublicsStreamBuilder; class TpiStreamBuilder; class PDBFileBuilder { public: explicit PDBFileBuilder(BumpPtrAllocator &Allocator); + ~PDBFileBuilder(); PDBFileBuilder(const PDBFileBuilder &) = delete; PDBFileBuilder &operator=(const PDBFileBuilder &) = delete; @@ -47,6 +49,7 @@ class PDBFileBuilder { TpiStreamBuilder &getTpiBuilder(); TpiStreamBuilder &getIpiBuilder(); PDBStringTableBuilder &getStringTableBuilder(); + PublicsStreamBuilder &getPublicsBuilder(); Error commit(StringRef Filename); @@ -61,6 +64,7 @@ class PDBFileBuilder { std::unique_ptr Msf; std::unique_ptr Info; std::unique_ptr Dbi; + std::unique_ptr Publics; std::unique_ptr Tpi; std::unique_ptr Ipi; diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h index 86ef1136b41d..29167c966d42 100644 --- a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h +++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h @@ -56,7 +56,6 @@ class PDBStringTable { const PDBStringTableHeader *Header = nullptr; codeview::DebugStringTableSubsectionRef Strings; FixedStreamArray IDs; - uint32_t ByteSize = 0; uint32_t NameCount = 0; }; diff --git a/include/llvm/DebugInfo/PDB/Native/PublicsStream.h b/include/llvm/DebugInfo/PDB/Native/PublicsStream.h index 4570c80c76d7..9ace826bd8f7 100644 --- a/include/llvm/DebugInfo/PDB/Native/PublicsStream.h +++ b/include/llvm/DebugInfo/PDB/Native/PublicsStream.h @@ -25,8 +25,6 @@ struct GSIHashHeader; class PDBFile; class PublicsStream { - struct HeaderInfo; - public: PublicsStream(PDBFile &File, std::unique_ptr Stream); ~PublicsStream(); @@ -65,7 +63,7 @@ class PublicsStream { FixedStreamArray ThunkMap; FixedStreamArray SectionOffsets; - const HeaderInfo *Header; + const PublicsStreamHeader *Header; const GSIHashHeader *HashHdr; }; } diff --git a/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h b/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h new file mode 100644 index 000000000000..5ab57ebef53d --- /dev/null +++ b/include/llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h @@ -0,0 +1,54 @@ +//===- PublicsStreamBuilder.h - PDB Publics Stream Creation -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H +#define LLVM_DEBUGINFO_PDB_RAW_PDBPUBLICSTREAMBUILDER_H + +#include "llvm/DebugInfo/PDB/Native/RawConstants.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamRef.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace msf { +class MSFBuilder; +} +namespace pdb { +class PublicsStream; +struct PublicsStreamHeader; + +class PublicsStreamBuilder { +public: + explicit PublicsStreamBuilder(msf::MSFBuilder &Msf); + ~PublicsStreamBuilder(); + + PublicsStreamBuilder(const PublicsStreamBuilder &) = delete; + PublicsStreamBuilder &operator=(const PublicsStreamBuilder &) = delete; + + Error finalizeMsfLayout(); + uint32_t calculateSerializedLength() const; + + Error commit(BinaryStreamWriter &PublicsWriter); + + uint32_t getStreamIndex() const { return StreamIdx; } + uint32_t getRecordStreamIdx() const { return RecordStreamIdx; } + +private: + uint32_t StreamIdx = kInvalidStreamIndex; + uint32_t RecordStreamIdx = kInvalidStreamIndex; + std::vector HashRecords; + msf::MSFBuilder &Msf; +}; +} // namespace pdb +} // namespace llvm + +#endif diff --git a/include/llvm/DebugInfo/PDB/Native/RawTypes.h b/include/llvm/DebugInfo/PDB/Native/RawTypes.h index 771272d6a47d..a3cdd3f09a44 100644 --- a/include/llvm/DebugInfo/PDB/Native/RawTypes.h +++ b/include/llvm/DebugInfo/PDB/Native/RawTypes.h @@ -255,6 +255,19 @@ struct ModuleInfoHeader { /// char ObjFileName[]; }; +// This is PSGSIHDR struct defined in +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h +struct PublicsStreamHeader { + support::ulittle32_t SymHash; + support::ulittle32_t AddrMap; + support::ulittle32_t NumThunks; + support::ulittle32_t SizeOfThunk; + support::ulittle16_t ISectThunkTable; + char Padding[2]; + support::ulittle32_t OffThunkTable; + support::ulittle32_t NumSections; +}; + /// Defines a 128-bit unique identifier. This maps to a GUID on Windows, but /// is abstracted here for the purposes of non-Windows platforms that don't have /// the GUID structure defined. diff --git a/include/llvm/ExecutionEngine/JITSymbol.h b/include/llvm/ExecutionEngine/JITSymbol.h index f09e95fddb97..4172f240ba39 100644 --- a/include/llvm/ExecutionEngine/JITSymbol.h +++ b/include/llvm/ExecutionEngine/JITSymbol.h @@ -21,6 +21,8 @@ #include #include +#include "llvm/Support/Error.h" + namespace llvm { class GlobalValue; @@ -41,10 +43,11 @@ class JITSymbolFlags { enum FlagNames : UnderlyingType { None = 0, - Weak = 1U << 0, - Common = 1U << 1, - Absolute = 1U << 2, - Exported = 1U << 3 + HasError = 1U << 0, + Weak = 1U << 1, + Common = 1U << 2, + Absolute = 1U << 3, + Exported = 1U << 4 }; /// @brief Default-construct a JITSymbolFlags instance. @@ -53,6 +56,11 @@ class JITSymbolFlags { /// @brief Construct a JITSymbolFlags instance from the given flags. JITSymbolFlags(FlagNames Flags) : Flags(Flags) {} + /// @brief Return true if there was an error retrieving this symbol. + bool hasError() const { + return (Flags & HasError) == HasError; + } + /// @brief Returns true is the Weak flag is set. bool isWeak() const { return (Flags & Weak) == Weak; @@ -113,11 +121,17 @@ class JITEvaluatedSymbol { /// @brief Represents a symbol in the JIT. 
class JITSymbol { public: - using GetAddressFtor = std::function; + using GetAddressFtor = std::function()>; - /// @brief Create a 'null' symbol that represents failure to find a symbol - /// definition. - JITSymbol(std::nullptr_t) {} + /// @brief Create a 'null' symbol, used to represent a "symbol not found" + /// result from a successful (non-erroneous) lookup. + JITSymbol(std::nullptr_t) + : CachedAddr(0) {} + + /// @brief Create a JITSymbol representing an error in the symbol lookup + /// process (e.g. a network failure during a remote lookup). + JITSymbol(Error Err) + : Err(std::move(Err)), Flags(JITSymbolFlags::HasError) {} /// @brief Create a symbol for a definition with a known address. JITSymbol(JITTargetAddress Addr, JITSymbolFlags Flags) @@ -137,18 +151,59 @@ class JITSymbol { /// user can materialize the definition at any time by calling the getAddress /// method. JITSymbol(GetAddressFtor GetAddress, JITSymbolFlags Flags) - : GetAddress(std::move(GetAddress)), Flags(Flags) {} + : GetAddress(std::move(GetAddress)), CachedAddr(0), Flags(Flags) {} + + JITSymbol(const JITSymbol&) = delete; + JITSymbol& operator=(const JITSymbol&) = delete; + + JITSymbol(JITSymbol &&Other) + : GetAddress(std::move(Other.GetAddress)), Flags(std::move(Other.Flags)) { + if (Flags.hasError()) + Err = std::move(Other.Err); + else + CachedAddr = std::move(Other.CachedAddr); + } + + JITSymbol& operator=(JITSymbol &&Other) { + GetAddress = std::move(Other.GetAddress); + Flags = std::move(Other.Flags); + if (Flags.hasError()) + Err = std::move(Other.Err); + else + CachedAddr = std::move(Other.CachedAddr); + return *this; + } + + ~JITSymbol() { + if (Flags.hasError()) + Err.~Error(); + else + CachedAddr.~JITTargetAddress(); + } /// @brief Returns true if the symbol exists, false otherwise. - explicit operator bool() const { return CachedAddr || GetAddress; } + explicit operator bool() const { + return !Flags.hasError() && (CachedAddr || GetAddress); + } + + /// @brief Move the error field value out of this JITSymbol. + Error takeError() { + if (Flags.hasError()) + return std::move(Err); + return Error::success(); + } /// @brief Get the address of the symbol in the target address space. Returns /// '0' if the symbol does not exist. 
- JITTargetAddress getAddress() { + Expected getAddress() { + assert(!Flags.hasError() && "getAddress called on error value"); if (GetAddress) { - CachedAddr = GetAddress(); - assert(CachedAddr && "Symbol could not be materialized."); - GetAddress = nullptr; + if (auto CachedAddrOrErr = GetAddress()) { + GetAddress = nullptr; + CachedAddr = *CachedAddrOrErr; + assert(CachedAddr && "Symbol could not be materialized."); + } else + return CachedAddrOrErr.takeError(); } return CachedAddr; } @@ -157,7 +212,10 @@ class JITSymbol { private: GetAddressFtor GetAddress; - JITTargetAddress CachedAddr = 0; + union { + JITTargetAddress CachedAddr; + Error Err; + }; JITSymbolFlags Flags; }; diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 8ac1b6bca0a7..c1acca386820 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -146,7 +146,7 @@ class CompileOnDemandLayer { std::unique_ptr)>; struct SourceModuleEntry { - std::unique_ptr> SourceMod; + std::shared_ptr SourceMod; std::set StubsToClone; }; @@ -154,7 +154,7 @@ class CompileOnDemandLayer { using SourceModuleHandle = typename SourceModulesList::size_type; SourceModuleHandle - addSourceModule(std::unique_ptr> M) { + addSourceModule(std::shared_ptr M) { SourceModuleHandle H = SourceModules.size(); SourceModules.push_back(SourceModuleEntry()); SourceModules.back().SourceMod = std::move(M); @@ -162,7 +162,7 @@ class CompileOnDemandLayer { } Module& getSourceModule(SourceModuleHandle H) { - return SourceModules[H].SourceMod->getResource(); + return *SourceModules[H].SourceMod; } std::set& getStubsToClone(SourceModuleHandle H) { @@ -176,19 +176,21 @@ class CompileOnDemandLayer { for (auto BLH : BaseLayerHandles) if (auto Sym = BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly)) return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); return nullptr; } - void removeModulesFromBaseLayer(BaseLayerT &BaseLayer) { + Error removeModulesFromBaseLayer(BaseLayerT &BaseLayer) { for (auto &BLH : BaseLayerHandles) - BaseLayer.removeModule(BLH); + if (auto Err = BaseLayer.removeModule(BLH)) + return Err; + return Error::success(); } - std::unique_ptr ExternalSymbolResolver; - std::unique_ptr> MemMgr; + std::shared_ptr ExternalSymbolResolver; std::unique_ptr StubsMgr; StaticGlobalRenamer StaticRenamer; - ModuleAdderFtor ModuleAdder; SourceModulesList SourceModules; std::vector BaseLayerHandles; }; @@ -196,6 +198,7 @@ class CompileOnDemandLayer { using LogicalDylibList = std::list; public: + /// @brief Handle to loaded module. using ModuleHandleT = typename LogicalDylibList::iterator; @@ -217,48 +220,41 @@ class CompileOnDemandLayer { CloneStubsIntoPartitions(CloneStubsIntoPartitions) {} ~CompileOnDemandLayer() { + // FIXME: Report error on log. while (!LogicalDylibs.empty()) - removeModule(LogicalDylibs.begin()); + consumeError(removeModule(LogicalDylibs.begin())); } /// @brief Add a module to the compile-on-demand layer. 
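Because getAddress() now returns Expected<JITTargetAddress> and a lookup can itself carry an Error, callers distinguish three outcomes: found, failed, and cleanly not found. A sketch of the pattern (LayerT stands for whichever ORC layer is being queried; not part of the patch):

#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/Error.h"
#include <string>

template <typename LayerT>
llvm::Expected<llvm::JITTargetAddress>
lookupAddress(LayerT &Layer, const std::string &Name) {
  if (auto Sym = Layer.findSymbol(Name, /*ExportedSymbolsOnly=*/true)) {
    // Found: materializing the definition may still fail.
    return Sym.getAddress();
  } else if (auto Err = Sym.takeError()) {
    // The lookup itself failed (e.g. a remote resolution error).
    return std::move(Err);
  }
  // Lookup succeeded but no definition exists.
  return llvm::make_error<llvm::StringError>("symbol not found: " + Name,
                                             llvm::inconvertibleErrorCode());
}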
- template - ModuleHandleT addModule(std::shared_ptr M, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { + Expected + addModule(std::shared_ptr M, + std::shared_ptr Resolver) { LogicalDylibs.push_back(LogicalDylib()); auto &LD = LogicalDylibs.back(); LD.ExternalSymbolResolver = std::move(Resolver); LD.StubsMgr = CreateIndirectStubsManager(); - auto &MemMgrRef = *MemMgr; - LD.MemMgr = wrapOwnership(std::move(MemMgr)); - - LD.ModuleAdder = - [&MemMgrRef](BaseLayerT &B, std::unique_ptr M, - std::unique_ptr R) { - return B.addModule(std::move(M), &MemMgrRef, std::move(R)); - }; - // Process each of the modules in this module set. - addLogicalModule(LogicalDylibs.back(), std::move(M)); + if (auto Err = addLogicalModule(LD, std::move(M))) + return std::move(Err); return std::prev(LogicalDylibs.end()); } /// @brief Add extra modules to an existing logical module. - void addExtraModule(ModuleHandleT H, std::shared_ptr M) { - addLogicalModule(*H, std::move(M)); + Error addExtraModule(ModuleHandleT H, std::shared_ptr M) { + return addLogicalModule(*H, std::move(M)); } /// @brief Remove the module represented by the given handle. /// /// This will remove all modules in the layers below that were derived from /// the module represented by H. - void removeModule(ModuleHandleT H) { - H->removeModulesFromBaseLayer(BaseLayer); + Error removeModule(ModuleHandleT H) { + auto Err = H->removeModulesFromBaseLayer(BaseLayer); LogicalDylibs.erase(H); + return Err; } /// @brief Search for the given named symbol. @@ -272,6 +268,8 @@ class CompileOnDemandLayer { return Sym; if (auto Sym = findSymbolIn(LDI, Name, ExportedSymbolsOnly)) return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); } return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); } @@ -309,8 +307,9 @@ class CompileOnDemandLayer { } private: - template - void addLogicalModule(LogicalDylib &LD, ModulePtrT SrcMPtr) { + + Error addLogicalModule(LogicalDylib &LD, std::shared_ptr SrcMPtr) { + // Rename all static functions / globals to $static.X : // This will unique the names across all modules in the logical dylib, // simplifying symbol lookup. @@ -322,7 +321,7 @@ class CompileOnDemandLayer { // Create a logical module handle for SrcM within the logical dylib. Module &SrcM = *SrcMPtr; - auto LMId = LD.addSourceModule(wrapOwnership(std::move(SrcMPtr))); + auto LMId = LD.addSourceModule(std::move(SrcMPtr)); // Create stub functions. const DataLayout &DL = SrcM.getDataLayout(); @@ -335,9 +334,12 @@ class CompileOnDemandLayer { // Skip weak functions for which we already have definitions. auto MangledName = mangle(F.getName(), DL); - if (F.hasWeakLinkage() || F.hasLinkOnceLinkage()) + if (F.hasWeakLinkage() || F.hasLinkOnceLinkage()) { if (auto Sym = LD.findSymbol(BaseLayer, MangledName, false)) continue; + else if (auto Err = Sym.takeError()) + return std::move(Err); + } // Record all functions defined by this module. if (CloneStubsIntoPartitions) @@ -350,9 +352,15 @@ class CompileOnDemandLayer { StubInits[MangledName] = std::make_pair(CCInfo.getAddress(), JITSymbolFlags::fromGlobalValue(F)); - CCInfo.setCompileAction([this, &LD, LMId, &F]() { - return this->extractAndCompile(LD, LMId, F); - }); + CCInfo.setCompileAction([this, &LD, LMId, &F]() -> JITTargetAddress { + if (auto FnImplAddrOrErr = this->extractAndCompile(LD, LMId, F)) + return *FnImplAddrOrErr; + else { + // FIXME: Report error, return to 'abort' or something similar. 
+ consumeError(FnImplAddrOrErr.takeError()); + return 0; + } + }); } auto EC = LD.StubsMgr->createStubs(StubInits); @@ -367,7 +375,7 @@ class CompileOnDemandLayer { // empty globals module. if (SrcM.global_empty() && SrcM.alias_empty() && !SrcM.getModuleFlagsMetadata()) - return; + return Error::success(); // Create the GlobalValues module. auto GVsM = llvm::make_unique((SrcM.getName() + ".globals").str(), @@ -393,8 +401,9 @@ class CompileOnDemandLayer { // Initializers may refer to functions declared (but not defined) in this // module. Build a materializer to clone decls on demand. + Error MaterializerErrors = Error::success(); auto Materializer = createLambdaMaterializer( - [&LD, &GVsM](Value *V) -> Value* { + [&LD, &GVsM, &MaterializerErrors](Value *V) -> Value* { if (auto *F = dyn_cast(V)) { // Decls in the original module just get cloned. if (F->isDeclaration()) @@ -405,13 +414,24 @@ class CompileOnDemandLayer { // instead. const DataLayout &DL = GVsM->getDataLayout(); std::string FName = mangle(F->getName(), DL); - auto StubSym = LD.StubsMgr->findStub(FName, false); unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType()); - ConstantInt *StubAddr = - ConstantInt::get(GVsM->getContext(), - APInt(PtrBitWidth, StubSym.getAddress())); + JITTargetAddress StubAddr = 0; + + // Get the address for the stub. If we encounter an error while + // doing so, stash it in the MaterializerErrors variable and use a + // null address as a placeholder. + if (auto StubSym = LD.StubsMgr->findStub(FName, false)) { + if (auto StubAddrOrErr = StubSym.getAddress()) + StubAddr = *StubAddrOrErr; + else + MaterializerErrors = joinErrors(std::move(MaterializerErrors), + StubAddrOrErr.takeError()); + } + + ConstantInt *StubAddrCI = + ConstantInt::get(GVsM->getContext(), APInt(PtrBitWidth, StubAddr)); Constant *Init = ConstantExpr::getCast(Instruction::IntToPtr, - StubAddr, F->getType()); + StubAddrCI, F->getType()); return GlobalAlias::create(F->getFunctionType(), F->getType()->getAddressSpace(), F->getLinkage(), F->getName(), @@ -435,22 +455,31 @@ class CompileOnDemandLayer { NewA->setAliasee(cast(Init)); } + if (MaterializerErrors) + return MaterializerErrors; + // Build a resolver for the globals module and add it to the base layer. auto GVsResolver = createLambdaResolver( - [this, &LD](const std::string &Name) { + [this, &LD](const std::string &Name) -> JITSymbol { if (auto Sym = LD.StubsMgr->findStub(Name, false)) return Sym; if (auto Sym = LD.findSymbol(BaseLayer, Name, false)) return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); return LD.ExternalSymbolResolver->findSymbolInLogicalDylib(Name); }, [&LD](const std::string &Name) { return LD.ExternalSymbolResolver->findSymbol(Name); }); - auto GVsH = LD.ModuleAdder(BaseLayer, std::move(GVsM), - std::move(GVsResolver)); - LD.BaseLayerHandles.push_back(GVsH); + if (auto GVsHOrErr = + BaseLayer.addModule(std::move(GVsM), std::move(GVsResolver))) + LD.BaseLayerHandles.push_back(*GVsHOrErr); + else + return GVsHOrErr.takeError(); + + return Error::success(); } static std::string mangle(StringRef Name, const DataLayout &DL) { @@ -462,7 +491,7 @@ class CompileOnDemandLayer { return MangledName; } - JITTargetAddress + Expected extractAndCompile(LogicalDylib &LD, typename LogicalDylib::SourceModuleHandle LMId, Function &F) { @@ -475,34 +504,42 @@ class CompileOnDemandLayer { // Grab the name of the function being called here. 
std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout()); - auto Part = Partition(F); - auto PartH = emitPartition(LD, LMId, Part); - JITTargetAddress CalledAddr = 0; - for (auto *SubF : Part) { - std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout()); - auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false); - assert(FnBodySym && "Couldn't find function body."); + auto Part = Partition(F); + if (auto PartHOrErr = emitPartition(LD, LMId, Part)) { + auto &PartH = *PartHOrErr; + for (auto *SubF : Part) { + std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout()); + if (auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false)) { + if (auto FnBodyAddrOrErr = FnBodySym.getAddress()) { + JITTargetAddress FnBodyAddr = *FnBodyAddrOrErr; - JITTargetAddress FnBodyAddr = FnBodySym.getAddress(); + // If this is the function we're calling record the address so we can + // return it from this function. + if (SubF == &F) + CalledAddr = FnBodyAddr; - // If this is the function we're calling record the address so we can - // return it from this function. - if (SubF == &F) - CalledAddr = FnBodyAddr; + // Update the function body pointer for the stub. + if (auto EC = LD.StubsMgr->updatePointer(FnName, FnBodyAddr)) + return 0; - // Update the function body pointer for the stub. - if (auto EC = LD.StubsMgr->updatePointer(FnName, FnBodyAddr)) - return 0; - } + } else + return FnBodyAddrOrErr.takeError(); + } else if (auto Err = FnBodySym.takeError()) + return std::move(Err); + else + llvm_unreachable("Function not emitted for partition"); + } - LD.BaseLayerHandles.push_back(PartH); + LD.BaseLayerHandles.push_back(PartH); + } else + return PartHOrErr.takeError(); return CalledAddr; } template - BaseLayerModuleHandleT + Expected emitPartition(LogicalDylib &LD, typename LogicalDylib::SourceModuleHandle LMId, const PartitionT &Part) { @@ -566,16 +603,18 @@ class CompileOnDemandLayer { // Create memory manager and symbol resolver. auto Resolver = createLambdaResolver( - [this, &LD](const std::string &Name) { + [this, &LD](const std::string &Name) -> JITSymbol { if (auto Sym = LD.findSymbol(BaseLayer, Name, false)) return Sym; + else if (auto Err = Sym.takeError()) + return std::move(Err); return LD.ExternalSymbolResolver->findSymbolInLogicalDylib(Name); }, [&LD](const std::string &Name) { return LD.ExternalSymbolResolver->findSymbol(Name); }); - return LD.ModuleAdder(BaseLayer, std::move(M), std::move(Resolver)); + return BaseLayer.addModule(std::move(M), std::move(Resolver)); } BaseLayerT &BaseLayer; diff --git a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index bf8cca406844..d9b45c6a1e29 100644 --- a/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -17,6 +17,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/iterator_range.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/Orc/OrcError.h" #include #include #include @@ -99,19 +101,24 @@ class CtorDtorRunner { /// @brief Run the recorded constructors/destructors through the given JIT /// layer. 
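The resolver lambdas above now return JITSymbol and forward lookup errors instead of asserting. The same pattern as a standalone sketch built with createLambdaResolver (illustrative; LayerT and the fallback behaviour are placeholders):

#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include <memory>
#include <string>

template <typename LayerT>
std::shared_ptr<llvm::JITSymbolResolver> makeResolver(LayerT &Layer) {
  return llvm::orc::createLambdaResolver(
      [&Layer](const std::string &Name) -> llvm::JITSymbol {
        // Resolve within the JIT'd modules first, propagating any error.
        if (auto Sym = Layer.findSymbol(Name, false))
          return Sym;
        else if (auto Err = Sym.takeError())
          return std::move(Err);
        return nullptr; // not defined in this logical dylib
      },
      [](const std::string &Name) -> llvm::JITSymbol {
        return nullptr; // no external/process symbols in this sketch
      });
}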
- bool runViaLayer(JITLayerT &JITLayer) const { + Error runViaLayer(JITLayerT &JITLayer) const { using CtorDtorTy = void (*)(); - bool Error = false; for (const auto &CtorDtorName : CtorDtorNames) if (auto CtorDtorSym = JITLayer.findSymbolIn(H, CtorDtorName, false)) { - CtorDtorTy CtorDtor = - reinterpret_cast( - static_cast(CtorDtorSym.getAddress())); - CtorDtor(); - } else - Error = true; - return !Error; + if (auto AddrOrErr = CtorDtorSym.getAddress()) { + CtorDtorTy CtorDtor = + reinterpret_cast(static_cast(*AddrOrErr)); + CtorDtor(); + } else + return AddrOrErr.takeError(); + } else { + if (auto Err = CtorDtorSym.takeError()) + return Err; + else + return make_error(CtorDtorName); + } + return Error::success(); } private: diff --git a/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h index d582e9a33241..ff54ef625ebb 100644 --- a/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h @@ -17,9 +17,14 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include +#include #include namespace llvm { + +class Module; +class JITSymbolResolver; + namespace orc { /// @brief Global mapping layer. @@ -32,25 +37,22 @@ namespace orc { template class GlobalMappingLayer { public: - /// @brief Handle to a set of added modules. - using ModuleSetHandleT = typename BaseLayerT::ModuleSetHandleT; + + /// @brief Handle to an added module. + using ModuleHandleT = typename BaseLayerT::ModuleHandleT; /// @brief Construct an GlobalMappingLayer with the given BaseLayer GlobalMappingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {} - /// @brief Add the given module set to the JIT. + /// @brief Add the given module to the JIT. /// @return A handle for the added modules. - template - ModuleSetHandleT addModuleSet(ModuleSetT Ms, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr), - std::move(Resolver)); + ModuleHandleT addModule(std::shared_ptr M, + std::shared_ptr Resolver) { + return BaseLayer.addModule(std::move(M), std::move(Resolver)); } /// @brief Remove the module set associated with the handle H. - void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeModuleSet(H); } + void removeModule(ModuleHandleT H) { BaseLayer.removeModule(H); } /// @brief Manually set the address to return for the given symbol. void setGlobalMapping(const std::string &Name, JITTargetAddress Addr) { @@ -78,15 +80,15 @@ class GlobalMappingLayer { return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); } - /// @brief Get the address of the given symbol in the context of the set of - /// modules represented by the handle H. This call is forwarded to the + /// @brief Get the address of the given symbol in the context of the of the + /// module represented by the handle H. This call is forwarded to the /// base layer's implementation. - /// @param H The handle for the module set to search in. + /// @param H The handle for the module to search in. /// @param Name The name of the symbol to search for. /// @param ExportedSymbolsOnly If true, search only for exported symbols. /// @return A handle for the given named symbol, if it is found in the - /// given module set. - JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name, + /// given module. 
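runViaLayer() now reports failure through Error rather than a bool, so the caller has to consume the result explicitly. A minimal sketch (JITLayerT is whatever layer type the runner was instantiated with):

#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

template <typename JITLayerT>
bool runStaticCtors(llvm::orc::CtorDtorRunner<JITLayerT> &Runner,
                    JITLayerT &Layer) {
  if (llvm::Error Err = Runner.runViaLayer(Layer)) {
    llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(),
                                "constructor execution failed: ");
    return false;
  }
  return true;
}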
+ JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name, bool ExportedSymbolsOnly) { return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly); } @@ -94,7 +96,7 @@ class GlobalMappingLayer { /// @brief Immediately emit and finalize the module set represented by the /// given handle. /// @param H Handle for module set to emit/finalize. - void emitAndFinalize(ModuleSetHandleT H) { + void emitAndFinalize(ModuleHandleT H) { BaseLayer.emitAndFinalize(H); } diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index 99ccd4d221a5..fadd334bed0f 100644 --- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -50,18 +50,18 @@ class IRCompileLayer { /// along with the given memory manager and symbol resolver. /// /// @return A handle for the added module. - template - ModuleHandleT addModule(std::shared_ptr M, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { + Expected + addModule(std::shared_ptr M, + std::shared_ptr Resolver) { using CompileResult = decltype(Compile(*M)); auto Obj = std::make_shared(Compile(*M)); - return BaseLayer.addObject(std::move(Obj), std::move(MemMgr), - std::move(Resolver)); + return BaseLayer.addObject(std::move(Obj), std::move(Resolver)); } /// @brief Remove the module associated with the handle H. - void removeModule(ModuleHandleT H) { BaseLayer.removeObject(H); } + Error removeModule(ModuleHandleT H) { + return BaseLayer.removeObject(H); + } /// @brief Search for the given named symbol. /// @param Name The name of the symbol to search for. @@ -87,8 +87,8 @@ class IRCompileLayer { /// @brief Immediately emit and finalize the module represented by the given /// handle. /// @param H Handle for module to emit/finalize. - void emitAndFinalize(ModuleHandleT H) { - BaseLayer.emitAndFinalize(H); + Error emitAndFinalize(ModuleHandleT H) { + return BaseLayer.emitAndFinalize(H); } private: diff --git a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h index cf6556a33bbd..476061afda59 100644 --- a/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h @@ -42,16 +42,14 @@ class IRTransformLayer { /// the layer below, along with the memory manager and symbol resolver. /// /// @return A handle for the added modules. - template - ModuleHandleT addModule(std::shared_ptr M, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - return BaseLayer.addModule(Transform(std::move(M)), std::move(MemMgr), - std::move(Resolver)); + Expected + addModule(std::shared_ptr M, + std::shared_ptr Resolver) { + return BaseLayer.addModule(Transform(std::move(M)), std::move(Resolver)); } /// @brief Remove the module associated with the handle H. - void removeModule(ModuleHandleT H) { BaseLayer.removeModule(H); } + Error removeModule(ModuleHandleT H) { return BaseLayer.removeModule(H); } /// @brief Search for the given named symbol. /// @param Name The name of the symbol to search for. @@ -77,8 +75,8 @@ class IRTransformLayer { /// @brief Immediately emit and finalize the module represented by the given /// handle. /// @param H Handle for module to emit/finalize. - void emitAndFinalize(ModuleHandleT H) { - BaseLayer.emitAndFinalize(H); + Error emitAndFinalize(ModuleHandleT H) { + return BaseLayer.emitAndFinalize(H); } /// @brief Access the transform functor directly. 
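Since addModule() now yields Expected<ModuleHandleT> and removeModule()/emitAndFinalize() yield Error, the handle is unwrapped before use. A sketch against a generic compile layer (CompileLayerT stands for whichever IRCompileLayer instantiation the JIT stack uses; not part of the patch):

#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Error.h"
#include <memory>

template <typename CompileLayerT>
llvm::Error addAndEmit(CompileLayerT &CompileLayer,
                       std::shared_ptr<llvm::Module> M,
                       std::shared_ptr<llvm::JITSymbolResolver> Resolver) {
  auto HandleOrErr = CompileLayer.addModule(std::move(M), std::move(Resolver));
  if (!HandleOrErr)
    return HandleOrErr.takeError();
  // Compile eagerly instead of waiting for the first symbol lookup.
  return CompileLayer.emitAndFinalize(*HandleOrErr);
}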
diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h index 6868640d38e8..228392ae0d4a 100644 --- a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h +++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h @@ -45,7 +45,7 @@ class LambdaResolver : public JITSymbolResolver { template -std::unique_ptr> +std::shared_ptr> createLambdaResolver(DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) { using LR = LambdaResolver; diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h index 38769aac12af..6c951fab6185 100644 --- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h @@ -46,8 +46,9 @@ template class LazyEmittingLayer { private: class EmissionDeferredModule { public: - EmissionDeferredModule() = default; - virtual ~EmissionDeferredModule() = default; + EmissionDeferredModule(std::shared_ptr M, + std::shared_ptr Resolver) + : M(std::move(M)), Resolver(std::move(Resolver)) {} JITSymbol find(StringRef Name, bool ExportedSymbolsOnly, BaseLayerT &B) { switch (EmitState) { @@ -59,16 +60,24 @@ template class LazyEmittingLayer { std::string PName = Name; JITSymbolFlags Flags = JITSymbolFlags::fromGlobalValue(*GV); auto GetAddress = - [this, ExportedSymbolsOnly, PName, &B]() -> JITTargetAddress { + [this, ExportedSymbolsOnly, PName, &B]() -> Expected { if (this->EmitState == Emitting) return 0; else if (this->EmitState == NotEmitted) { this->EmitState = Emitting; - Handle = this->emitToBaseLayer(B); + if (auto HandleOrErr = this->emitToBaseLayer(B)) + Handle = std::move(*HandleOrErr); + else + return HandleOrErr.takeError(); this->EmitState = Emitted; } - auto Sym = B.findSymbolIn(Handle, PName, ExportedSymbolsOnly); - return Sym.getAddress(); + if (auto Sym = B.findSymbolIn(Handle, PName, ExportedSymbolsOnly)) + return Sym.getAddress(); + else if (auto Err = Sym.takeError()) + return std::move(Err); + else + llvm_unreachable("Successful symbol lookup should return " + "definition address here"); }; return JITSymbol(std::move(GetAddress), Flags); } else @@ -101,33 +110,10 @@ template class LazyEmittingLayer { BaseLayer.emitAndFinalize(Handle); } - template - static std::unique_ptr - create(BaseLayerT &B, std::shared_ptr M, MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver); - - protected: - virtual const GlobalValue* searchGVs(StringRef Name, - bool ExportedSymbolsOnly) const = 0; - virtual BaseLayerHandleT emitToBaseLayer(BaseLayerT &BaseLayer) = 0; - private: - enum { NotEmitted, Emitting, Emitted } EmitState = NotEmitted; - BaseLayerHandleT Handle; - }; - template - class EmissionDeferredModuleImpl : public EmissionDeferredModule { - public: - EmissionDeferredModuleImpl(std::shared_ptr M, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) - : M(std::move(M)), MemMgr(std::move(MemMgr)), - Resolver(std::move(Resolver)) {} - - protected: const GlobalValue* searchGVs(StringRef Name, - bool ExportedSymbolsOnly) const override { + bool ExportedSymbolsOnly) const { // FIXME: We could clean all this up if we had a way to reliably demangle // names: We could just demangle name and search, rather than // mangling everything else. 
@@ -149,15 +135,13 @@ template class LazyEmittingLayer { return buildMangledSymbols(Name, ExportedSymbolsOnly); } - BaseLayerHandleT emitToBaseLayer(BaseLayerT &BaseLayer) override { + Expected emitToBaseLayer(BaseLayerT &BaseLayer) { // We don't need the mangled names set any more: Once we've emitted this // to the base layer we'll just look for symbols there. MangledSymbols.reset(); - return BaseLayer.addModule(std::move(M), std::move(MemMgr), - std::move(Resolver)); + return BaseLayer.addModule(std::move(M), std::move(Resolver)); } - private: // If the mangled name of the given GlobalValue matches the given search // name (and its visibility conforms to the ExportedSymbolsOnly flag) then // return the symbol. Otherwise, add the mangled name to the Names map and @@ -207,9 +191,10 @@ template class LazyEmittingLayer { return nullptr; } + enum { NotEmitted, Emitting, Emitted } EmitState = NotEmitted; + BaseLayerHandleT Handle; std::shared_ptr M; - MemoryManagerPtrT MemMgr; - SymbolResolverPtrT Resolver; + std::shared_ptr Resolver; mutable std::unique_ptr> MangledSymbols; }; @@ -219,6 +204,7 @@ template class LazyEmittingLayer { ModuleListT ModuleList; public: + /// @brief Handle to a loaded module. using ModuleHandleT = typename ModuleListT::iterator; @@ -226,24 +212,23 @@ template class LazyEmittingLayer { LazyEmittingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {} /// @brief Add the given module to the lazy emitting layer. - template - ModuleHandleT addModule(std::shared_ptr M, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { + Expected + addModule(std::shared_ptr M, + std::shared_ptr Resolver) { return ModuleList.insert( ModuleList.end(), - EmissionDeferredModule::create(BaseLayer, std::move(M), - std::move(MemMgr), - std::move(Resolver))); + llvm::make_unique(std::move(M), + std::move(Resolver))); } /// @brief Remove the module represented by the given handle. /// /// This method will free the memory associated with the given module, both /// in this layer, and the base layer. - void removeModule(ModuleHandleT H) { + Error removeModule(ModuleHandleT H) { (*H)->removeModuleFromBaseLayer(BaseLayer); ModuleList.erase(H); + return Error::success(); } /// @brief Search for the given named symbol. @@ -276,22 +261,11 @@ template class LazyEmittingLayer { /// @brief Immediately emit and finalize the module represented by the given /// handle. /// @param H Handle for module to emit/finalize. - void emitAndFinalize(ModuleHandleT H) { - (*H)->emitAndFinalize(BaseLayer); + Error emitAndFinalize(ModuleHandleT H) { + return (*H)->emitAndFinalize(BaseLayer); } }; -template -template -std::unique_ptr::EmissionDeferredModule> -LazyEmittingLayer::EmissionDeferredModule::create( - BaseLayerT &B, std::shared_ptr M, MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - using EDS = EmissionDeferredModuleImpl; - return llvm::make_unique(std::move(M), std::move(MemMgr), - std::move(Resolver)); -} - } // end namespace orc } // end namespace llvm diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index c41c1233c0d9..cb47e7520b1a 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -16,6 +16,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include +#include #include namespace llvm { @@ -42,16 +43,14 @@ class ObjectTransformLayer { /// memory manager and symbol resolver. /// /// @return A handle for the added objects. 
- template - ObjHandleT addObject(ObjPtrT Obj, MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - return BaseLayer.addObject(Transform(std::move(Obj)), std::move(MemMgr), - std::move(Resolver)); + template + Expected addObject(ObjectPtr Obj, + std::shared_ptr Resolver) { + return BaseLayer.addObject(Transform(std::move(Obj)), std::move(Resolver)); } /// @brief Remove the object set associated with the handle H. - void removeObject(ObjHandleT H) { BaseLayer.removeObject(H); } + Error removeObject(ObjHandleT H) { return BaseLayer.removeObject(H); } /// @brief Search for the given named symbol. /// @param Name The name of the symbol to search for. @@ -77,7 +76,9 @@ class ObjectTransformLayer { /// @brief Immediately emit and finalize the object set represented by the /// given handle. /// @param H Handle for object set to emit/finalize. - void emitAndFinalize(ObjHandleT H) { BaseLayer.emitAndFinalize(H); } + Error emitAndFinalize(ObjHandleT H) { + return BaseLayer.emitAndFinalize(H); + } /// @brief Map section addresses for the objects associated with the handle H. void mapSectionAddress(ObjHandleT H, const void *LocalAddress, diff --git a/include/llvm/ExecutionEngine/Orc/OrcError.h b/include/llvm/ExecutionEngine/Orc/OrcError.h index cbb40fad0223..e6374b70967a 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcError.h +++ b/include/llvm/ExecutionEngine/Orc/OrcError.h @@ -22,7 +22,8 @@ namespace orc { enum class OrcErrorCode : int { // RPC Errors - RemoteAllocatorDoesNotExist = 1, + JITSymbolNotFound = 1, + RemoteAllocatorDoesNotExist, RemoteAllocatorIdAlreadyInUse, RemoteMProtectAddrUnrecognized, RemoteIndirectStubsOwnerDoesNotExist, @@ -37,6 +38,18 @@ enum class OrcErrorCode : int { std::error_code orcError(OrcErrorCode ErrCode); +class JITSymbolNotFound : public ErrorInfo { +public: + static char ID; + + JITSymbolNotFound(std::string SymbolName); + std::error_code convertToErrorCode() const override; + void log(raw_ostream &OS) const override; + const std::string &getSymbolName() const; +private: + std::string SymbolName; +}; + } // End namespace orc. } // End namespace llvm. diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index 66ad36be01c8..e1016ef95f0c 100644 --- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -228,13 +228,20 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase { public: + /// @brief Functor for creating memory managers. + using MemoryManagerGetter = + std::function()>; + /// @brief Construct an ObjectLinkingLayer with the given NotifyLoaded, /// and NotifyFinalized functors. RTDyldObjectLinkingLayer( + MemoryManagerGetter GetMemMgr, NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(), NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor()) - : NotifyLoaded(std::move(NotifyLoaded)), - NotifyFinalized(std::move(NotifyFinalized)) {} + : GetMemMgr(GetMemMgr), + NotifyLoaded(std::move(NotifyLoaded)), + NotifyFinalized(std::move(NotifyFinalized)), + ProcessAllSections(false) {} /// @brief Set the 'ProcessAllSections' flag. /// @@ -251,12 +258,8 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase { /// /// @return A handle that can be used to refer to the loaded objects (for /// symbol searching, finalization, freeing memory, etc.). 
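Memory-manager creation now happens through the MemoryManagerGetter functor handed to the layer's constructor instead of being passed with every addObject() call. A construction sketch, assuming SectionMemoryManager as the per-object manager (as the in-tree ORC JIT examples do):

#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include <memory>

void buildObjectLayer() {
  // Every object added to the layer gets its own SectionMemoryManager.
  llvm::orc::RTDyldObjectLinkingLayer ObjectLayer(
      []() { return std::make_shared<llvm::SectionMemoryManager>(); });
  (void)ObjectLayer; // addObject(Obj, Resolver) would draw from the getter
}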
- template - ObjHandleT addObject(ObjectPtr Obj, - MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) { - + Expected addObject(ObjectPtr Obj, + std::shared_ptr Resolver) { auto Finalizer = [&](ObjHandleT H, RuntimeDyld &RTDyld, const ObjectPtr &ObjToLoad, std::function LOSHandleLoad) { @@ -275,8 +278,9 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase { }; auto LO = - createLinkedObject(std::move(Obj), std::move(MemMgr), std::move(Resolver), - std::move(Finalizer), ProcessAllSections); + createLinkedObject(std::move(Obj), GetMemMgr(), + std::move(Resolver), std::move(Finalizer), + ProcessAllSections); // LOS is an owning-ptr. Keep a non-owning one so that we can set the handle // below. auto *LOPtr = LO.get(); @@ -295,9 +299,10 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase { /// indirectly) will result in undefined behavior. If dependence tracking is /// required to detect or resolve such issues it should be added at a higher /// layer. - void removeObject(ObjHandleT H) { + Error removeObject(ObjHandleT H) { // How do we invalidate the symbols in H? LinkedObjList.erase(H); + return Error::success(); } /// @brief Search for the given named symbol. @@ -334,13 +339,15 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase { /// @brief Immediately emit and finalize the object set represented by the /// given handle. /// @param H Handle for object set to emit/finalize. - void emitAndFinalize(ObjHandleT H) { + Error emitAndFinalize(ObjHandleT H) { (*H)->finalize(); + return Error::success(); } private: LinkedObjectListT LinkedObjList; + MemoryManagerGetter GetMemMgr; NotifyLoadedFtor NotifyLoaded; NotifyFinalizedFtor NotifyFinalized; bool ProcessAllSections = false; diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index 1925489f7952..56aa04ce694a 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -88,21 +88,6 @@ class RuntimeDyld { ObjSectionToIDMap ObjSecToIDMap; }; - template struct LoadedObjectInfoHelper : LoadedObjectInfo { - protected: - LoadedObjectInfoHelper(const LoadedObjectInfoHelper &) = default; - LoadedObjectInfoHelper() = default; - - public: - LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld, - LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap) - : LoadedObjectInfo(RTDyld, std::move(ObjSecToIDMap)) {} - - std::unique_ptr clone() const override { - return llvm::make_unique(static_cast(*this)); - } - }; - /// \brief Memory Management. class MemoryManager { friend class RuntimeDyld; diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h index 8b3a90fa065b..2e72c41ccee3 100644 --- a/include/llvm/IR/Constants.h +++ b/include/llvm/IR/Constants.h @@ -680,11 +680,6 @@ class ConstantDataArray final : public ConstantDataSequential { explicit ConstantDataArray(Type *ty, const char *Data) : ConstantDataSequential(ty, ConstantDataArrayVal, Data) {} - /// Allocate space for exactly zero operands. - void *operator new(size_t s) { - return User::operator new(s, 0); - } - public: ConstantDataArray(const ConstantDataArray &) = delete; @@ -739,11 +734,6 @@ class ConstantDataVector final : public ConstantDataSequential { explicit ConstantDataVector(Type *ty, const char *Data) : ConstantDataSequential(ty, ConstantDataVectorVal, Data) {} - // allocate space for exactly zero operands. 
- void *operator new(size_t s) { - return User::operator new(s, 0); - } - public: ConstantDataVector(const ConstantDataVector &) = delete; diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index ec33f82f7022..5344a93efb33 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -1062,7 +1062,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") { if (Constant *RC = dyn_cast(RHS)) { - if (isa(RC) && cast(RC)->isAllOnesValue()) + if (isa(RC) && cast(RC)->isMinusOne()) return LHS; // LHS & -1 -> LHS if (Constant *LC = dyn_cast(LHS)) return Insert(Folder.CreateAnd(LC, RC), Name); @@ -1203,22 +1203,22 @@ class IRBuilder : public IRBuilderBase, public Inserter { return SI; } FenceInst *CreateFence(AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, const Twine &Name = "") { - return Insert(new FenceInst(Context, Ordering, SynchScope), Name); + return Insert(new FenceInst(Context, Ordering, SSID), Name); } AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope = CrossThread) { + SyncScope::ID SSID = SyncScope::System) { return Insert(new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, - FailureOrdering, SynchScope)); + FailureOrdering, SSID)); } AtomicRMWInst *CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread) { - return Insert(new AtomicRMWInst(Op, Ptr, Val, Ordering, SynchScope)); + SyncScope::ID SSID = SyncScope::System) { + return Insert(new AtomicRMWInst(Op, Ptr, Val, Ordering, SSID)); } Value *CreateGEP(Value *Ptr, ArrayRef IdxList, const Twine &Name = "") { @@ -1517,11 +1517,9 @@ class IRBuilder : public IRBuilderBase, public Inserter { const Twine &Name = "") { if (V->getType() == DestTy) return V; - if (V->getType()->getScalarType()->isPointerTy() && - DestTy->getScalarType()->isIntegerTy()) + if (V->getType()->isPtrOrPtrVectorTy() && DestTy->isIntOrIntVectorTy()) return CreatePtrToInt(V, DestTy, Name); - if (V->getType()->getScalarType()->isIntegerTy() && - DestTy->getScalarType()->isPointerTy()) + if (V->getType()->isIntOrIntVectorTy() && DestTy->isPtrOrPtrVectorTy()) return CreateIntToPtr(V, DestTy, Name); return CreateBitCast(V, DestTy, Name); diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index dc5f37450b48..60ae98869e55 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -52,11 +52,6 @@ class ConstantInt; class DataLayout; class LLVMContext; -enum SynchronizationScope { - SingleThread = 0, - CrossThread = 1 -}; - //===----------------------------------------------------------------------===// // AllocaInst Class //===----------------------------------------------------------------------===// @@ -195,17 +190,16 @@ class LoadInst : public UnaryInstruction { LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, BasicBlock *InsertAtEnd); LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, - AtomicOrdering Order, SynchronizationScope SynchScope = CrossThread, + AtomicOrdering Order, SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr) : LoadInst(cast(Ptr->getType())->getElementType(), Ptr, - NameStr, isVolatile, Align, Order, SynchScope, InsertBefore) {} + NameStr, 
isVolatile, Align, Order, SSID, InsertBefore) {} LoadInst(Type *Ty, Value *Ptr, const Twine &NameStr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr); LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore); LoadInst(Value *Ptr, const char *NameStr, BasicBlock *InsertAtEnd); @@ -235,34 +229,34 @@ class LoadInst : public UnaryInstruction { void setAlignment(unsigned Align); - /// Returns the ordering effect of this fence. + /// Returns the ordering constraint of this load instruction. AtomicOrdering getOrdering() const { return AtomicOrdering((getSubclassDataFromInstruction() >> 7) & 7); } - /// Set the ordering constraint on this load. May not be Release or - /// AcquireRelease. + /// Sets the ordering constraint of this load instruction. May not be Release + /// or AcquireRelease. void setOrdering(AtomicOrdering Ordering) { setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 7)) | ((unsigned)Ordering << 7)); } - SynchronizationScope getSynchScope() const { - return SynchronizationScope((getSubclassDataFromInstruction() >> 6) & 1); + /// Returns the synchronization scope ID of this load instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Specify whether this load is ordered with respect to all - /// concurrently executing threads, or only with respect to signal handlers - /// executing in the same thread. - void setSynchScope(SynchronizationScope xthread) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~(1 << 6)) | - (xthread << 6)); + /// Sets the synchronization scope ID of this load instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } + /// Sets the ordering constraint and the synchronization scope ID of this load + /// instruction. void setAtomic(AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread) { + SyncScope::ID SSID = SyncScope::System) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } bool isSimple() const { return !isAtomic() && !isVolatile(); } @@ -297,6 +291,11 @@ class LoadInst : public UnaryInstruction { void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this load instruction. Not quite enough + /// room in SubClassData for everything, so synchronization scope ID gets its + /// own field. 
+ SyncScope::ID SSID; }; //===----------------------------------------------------------------------===// @@ -325,11 +324,10 @@ class StoreInst : public Instruction { unsigned Align, BasicBlock *InsertAtEnd); StoreInst(Value *Val, Value *Ptr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr); StoreInst(Value *Val, Value *Ptr, bool isVolatile, - unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + unsigned Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly two operands @@ -356,34 +354,34 @@ class StoreInst : public Instruction { void setAlignment(unsigned Align); - /// Returns the ordering effect of this store. + /// Returns the ordering constraint of this store instruction. AtomicOrdering getOrdering() const { return AtomicOrdering((getSubclassDataFromInstruction() >> 7) & 7); } - /// Set the ordering constraint on this store. May not be Acquire or - /// AcquireRelease. + /// Sets the ordering constraint of this store instruction. May not be + /// Acquire or AcquireRelease. void setOrdering(AtomicOrdering Ordering) { setInstructionSubclassData((getSubclassDataFromInstruction() & ~(7 << 7)) | ((unsigned)Ordering << 7)); } - SynchronizationScope getSynchScope() const { - return SynchronizationScope((getSubclassDataFromInstruction() >> 6) & 1); + /// Returns the synchronization scope ID of this store instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Specify whether this store instruction is ordered with respect to all - /// concurrently executing threads, or only with respect to signal handlers - /// executing in the same thread. - void setSynchScope(SynchronizationScope xthread) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~(1 << 6)) | - (xthread << 6)); + /// Sets the synchronization scope ID of this store instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } + /// Sets the ordering constraint and the synchronization scope ID of this + /// store instruction. void setAtomic(AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread) { + SyncScope::ID SSID = SyncScope::System) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } bool isSimple() const { return !isAtomic() && !isVolatile(); } @@ -421,6 +419,11 @@ class StoreInst : public Instruction { void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this store instruction. Not quite enough + /// room in SubClassData for everything, so synchronization scope ID gets its + /// own field. + SyncScope::ID SSID; }; template <> @@ -435,7 +438,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) /// An instruction for ordering other memory operations. class FenceInst : public Instruction { - void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope); + void Init(AtomicOrdering Ordering, SyncScope::ID SSID); protected: // Note: Instruction needs to be a friend here to call cloneImpl. @@ -447,10 +450,9 @@ class FenceInst : public Instruction { // Ordering may only be Acquire, Release, AcquireRelease, or // SequentiallyConsistent. 
FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope = CrossThread, + SyncScope::ID SSID = SyncScope::System, Instruction *InsertBefore = nullptr); - FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + FenceInst(LLVMContext &C, AtomicOrdering Ordering, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly zero operands @@ -458,28 +460,26 @@ class FenceInst : public Instruction { return User::operator new(s, 0); } - /// Returns the ordering effect of this fence. + /// Returns the ordering constraint of this fence instruction. AtomicOrdering getOrdering() const { return AtomicOrdering(getSubclassDataFromInstruction() >> 1); } - /// Set the ordering constraint on this fence. May only be Acquire, Release, - /// AcquireRelease, or SequentiallyConsistent. + /// Sets the ordering constraint of this fence instruction. May only be + /// Acquire, Release, AcquireRelease, or SequentiallyConsistent. void setOrdering(AtomicOrdering Ordering) { setInstructionSubclassData((getSubclassDataFromInstruction() & 1) | ((unsigned)Ordering << 1)); } - SynchronizationScope getSynchScope() const { - return SynchronizationScope(getSubclassDataFromInstruction() & 1); + /// Returns the synchronization scope ID of this fence instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Specify whether this fence orders other operations with respect to all - /// concurrently executing threads, or only with respect to signal handlers - /// executing in the same thread. - void setSynchScope(SynchronizationScope xthread) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) | - xthread); + /// Sets the synchronization scope ID of this fence instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -496,6 +496,11 @@ class FenceInst : public Instruction { void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this fence instruction. Not quite enough + /// room in SubClassData for everything, so synchronization scope ID gets its + /// own field. + SyncScope::ID SSID; }; //===----------------------------------------------------------------------===// @@ -509,7 +514,7 @@ class FenceInst : public Instruction { class AtomicCmpXchgInst : public Instruction { void Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); protected: // Note: Instruction needs to be a friend here to call cloneImpl. @@ -521,13 +526,11 @@ class AtomicCmpXchgInst : public Instruction { AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, - Instruction *InsertBefore = nullptr); + SyncScope::ID SSID, Instruction *InsertBefore = nullptr); AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, - BasicBlock *InsertAtEnd); + SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly three operands void *operator new(size_t s) { @@ -561,7 +564,12 @@ class AtomicCmpXchgInst : public Instruction { /// Transparently provide more efficient getOperand methods. 
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - /// Set the ordering constraint on this cmpxchg. + /// Returns the success ordering constraint of this cmpxchg instruction. + AtomicOrdering getSuccessOrdering() const { + return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7); + } + + /// Sets the success ordering constraint of this cmpxchg instruction. void setSuccessOrdering(AtomicOrdering Ordering) { assert(Ordering != AtomicOrdering::NotAtomic && "CmpXchg instructions can only be atomic."); @@ -569,6 +577,12 @@ class AtomicCmpXchgInst : public Instruction { ((unsigned)Ordering << 2)); } + /// Returns the failure ordering constraint of this cmpxchg instruction. + AtomicOrdering getFailureOrdering() const { + return AtomicOrdering((getSubclassDataFromInstruction() >> 5) & 7); + } + + /// Sets the failure ordering constraint of this cmpxchg instruction. void setFailureOrdering(AtomicOrdering Ordering) { assert(Ordering != AtomicOrdering::NotAtomic && "CmpXchg instructions can only be atomic."); @@ -576,28 +590,14 @@ class AtomicCmpXchgInst : public Instruction { ((unsigned)Ordering << 5)); } - /// Specify whether this cmpxchg is atomic and orders other operations with - /// respect to all concurrently executing threads, or only with respect to - /// signal handlers executing in the same thread. - void setSynchScope(SynchronizationScope SynchScope) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) | - (SynchScope << 1)); + /// Returns the synchronization scope ID of this cmpxchg instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Returns the ordering constraint on this cmpxchg. - AtomicOrdering getSuccessOrdering() const { - return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7); - } - - /// Returns the ordering constraint on this cmpxchg. - AtomicOrdering getFailureOrdering() const { - return AtomicOrdering((getSubclassDataFromInstruction() >> 5) & 7); - } - - /// Returns whether this cmpxchg is atomic between threads or only within a - /// single thread. - SynchronizationScope getSynchScope() const { - return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1); + /// Sets the synchronization scope ID of this cmpxchg instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } Value *getPointerOperand() { return getOperand(0); } @@ -652,6 +652,11 @@ class AtomicCmpXchgInst : public Instruction { void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this cmpxchg instruction. Not quite + /// enough room in SubClassData for everything, so synchronization scope ID + /// gets its own field. + SyncScope::ID SSID; }; template <> @@ -711,10 +716,10 @@ class AtomicRMWInst : public Instruction { }; AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, - AtomicOrdering Ordering, SynchronizationScope SynchScope, + AtomicOrdering Ordering, SyncScope::ID SSID, Instruction *InsertBefore = nullptr); AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, - AtomicOrdering Ordering, SynchronizationScope SynchScope, + AtomicOrdering Ordering, SyncScope::ID SSID, BasicBlock *InsertAtEnd); // allocate space for exactly two operands @@ -748,7 +753,12 @@ class AtomicRMWInst : public Instruction { /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - /// Set the ordering constraint on this RMW. + /// Returns the ordering constraint of this rmw instruction. 
+ AtomicOrdering getOrdering() const { + return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7); + } + + /// Sets the ordering constraint of this rmw instruction. void setOrdering(AtomicOrdering Ordering) { assert(Ordering != AtomicOrdering::NotAtomic && "atomicrmw instructions can only be atomic."); @@ -756,23 +766,14 @@ class AtomicRMWInst : public Instruction { ((unsigned)Ordering << 2)); } - /// Specify whether this RMW orders other operations with respect to all - /// concurrently executing threads, or only with respect to signal handlers - /// executing in the same thread. - void setSynchScope(SynchronizationScope SynchScope) { - setInstructionSubclassData((getSubclassDataFromInstruction() & ~2) | - (SynchScope << 1)); + /// Returns the synchronization scope ID of this rmw instruction. + SyncScope::ID getSyncScopeID() const { + return SSID; } - /// Returns the ordering constraint on this RMW. - AtomicOrdering getOrdering() const { - return AtomicOrdering((getSubclassDataFromInstruction() >> 2) & 7); - } - - /// Returns whether this RMW is atomic between threads or only within a - /// single thread. - SynchronizationScope getSynchScope() const { - return SynchronizationScope((getSubclassDataFromInstruction() & 2) >> 1); + /// Sets the synchronization scope ID of this rmw instruction. + void setSyncScopeID(SyncScope::ID SSID) { + this->SSID = SSID; } Value *getPointerOperand() { return getOperand(0); } @@ -797,13 +798,18 @@ class AtomicRMWInst : public Instruction { private: void Init(BinOp Operation, Value *Ptr, Value *Val, - AtomicOrdering Ordering, SynchronizationScope SynchScope); + AtomicOrdering Ordering, SyncScope::ID SSID); // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. void setInstructionSubclassData(unsigned short D) { Instruction::setInstructionSubclassData(D); } + + /// The synchronization scope ID of this rmw instruction. Not quite enough + /// room in SubClassData for everything, so synchronization scope ID gets its + /// own field. + SyncScope::ID SSID; }; template <> @@ -1101,8 +1107,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) /// Represent an integer comparison operator. class ICmpInst: public CmpInst { void AssertOK() { - assert(getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && - getPredicate() <= CmpInst::LAST_ICMP_PREDICATE && + assert(isIntPredicate() && "Invalid ICmp predicate value"); assert(getOperand(0)->getType() == getOperand(1)->getType() && "Both operands to ICmp instruction are not of the same type!"); @@ -1244,8 +1249,7 @@ class ICmpInst: public CmpInst { /// Represents a floating point comparison operator. 
class FCmpInst: public CmpInst { void AssertOK() { - assert(getPredicate() <= FCmpInst::LAST_FCMP_PREDICATE && - "Invalid FCmp predicate value"); + assert(isFPPredicate() && "Invalid FCmp predicate value"); assert(getOperand(0)->getType() == getOperand(1)->getType() && "Both operands to FCmp instruction are not of the same type!"); // Check that the operands are the right type diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h index 944af57a7800..f55d17ec72c8 100644 --- a/include/llvm/IR/IntrinsicInst.h +++ b/include/llvm/IR/IntrinsicInst.h @@ -296,6 +296,175 @@ namespace llvm { } }; + class ElementUnorderedAtomicMemMoveInst : public IntrinsicInst { + private: + enum { ARG_DEST = 0, ARG_SOURCE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + + public: + Value *getRawDest() const { + return const_cast(getArgOperand(ARG_DEST)); + } + const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } + Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } + + /// Return the arguments to the instruction. + Value *getRawSource() const { + return const_cast(getArgOperand(ARG_SOURCE)); + } + const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } + Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } + + Value *getLength() const { + return const_cast(getArgOperand(ARG_LENGTH)); + } + const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); } + Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } + + bool isVolatile() const { return false; } + + Value *getRawElementSizeInBytes() const { + return const_cast(getArgOperand(ARG_ELEMENTSIZE)); + } + + ConstantInt *getElementSizeInBytesCst() const { + return cast(getRawElementSizeInBytes()); + } + + uint32_t getElementSizeInBytes() const { + return getElementSizeInBytesCst()->getZExtValue(); + } + + /// This is just like getRawDest, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getDest() const { return getRawDest()->stripPointerCasts(); } + + /// This is just like getRawSource, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getSource() const { return getRawSource()->stripPointerCasts(); } + + unsigned getDestAddressSpace() const { + return cast(getRawDest()->getType())->getAddressSpace(); + } + + unsigned getSourceAddressSpace() const { + return cast(getRawSource()->getType())->getAddressSpace(); + } + + /// Set the specified arguments of the instruction. 
+ void setDest(Value *Ptr) { + assert(getRawDest()->getType() == Ptr->getType() && + "setDest called with pointer of wrong type!"); + setArgOperand(ARG_DEST, Ptr); + } + + void setSource(Value *Ptr) { + assert(getRawSource()->getType() == Ptr->getType() && + "setSource called with pointer of wrong type!"); + setArgOperand(ARG_SOURCE, Ptr); + } + + void setLength(Value *L) { + assert(getLength()->getType() == L->getType() && + "setLength called with value of wrong type!"); + setArgOperand(ARG_LENGTH, L); + } + + void setElementSizeInBytes(Constant *V) { + assert(V->getType() == Type::getInt8Ty(getContext()) && + "setElementSizeInBytes called with value of wrong type!"); + setArgOperand(ARG_ELEMENTSIZE, V); + } + + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memmove_element_unordered_atomic; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + }; + + /// This class represents atomic memset intrinsic + /// TODO: Integrate this class into MemIntrinsic hierarchy; for now this is + /// C&P of all methods from that hierarchy + class ElementUnorderedAtomicMemSetInst : public IntrinsicInst { + private: + enum { ARG_DEST = 0, ARG_VALUE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 }; + + public: + Value *getRawDest() const { + return const_cast(getArgOperand(ARG_DEST)); + } + const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); } + Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); } + + Value *getValue() const { return const_cast(getArgOperand(ARG_VALUE)); } + const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); } + Use &getValueUse() { return getArgOperandUse(ARG_VALUE); } + + Value *getLength() const { + return const_cast(getArgOperand(ARG_LENGTH)); + } + const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); } + Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); } + + bool isVolatile() const { return false; } + + Value *getRawElementSizeInBytes() const { + return const_cast(getArgOperand(ARG_ELEMENTSIZE)); + } + + ConstantInt *getElementSizeInBytesCst() const { + return cast(getRawElementSizeInBytes()); + } + + uint32_t getElementSizeInBytes() const { + return getElementSizeInBytesCst()->getZExtValue(); + } + + /// This is just like getRawDest, but it strips off any cast + /// instructions that feed it, giving the original input. The returned + /// value is guaranteed to be a pointer. + Value *getDest() const { return getRawDest()->stripPointerCasts(); } + + unsigned getDestAddressSpace() const { + return cast(getRawDest()->getType())->getAddressSpace(); + } + + /// Set the specified arguments of the instruction. 
+ void setDest(Value *Ptr) { + assert(getRawDest()->getType() == Ptr->getType() && + "setDest called with pointer of wrong type!"); + setArgOperand(ARG_DEST, Ptr); + } + + void setValue(Value *Val) { + assert(getValue()->getType() == Val->getType() && + "setValue called with value of wrong type!"); + setArgOperand(ARG_VALUE, Val); + } + + void setLength(Value *L) { + assert(getLength()->getType() == L->getType() && + "setLength called with value of wrong type!"); + setArgOperand(ARG_LENGTH, L); + } + + void setElementSizeInBytes(Constant *V) { + assert(V->getType() == Type::getInt8Ty(getContext()) && + "setElementSizeInBytes called with value of wrong type!"); + setArgOperand(ARG_ELEMENTSIZE, V); + } + + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + }; + /// This is the common base class for memset/memcpy/memmove. class MemIntrinsic : public IntrinsicInst { public: diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 45936a6e9b66..14c88e519435 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -873,6 +873,22 @@ def int_memcpy_element_unordered_atomic ReadOnly<1> ]>; +// @llvm.memmove.element.unordered.atomic.*(dest, src, length, elementsize) +def int_memmove_element_unordered_atomic + : Intrinsic<[], + [ + llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i32_ty + ], + [ + IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, + ReadOnly<1> + ]>; + +// @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize) +def int_memset_element_unordered_atomic + : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ], + [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>; + //===------------------------ Reduction Intrinsics ------------------------===// // def int_experimental_vector_reduce_fadd : Intrinsic<[llvm_anyfloat_ty], diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index b27abad618c9..4cb77701f762 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -42,6 +42,24 @@ class Output; } // end namespace yaml +namespace SyncScope { + +typedef uint8_t ID; + +/// Known synchronization scope IDs, which always have the same value. All +/// synchronization scope IDs that LLVM has special knowledge of are listed +/// here. Additionally, this scheme allows LLVM to efficiently check for +/// specific synchronization scope ID without comparing strings. +enum { + /// Synchronized with respect to signal handlers executing in the same thread. + SingleThread = 0, + + /// Synchronized with respect to all concurrently executing threads. + System = 1 +}; + +} // end namespace SyncScope + /// This is an important class for using LLVM in a threaded context. It /// (opaquely) owns and manages the core "global" data of LLVM's core /// infrastructure, including the type and constant uniquing tables. @@ -111,6 +129,16 @@ class LLVMContext { /// tag registered with an LLVMContext has an unique ID. uint32_t getOperandBundleTagID(StringRef Tag) const; + /// getOrInsertSyncScopeID - Maps synchronization scope name to + /// synchronization scope ID. Every synchronization scope registered with + /// LLVMContext has unique ID except pre-defined ones. 
+ SyncScope::ID getOrInsertSyncScopeID(StringRef SSN); + + /// getSyncScopeNames - Populates client supplied SmallVector with + /// synchronization scope names registered with LLVMContext. Synchronization + /// scope names are ordered by increasing synchronization scope IDs. + void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// Define the GC for a function void setGC(const Function &Fn, std::string GCName); diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index d47d82a57bff..196e32e3615c 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -249,7 +249,7 @@ class Module { /// when other randomness consuming passes are added or removed. In /// addition, the random stream will be reproducible across LLVM /// versions when the pass does not change. - RandomNumberGenerator *createRNG(const Pass* P) const; + std::unique_ptr createRNG(const Pass* P) const; /// @} /// @name Module Level Mutators diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index b43d58865862..4aa8a0199ab1 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -47,7 +47,13 @@ template struct MappingTraits; /// \brief Class to accumulate and hold information about a callee. struct CalleeInfo { - enum class HotnessType : uint8_t { Unknown = 0, Cold = 1, None = 2, Hot = 3 }; + enum class HotnessType : uint8_t { + Unknown = 0, + Cold = 1, + None = 2, + Hot = 3, + Critical = 4 + }; HotnessType Hotness = HotnessType::Unknown; CalleeInfo() = default; @@ -516,7 +522,7 @@ using ModulePathStringTableTy = StringMap>; /// Map of global value GUID to its summary, used to identify values defined in /// a particular module, and provide efficient access to their summary. -using GVSummaryMapTy = std::map; +using GVSummaryMapTy = DenseMap; /// Class to hold module path string table and global value map, /// and encapsulate methods for operating on them. diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index d03b7b65f81e..393175675034 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -162,6 +162,14 @@ class PreservedAnalyses { return PA; } + /// \brief Construct a preserved analyses object with a single preserved set. + template + static PreservedAnalyses allInSet() { + PreservedAnalyses PA; + PA.preserveSet(); + return PA; + } + /// Mark an analysis as preserved. template void preserve() { preserve(AnalysisT::ID()); } @@ -1062,10 +1070,27 @@ class OuterAnalysisManagerProxy const AnalysisManagerT &getManager() const { return *AM; } - /// \brief Handle invalidation by ignoring it; this pass is immutable. + /// When invalidation occurs, remove any registered invalidation events. bool invalidate( - IRUnitT &, const PreservedAnalyses &, - typename AnalysisManager::Invalidator &) { + IRUnitT &IRUnit, const PreservedAnalyses &PA, + typename AnalysisManager::Invalidator &Inv) { + // Loop over the set of registered outer invalidation mappings and if any + // of them map to an analysis that is now invalid, clear it out. 
+ SmallVector DeadKeys; + for (auto &KeyValuePair : OuterAnalysisInvalidationMap) { + AnalysisKey *OuterID = KeyValuePair.first; + auto &InnerIDs = KeyValuePair.second; + InnerIDs.erase(llvm::remove_if(InnerIDs, [&](AnalysisKey *InnerID) { + return Inv.invalidate(InnerID, IRUnit, PA); }), + InnerIDs.end()); + if (InnerIDs.empty()) + DeadKeys.push_back(OuterID); + } + + for (auto OuterID : DeadKeys) + OuterAnalysisInvalidationMap.erase(OuterID); + + // The proxy itself remains valid regardless of anything else. return false; } diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 5b69e7855cc7..acb895211644 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -158,12 +158,18 @@ struct match_neg_zero { /// zero inline match_neg_zero m_NegZero() { return match_neg_zero(); } +struct match_any_zero { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) + return C->isZeroValue(); + return false; + } +}; + /// \brief - Match an arbitrary zero/null constant. This includes /// zero_initializer for vectors and ConstantPointerNull for pointers. For /// floating point constants, this will match negative zero and positive zero -inline match_combine_or m_AnyZero() { - return m_CombineOr(m_Zero(), m_NegZero()); -} +inline match_any_zero m_AnyZero() { return match_any_zero(); } struct match_nan { template bool match(ITy *V) { @@ -176,6 +182,39 @@ struct match_nan { /// Match an arbitrary NaN constant. This includes quiet and signalling nans. inline match_nan m_NaN() { return match_nan(); } +struct match_one { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) + return C->isOneValue(); + return false; + } +}; + +/// \brief Match an integer 1 or a vector with all elements equal to 1. +inline match_one m_One() { return match_one(); } + +struct match_all_ones { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) + return C->isAllOnesValue(); + return false; + } +}; + +/// \brief Match an integer or vector with all bits set to true. +inline match_all_ones m_AllOnes() { return match_all_ones(); } + +struct match_sign_mask { + template bool match(ITy *V) { + if (const auto *C = dyn_cast(V)) + return C->isMinSignedValue(); + return false; + } +}; + +/// \brief Match an integer or vector with only the sign bit(s) set. +inline match_sign_mask m_SignMask() { return match_sign_mask(); } + struct apint_match { const APInt *&Res; @@ -259,34 +298,6 @@ template struct api_pred_ty : public Predicate { } }; -struct is_one { - bool isValue(const APInt &C) { return C.isOneValue(); } -}; - -/// \brief Match an integer 1 or a vector with all elements equal to 1. -inline cst_pred_ty m_One() { return cst_pred_ty(); } -inline api_pred_ty m_One(const APInt *&V) { return V; } - -struct is_all_ones { - bool isValue(const APInt &C) { return C.isAllOnesValue(); } -}; - -/// \brief Match an integer or vector with all bits set to true. -inline cst_pred_ty m_AllOnes() { - return cst_pred_ty(); -} -inline api_pred_ty m_AllOnes(const APInt *&V) { return V; } - -struct is_sign_mask { - bool isValue(const APInt &C) { return C.isSignMask(); } -}; - -/// \brief Match an integer or vector with only the sign bit(s) set. 
-inline cst_pred_ty m_SignMask() { - return cst_pred_ty(); -} -inline api_pred_ty m_SignMask(const APInt *&V) { return V; } - struct is_power2 { bool isValue(const APInt &C) { return C.isPowerOf2(); } }; diff --git a/include/llvm/IR/SafepointIRVerifier.h b/include/llvm/IR/SafepointIRVerifier.h new file mode 100644 index 000000000000..092050d1d207 --- /dev/null +++ b/include/llvm/IR/SafepointIRVerifier.h @@ -0,0 +1,35 @@ +//===- SafepointIRVerifier.h - Checks for GC relocation problems *- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a verifier which is useful for enforcing the relocation +// properties required by a relocating GC. Specifically, it looks for uses of +// the unrelocated value of pointer SSA values after a possible safepoint. It +// attempts to report no false negatives, but may end up reporting false +// positives in rare cases (see the note at the top of the corresponding cpp +// file.) +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_SAFEPOINT_IR_VERIFIER +#define LLVM_IR_SAFEPOINT_IR_VERIFIER + +namespace llvm { + +class Function; +class FunctionPass; + +/// Run the safepoint verifier over a single function. Crashes on failure. +void verifySafepointIR(Function &F); + +/// Create an instance of the safepoint verifier pass which can be added to +/// a pass pipeline to check for relocation bugs. +FunctionPass *createSafepointIRVerifierPass(); +} + +#endif // LLVM_IR_SAFEPOINT_IR_VERIFIER diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h index b37b59288e3f..ef7801266777 100644 --- a/include/llvm/IR/Type.h +++ b/include/llvm/IR/Type.h @@ -202,6 +202,12 @@ class Type { /// Return true if this is an integer type or a vector of integer types. bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); } + /// Return true if this is an integer type or a vector of integer types of + /// the given width. + bool isIntOrIntVectorTy(unsigned BitWidth) const { + return getScalarType()->isIntegerTy(BitWidth); + } + /// True if this is an instance of FunctionType. 
bool isFunctionTy() const { return getTypeID() == FunctionTyID; } diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index aab14070dbda..39ac4649b70d 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -303,6 +303,7 @@ void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); void initializePruneEHPass(PassRegistry&); void initializeRABasicPass(PassRegistry&); +void initializeRAFastPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); void initializeReassociateLegacyPassPass(PassRegistry&); void initializeRegBankSelectPass(PassRegistry&); @@ -318,6 +319,7 @@ void initializeResetMachineFunctionPass(PassRegistry&); void initializeReversePostOrderFunctionAttrsLegacyPassPass(PassRegistry&); void initializeRewriteStatepointsForGCPass(PassRegistry&); void initializeRewriteSymbolsLegacyPassPass(PassRegistry&); +void initializeSafepointIRVerifierPass(PassRegistry&); void initializeSCCPLegacyPassPass(PassRegistry&); void initializeSCEVAAWrapperPassPass(PassRegistry&); void initializeSLPVectorizerPass(PassRegistry&); diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h index c9c43a22da5d..5a8e29d08ad2 100644 --- a/include/llvm/MC/MCAsmBackend.h +++ b/include/llvm/MC/MCAsmBackend.h @@ -73,7 +73,7 @@ class MCAsmBackend { /// reported via \p Ctx. virtual void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const = 0; + uint64_t Value, bool IsResolved) const = 0; /// @} diff --git a/include/llvm/MC/MCDisassembler/MCDisassembler.h b/include/llvm/MC/MCDisassembler/MCDisassembler.h index 5e626f186986..7f09c05ccf2a 100644 --- a/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -68,6 +68,7 @@ class MCDisassembler { /// an invalid instruction. /// \param Address - The address, in the memory space of region, of the first /// byte of the instruction. + /// \param Bytes - A reference to the actual bytes of the instruction. /// \param VStream - The stream to print warnings and diagnostic messages on. /// \param CStream - The stream to print comments and annotations on. /// \return - MCDisassembler::Success if the instruction is valid, diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index 2d2480a27223..42dc90da3049 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -233,8 +233,7 @@ class MachObjectWriter : public MCObjectWriter { void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; void bindIndirectSymbols(MCAssembler &Asm); diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h index 86bcbb6861d7..cd90690fb186 100644 --- a/include/llvm/MC/MCObjectWriter.h +++ b/include/llvm/MC/MCObjectWriter.h @@ -86,7 +86,7 @@ class MCObjectWriter { virtual void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) = 0; + uint64_t &FixedValue) = 0; /// Check whether the difference (A - B) between two symbol references is /// fully resolved. 
diff --git a/include/llvm/MC/MCSymbolWasm.h b/include/llvm/MC/MCSymbolWasm.h index 7ea89629efda..9bae6c582faa 100644 --- a/include/llvm/MC/MCSymbolWasm.h +++ b/include/llvm/MC/MCSymbolWasm.h @@ -21,6 +21,8 @@ class MCSymbolWasm : public MCSymbol { std::string ModuleName; SmallVector Returns; SmallVector Params; + bool ParamsSet = false; + bool ReturnsSet = false; /// An expression describing how to calculate the size of a symbol. If a /// symbol has no size this field will be NULL. @@ -45,15 +47,23 @@ class MCSymbolWasm : public MCSymbol { const StringRef getModuleName() const { return ModuleName; } - const SmallVector &getReturns() const { return Returns; } + const SmallVector &getReturns() const { + assert(ReturnsSet); + return Returns; + } void setReturns(SmallVectorImpl &&Rets) { + ReturnsSet = true; Returns = std::move(Rets); } - const SmallVector &getParams() const { return Params; } + const SmallVector &getParams() const { + assert(ParamsSet); + return Params; + } void setParams(SmallVectorImpl &&Pars) { + ParamsSet = true; Params = std::move(Pars); } }; diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index 78e0b5f6ed30..89c1ba6be35f 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -698,6 +698,9 @@ struct coff_resource_dir_entry { uint32_t getNameOffset() const { return maskTrailingOnes(31) & NameOffset; } + // Even though the PE/COFF spec doesn't mention this, the high bit of a name + // offset is set. + void setNameOffset(uint32_t Offset) { NameOffset = Offset | (1 << 31); } } Identifier; union { support::ulittle32_t DataEntryOffset; diff --git a/include/llvm/Object/Wasm.h b/include/llvm/Object/Wasm.h index 5c8445f10f44..07ee4a4d6c4d 100644 --- a/include/llvm/Object/Wasm.h +++ b/include/llvm/Object/Wasm.h @@ -61,7 +61,7 @@ class WasmSymbol { void print(raw_ostream &Out) const { Out << "Name=" << Name << ", Type=" << static_cast(Type) - << ", Flags=" << Flags; + << ", Flags=" << Flags << " ElemIndex=" << ElementIndex; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -69,8 +69,7 @@ class WasmSymbol { #endif }; -class WasmSection { -public: +struct WasmSection { WasmSection() = default; uint32_t Type = 0; // Section type (See below) @@ -80,6 +79,11 @@ class WasmSection { std::vector Relocations; // Relocations for this section }; +struct WasmSegment { + uint32_t SectionOffset; + wasm::WasmDataSegment Data; +}; + class WasmObjectFile : public ObjectFile { public: @@ -110,7 +114,7 @@ class WasmObjectFile : public ObjectFile { return ElemSegments; } - const std::vector& dataSegments() const { + const std::vector& dataSegments() const { return DataSegments; } @@ -210,7 +214,7 @@ class WasmObjectFile : public ObjectFile { std::vector Imports; std::vector Exports; std::vector ElemSegments; - std::vector DataSegments; + std::vector DataSegments; std::vector Functions; std::vector Symbols; ArrayRef CodeSection; diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h index 6bf08d340eeb..709ad8ec3b77 100644 --- a/include/llvm/ObjectYAML/WasmYAML.h +++ b/include/llvm/ObjectYAML/WasmYAML.h @@ -98,7 +98,8 @@ struct Relocation { }; struct DataSegment { - uint32_t Index; + uint32_t MemoryIndex; + uint32_t SectionOffset; wasm::WasmInitExpr Offset; yaml::BinaryRef Content; }; diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h index 3e7b019a0d4e..a35e182f00e5 100644 --- a/include/llvm/Option/OptTable.h +++ b/include/llvm/Option/OptTable.h @@ -140,7 +140,8 @@ class OptTable { // to start with. 
/// /// \return The vector of flags which start with Cur. - std::vector findByPrefix(StringRef Cur) const; + std::vector findByPrefix(StringRef Cur, + unsigned short DisableFlags) const; /// \brief Parse a single argument; returning the new argument and /// updating Index. diff --git a/include/llvm/Passes/PassBuilder.h b/include/llvm/Passes/PassBuilder.h index ff1958397331..33433f6b4a10 100644 --- a/include/llvm/Passes/PassBuilder.h +++ b/include/llvm/Passes/PassBuilder.h @@ -46,6 +46,19 @@ class PassBuilder { Optional PGOOpt; public: + /// \brief A struct to capture parsed pass pipeline names. + /// + /// A pipeline is defined as a series of names, each of which may in itself + /// recursively contain a nested pipeline. A name is either the name of a pass + /// (e.g. "instcombine") or the name of a pipeline type (e.g. "cgscc"). If the + /// name is the name of a pass, the InnerPipeline is empty, since passes + /// cannot contain inner pipelines. See parsePassPipeline() for a more + /// detailed description of the textual pipeline format. + struct PipelineElement { + StringRef Name; + std::vector InnerPipeline; + }; + /// \brief LLVM-provided high-level optimization levels. /// /// This enumerates the LLVM-provided high-level optimization levels. Each @@ -188,9 +201,14 @@ class PassBuilder { /// only intended for use when attempting to optimize code. If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. + /// + /// \p PrepareForThinLTO indicates whether this is invoked in + /// PrepareForThinLTO phase. Special handling is needed for sample PGO to + /// ensure profile accurate in the backend profile annotation phase. FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging = false); + bool DebugLogging = false, + bool PrepareForThinLTO = false); /// Construct the core LLVM module canonicalization and simplification /// pipeline. @@ -205,9 +223,14 @@ class PassBuilder { /// only intended for use when attempting to optimize code. If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. + /// + /// \p PrepareForThinLTO indicates whether this is invoked in + /// PrepareForThinLTO phase. Special handling is needed for sample PGO to + /// ensure profile accurate in the backend profile annotation phase. ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging = false); + bool DebugLogging = false, + bool PrepareForThinLTO = false); /// Construct the core LLVM module optimization pipeline. /// @@ -302,7 +325,8 @@ class PassBuilder { /// registered. AAManager buildDefaultAAPipeline(); - /// \brief Parse a textual pass pipeline description into a \c ModulePassManager. + /// \brief Parse a textual pass pipeline description into a \c + /// ModulePassManager. /// /// The format of the textual pass pipeline description looks something like: /// @@ -312,8 +336,8 @@ class PassBuilder { /// are comma separated. As a special shortcut, if the very first pass is not /// a module pass (as a module pass manager is), this will automatically form /// the shortest stack of pass managers that allow inserting that first pass. 
- /// So, assuming function passes 'fpassN', CGSCC passes 'cgpassN', and loop passes - /// 'lpassN', all of these are valid: + /// So, assuming function passes 'fpassN', CGSCC passes 'cgpassN', and loop + /// passes 'lpassN', all of these are valid: /// /// fpass1,fpass2,fpass3 /// cgpass1,cgpass2,cgpass3 @@ -326,13 +350,28 @@ class PassBuilder { /// module(function(loop(lpass1,lpass2,lpass3))) /// /// This shortcut is especially useful for debugging and testing small pass - /// combinations. Note that these shortcuts don't introduce any other magic. If - /// the sequence of passes aren't all the exact same kind of pass, it will be - /// an error. You cannot mix different levels implicitly, you must explicitly - /// form a pass manager in which to nest passes. + /// combinations. Note that these shortcuts don't introduce any other magic. + /// If the sequence of passes aren't all the exact same kind of pass, it will + /// be an error. You cannot mix different levels implicitly, you must + /// explicitly form a pass manager in which to nest passes. bool parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText, bool VerifyEachPass = true, bool DebugLogging = false); + /// {{@ Parse a textual pass pipeline description into a specific PassManager + /// + /// Automatic deduction of an appropriate pass manager stack is not supported. + /// For example, to insert a loop pass 'lpass' into a FunctinoPassManager, + /// this is the valid pipeline text: + /// + /// function(lpass) + bool parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText, + bool VerifyEachPass = true, bool DebugLogging = false); + bool parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText, + bool VerifyEachPass = true, bool DebugLogging = false); + bool parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText, + bool VerifyEachPass = true, bool DebugLogging = false); + /// @}} + /// Parse a textual alias analysis pipeline into the provided AA manager. /// /// The format of the textual AA pipeline is a comma separated list of AA @@ -350,13 +389,139 @@ class PassBuilder { /// returns false. bool parseAAPipeline(AAManager &AA, StringRef PipelineText); -private: - /// A struct to capture parsed pass pipeline names. - struct PipelineElement { - StringRef Name; - std::vector InnerPipeline; - }; + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding passes that perform peephole + /// optimizations similar to the instruction combiner. These passes will be + /// inserted after each instance of the instruction combiner pass. + void registerPeepholeEPCallback( + const std::function &C) { + PeepholeEPCallbacks.push_back(C); + } + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding late loop canonicalization and + /// simplification passes. This is the last point in the loop optimization + /// pipeline before loop deletion. Each pass added + /// here must be an instance of LoopPass. + /// This is the place to add passes that can remove loops, such as target- + /// specific loop idiom recognition. + void registerLateLoopOptimizationsEPCallback( + const std::function &C) { + LateLoopOptimizationsEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding loop passes to the end of the loop + /// optimizer. 
+ void registerLoopOptimizerEndEPCallback( + const std::function &C) { + LoopOptimizerEndEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding optimization passes after most of the + /// main optimizations, but before the last cleanup-ish optimizations. + void registerScalarOptimizerLateEPCallback( + const std::function &C) { + ScalarOptimizerLateEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding CallGraphSCC passes at the end of the + /// main CallGraphSCC passes and before any function simplification passes run + /// by CGPassManager. + void registerCGSCCOptimizerLateEPCallback( + const std::function &C) { + CGSCCOptimizerLateEPCallbacks.push_back(C); + } + + /// \brief Register a callback for a default optimizer pipeline extension + /// point + /// + /// This extension point allows adding optimization passes before the + /// vectorizer and other highly target specific optimization passes are + /// executed. + void registerVectorizerStartEPCallback( + const std::function &C) { + VectorizerStartEPCallbacks.push_back(C); + } + + /// \brief Register a callback for parsing an AliasAnalysis Name to populate + /// the given AAManager \p AA + void registerParseAACallback( + const std::function &C) { + AAParsingCallbacks.push_back(C); + } + + /// {{@ Register callbacks for analysis registration with this PassBuilder + /// instance. + /// Callees register their analyses with the given AnalysisManager objects. + void registerAnalysisRegistrationCallback( + const std::function &C) { + CGSCCAnalysisRegistrationCallbacks.push_back(C); + } + void registerAnalysisRegistrationCallback( + const std::function &C) { + FunctionAnalysisRegistrationCallbacks.push_back(C); + } + void registerAnalysisRegistrationCallback( + const std::function &C) { + LoopAnalysisRegistrationCallbacks.push_back(C); + } + void registerAnalysisRegistrationCallback( + const std::function &C) { + ModuleAnalysisRegistrationCallbacks.push_back(C); + } + /// @}} + + /// {{@ Register pipeline parsing callbacks with this pass builder instance. + /// Using these callbacks, callers can parse both a single pass name, as well + /// as entire sub-pipelines, and populate the PassManager instance + /// accordingly. + void registerPipelineParsingCallback( + const std::function)> &C) { + CGSCCPipelineParsingCallbacks.push_back(C); + } + void registerPipelineParsingCallback( + const std::function)> &C) { + FunctionPipelineParsingCallbacks.push_back(C); + } + void registerPipelineParsingCallback( + const std::function)> &C) { + LoopPipelineParsingCallbacks.push_back(C); + } + void registerPipelineParsingCallback( + const std::function)> &C) { + ModulePipelineParsingCallbacks.push_back(C); + } + /// @}} + + /// \brief Register a callback for a top-level pipeline entry. + /// + /// If the PassManager type is not given at the top level of the pipeline + /// text, this Callback should be used to determine the appropriate stack of + /// PassManagers and populate the passed ModulePassManager. 
+ void registerParseTopLevelPipelineCallback( + const std::function, + bool VerifyEachPass, bool DebugLogging)> &C) { + TopLevelPipelineParsingCallbacks.push_back(C); + } + +private: static Optional> parsePipelineText(StringRef Text); @@ -382,7 +547,106 @@ class PassBuilder { bool parseModulePassPipeline(ModulePassManager &MPM, ArrayRef Pipeline, bool VerifyEachPass, bool DebugLogging); + + void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, + OptimizationLevel Level, bool RunProfileGen, + std::string ProfileGenFile, + std::string ProfileUseFile); + + void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); + + // Extension Point callbacks + SmallVector, 2> + PeepholeEPCallbacks; + SmallVector, 2> + LateLoopOptimizationsEPCallbacks; + SmallVector, 2> + LoopOptimizerEndEPCallbacks; + SmallVector, 2> + ScalarOptimizerLateEPCallbacks; + SmallVector, 2> + CGSCCOptimizerLateEPCallbacks; + SmallVector, 2> + VectorizerStartEPCallbacks; + // Module callbacks + SmallVector, 2> + ModuleAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + ModulePipelineParsingCallbacks; + SmallVector, + bool VerifyEachPass, bool DebugLogging)>, + 2> + TopLevelPipelineParsingCallbacks; + // CGSCC callbacks + SmallVector, 2> + CGSCCAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + CGSCCPipelineParsingCallbacks; + // Function callbacks + SmallVector, 2> + FunctionAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + FunctionPipelineParsingCallbacks; + // Loop callbacks + SmallVector, 2> + LoopAnalysisRegistrationCallbacks; + SmallVector)>, + 2> + LoopPipelineParsingCallbacks; + // AA callbacks + SmallVector, 2> + AAParsingCallbacks; }; + +/// This utility template takes care of adding require<> and invalidate<> +/// passes for an analysis to a given \c PassManager. It is intended to be used +/// during parsing of a pass pipeline when parsing a single PipelineName. +/// When registering a new function analysis FancyAnalysis with the pass +/// pipeline name "fancy-analysis", a matching ParsePipelineCallback could look +/// like this: +/// +/// static bool parseFunctionPipeline(StringRef Name, FunctionPassManager &FPM, +/// ArrayRef P) { +/// if (parseAnalysisUtilityPasses("fancy-analysis", Name, +/// FPM)) +/// return true; +/// return false; +/// } +template +bool parseAnalysisUtilityPasses( + StringRef AnalysisName, StringRef PipelineName, + PassManager &PM) { + if (!PipelineName.endswith(">")) + return false; + // See if this is an invalidate<> pass name + if (PipelineName.startswith("invalidate<")) { + PipelineName = PipelineName.substr(11, PipelineName.size() - 12); + if (PipelineName != AnalysisName) + return false; + PM.addPass(InvalidateAnalysisPass()); + return true; + } + + // See if this is a require<> pass name + if (PipelineName.startswith("require<")) { + PipelineName = PipelineName.substr(8, PipelineName.size() - 9); + if (PipelineName != AnalysisName) + return false; + PM.addPass(RequireAnalysisPass()); + return true; + } + + return false; +} } #endif diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index a6b2850ccd22..772187f70153 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -249,9 +249,8 @@ void annotateValueSite(Module &M, Instruction &Inst, /// Same as the above interface but using an ArrayRef, as well as \p Sum. 
void annotateValueSite(Module &M, Instruction &Inst, - ArrayRef VDs, - uint64_t Sum, InstrProfValueKind ValueKind, - uint32_t MaxMDCount); + ArrayRef VDs, uint64_t Sum, + InstrProfValueKind ValueKind, uint32_t MaxMDCount); /// Extract the value profile data from \p Inst which is annotated with /// value profile meta data. Return false if there is no value data annotated, @@ -582,34 +581,27 @@ struct InstrProfValueSiteRecord { /// Merge data from another InstrProfValueSiteRecord /// Optionally scale merged counts by \p Weight. - void merge(SoftInstrProfErrors &SIPE, InstrProfValueSiteRecord &Input, - uint64_t Weight = 1); + void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, + function_ref Warn); /// Scale up value profile data counts. - void scale(SoftInstrProfErrors &SIPE, uint64_t Weight); + void scale(uint64_t Weight, function_ref Warn); }; /// Profiling information for a single function. struct InstrProfRecord { - StringRef Name; - uint64_t Hash; std::vector Counts; - SoftInstrProfErrors SIPE; InstrProfRecord() = default; - InstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) - : Name(Name), Hash(Hash), Counts(std::move(Counts)) {} + InstrProfRecord(std::vector Counts) : Counts(std::move(Counts)) {} InstrProfRecord(InstrProfRecord &&) = default; InstrProfRecord(const InstrProfRecord &RHS) - : Name(RHS.Name), Hash(RHS.Hash), Counts(RHS.Counts), SIPE(RHS.SIPE), + : Counts(RHS.Counts), ValueData(RHS.ValueData ? llvm::make_unique(*RHS.ValueData) : nullptr) {} InstrProfRecord &operator=(InstrProfRecord &&) = default; InstrProfRecord &operator=(const InstrProfRecord &RHS) { - Name = RHS.Name; - Hash = RHS.Hash; Counts = RHS.Counts; - SIPE = RHS.SIPE; if (!RHS.ValueData) { ValueData = nullptr; return *this; @@ -626,7 +618,6 @@ struct InstrProfRecord { /// Return the number of value profile kinds with non-zero number /// of profile sites. inline uint32_t getNumValueKinds() const; - /// Return the number of instrumented sites for ValueKind. inline uint32_t getNumValueSites(uint32_t ValueKind) const; @@ -661,11 +652,12 @@ struct InstrProfRecord { /// Merge the counts in \p Other into this one. /// Optionally scale merged counts by \p Weight. - void merge(InstrProfRecord &Other, uint64_t Weight = 1); + void merge(InstrProfRecord &Other, uint64_t Weight, + function_ref Warn); /// Scale up profile counts (including value profile data) by /// \p Weight. - void scale(uint64_t Weight); + void scale(uint64_t Weight, function_ref Warn); /// Sort value profile data (per site) by count. void sortValueData() { @@ -683,9 +675,6 @@ struct InstrProfRecord { /// Clear value data entries void clearValueData() { ValueData = nullptr; } - /// Get the error contained within the record's soft error counter. - Error takeError() { return SIPE.takeError(); } - private: struct ValueProfData { std::vector IndirectCallSites; @@ -737,11 +726,23 @@ struct InstrProfRecord { // Merge Value Profile data from Src record to this record for ValueKind. // Scale merged value counts by \p Weight. - void mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, - uint64_t Weight); + void mergeValueProfData(uint32_t ValkeKind, InstrProfRecord &Src, + uint64_t Weight, + function_ref Warn); // Scale up value profile data count. 
- void scaleValueProfData(uint32_t ValueKind, uint64_t Weight); + void scaleValueProfData(uint32_t ValueKind, uint64_t Weight, + function_ref Warn); +}; + +struct NamedInstrProfRecord : InstrProfRecord { + StringRef Name; + uint64_t Hash; + + NamedInstrProfRecord() = default; + NamedInstrProfRecord(StringRef Name, uint64_t Hash, + std::vector Counts) + : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -753,11 +754,8 @@ uint32_t InstrProfRecord::getNumValueKinds() const { uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { uint32_t N = 0; - const std::vector &SiteRecords = - getValueSitesForKind(ValueKind); - for (auto &SR : SiteRecords) { + for (auto &SR : getValueSitesForKind(ValueKind)) N += SR.ValueData.size(); - } return N; } diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index 8163ca159209..424360e0f765 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -40,9 +40,9 @@ class InstrProfReader; /// A file format agnostic iterator over profiling data. class InstrProfIterator : public std::iterator { + NamedInstrProfRecord> { InstrProfReader *Reader = nullptr; - InstrProfRecord Record; + value_type Record; void Increment(); @@ -53,12 +53,12 @@ class InstrProfIterator : public std::iterator() { return &Record; } + value_type &operator*() { return Record; } + value_type *operator->() { return &Record; } }; /// Base class and interface for reading profiling data of any known instrprof -/// format. Provides an iterator over InstrProfRecords. +/// format. Provides an iterator over NamedInstrProfRecords. class InstrProfReader { instrprof_error LastError = instrprof_error::success; @@ -70,7 +70,7 @@ class InstrProfReader { virtual Error readHeader() = 0; /// Read a single record. - virtual Error readNextRecord(InstrProfRecord &Record) = 0; + virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0; /// Iterator over profile data. InstrProfIterator begin() { return InstrProfIterator(this); } @@ -161,7 +161,7 @@ class TextInstrProfReader : public InstrProfReader { Error readHeader() override; /// Read a single record. - Error readNextRecord(InstrProfRecord &Record) override; + Error readNextRecord(NamedInstrProfRecord &Record) override; InstrProfSymtab &getSymtab() override { assert(Symtab.get()); @@ -209,7 +209,7 @@ class RawInstrProfReader : public InstrProfReader { static bool hasFormat(const MemoryBuffer &DataBuffer); Error readHeader() override; - Error readNextRecord(InstrProfRecord &Record) override; + Error readNextRecord(NamedInstrProfRecord &Record) override; bool isIRLevelProfile() const override { return (Version & VARIANT_MASK_IR_PROF) != 0; @@ -243,8 +243,8 @@ class RawInstrProfReader : public InstrProfReader { return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); } - Error readName(InstrProfRecord &Record); - Error readFuncHash(InstrProfRecord &Record); + Error readName(NamedInstrProfRecord &Record); + Error readFuncHash(NamedInstrProfRecord &Record); Error readRawCounts(InstrProfRecord &Record); Error readValueProfilingData(InstrProfRecord &Record); bool atEnd() const { return Data == DataEnd; } @@ -281,7 +281,7 @@ enum class HashT : uint32_t; /// Trait for lookups into the on-disk hash table for the binary instrprof /// format. 
class InstrProfLookupTrait { - std::vector DataBuffer; + std::vector DataBuffer; IndexedInstrProf::HashT HashType; unsigned FormatVersion; // Endianness of the input value profile data. @@ -293,7 +293,7 @@ class InstrProfLookupTrait { InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) : HashType(HashType), FormatVersion(FormatVersion) {} - using data_type = ArrayRef; + using data_type = ArrayRef; using internal_key_type = StringRef; using external_key_type = StringRef; @@ -334,11 +334,11 @@ struct InstrProfReaderIndexBase { // Read all the profile records with the same key pointed to the current // iterator. - virtual Error getRecords(ArrayRef &Data) = 0; + virtual Error getRecords(ArrayRef &Data) = 0; // Read all the profile records with the key equal to FuncName virtual Error getRecords(StringRef FuncName, - ArrayRef &Data) = 0; + ArrayRef &Data) = 0; virtual void advanceToNextKey() = 0; virtual bool atEnd() const = 0; virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; @@ -364,9 +364,9 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase { IndexedInstrProf::HashT HashType, uint64_t Version); ~InstrProfReaderIndex() override = default; - Error getRecords(ArrayRef &Data) override; + Error getRecords(ArrayRef &Data) override; Error getRecords(StringRef FuncName, - ArrayRef &Data) override; + ArrayRef &Data) override; void advanceToNextKey() override { RecordIterator++; } bool atEnd() const override { @@ -419,10 +419,9 @@ class IndexedInstrProfReader : public InstrProfReader { /// Read the file header. Error readHeader() override; /// Read a single record. - Error readNextRecord(InstrProfRecord &Record) override; + Error readNextRecord(NamedInstrProfRecord &Record) override; - /// Return the pointer to InstrProfRecord associated with FuncName - /// and FuncHash + /// Return the NamedInstrProfRecord associated with FuncName and FuncHash Expected getInstrProfRecord(StringRef FuncName, uint64_t FuncHash); diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index fff10af30295..8107ab386fe2 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -33,7 +33,7 @@ class raw_fd_ostream; class InstrProfWriter { public: - using ProfilingData = SmallDenseMap; + using ProfilingData = SmallDenseMap; enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel }; private: @@ -50,10 +50,15 @@ class InstrProfWriter { /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is /// summed. Optionally scale counts by \p Weight. - Error addRecord(InstrProfRecord &&I, uint64_t Weight = 1); + void addRecord(NamedInstrProfRecord &&I, uint64_t Weight, + function_ref Warn); + void addRecord(NamedInstrProfRecord &&I, function_ref Warn) { + addRecord(std::move(I), 1, Warn); + } /// Merge existing function counts from the given writer. 
- Error mergeRecordsFromWriter(InstrProfWriter &&IPW); + void mergeRecordsFromWriter(InstrProfWriter &&IPW, + function_ref Warn); /// Write the profile to \c OS void write(raw_fd_ostream &OS); @@ -62,7 +67,8 @@ class InstrProfWriter { Error writeText(raw_fd_ostream &OS); /// Write \c Record in text format to \c OS - static void writeRecordInText(const InstrProfRecord &Record, + static void writeRecordInText(StringRef Name, uint64_t Hash, + const InstrProfRecord &Counters, InstrProfSymtab &Symtab, raw_fd_ostream &OS); /// Write the profile, returning the raw data. For testing. @@ -85,6 +91,8 @@ class InstrProfWriter { void setOutputSparse(bool Sparse); private: + void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I, + uint64_t Weight, function_ref Warn); bool shouldEncodeData(const ProfilingData &PD); void writeImpl(ProfOStream &OS); }; diff --git a/include/llvm/ProfileData/ProfileCommon.h b/include/llvm/ProfileData/ProfileCommon.h index 987e3160ccae..51b065bcdb70 100644 --- a/include/llvm/ProfileData/ProfileCommon.h +++ b/include/llvm/ProfileData/ProfileCommon.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/ProfileSummary.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Error.h" #include #include @@ -27,8 +28,6 @@ namespace llvm { -struct InstrProfRecord; - namespace sampleprof { class FunctionSamples; diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h index 1b45cc52973f..2e75cbdd29c1 100644 --- a/include/llvm/Support/BlockFrequency.h +++ b/include/llvm/Support/BlockFrequency.h @@ -71,6 +71,10 @@ class BlockFrequency { bool operator>=(BlockFrequency RHS) const { return Frequency >= RHS.Frequency; } + + bool operator==(BlockFrequency RHS) const { + return Frequency == RHS.Frequency; + } }; } diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index be9e46540016..b19e37235df5 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -493,4 +493,14 @@ void AnnotateIgnoreWritesEnd(const char *file, int line); #define LLVM_THREAD_LOCAL #endif +/// \macro LLVM_ENABLE_EXCEPTIONS +/// \brief Whether LLVM is built with exception support. +#if __has_feature(cxx_exceptions) +#define LLVM_ENABLE_EXCEPTIONS 1 +#elif defined(__GNUC__) && defined(__EXCEPTIONS) +#define LLVM_ENABLE_EXCEPTIONS 1 +#elif defined(_MSC_VER) && defined(_CPPUNWIND) +#define LLVM_ENABLE_EXCEPTIONS 1 +#endif + #endif diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h index a8874a10d461..469d5dfad062 100644 --- a/include/llvm/Support/DynamicLibrary.h +++ b/include/llvm/Support/DynamicLibrary.h @@ -88,6 +88,22 @@ namespace sys { return !getPermanentLibrary(Filename, ErrMsg).isValid(); } + enum SearchOrdering { + /// SO_Linker - Search as a call to dlsym(dlopen(NULL)) would when + /// DynamicLibrary::getPermanentLibrary(NULL) has been called or + /// search the list of explcitly loaded symbols if not. + SO_Linker, + /// SO_LoadedFirst - Search all loaded libraries, then as SO_Linker would. + SO_LoadedFirst, + /// SO_LoadedLast - Search as SO_Linker would, then loaded libraries. + /// Only useful to search if libraries with RTLD_LOCAL have been added. + SO_LoadedLast, + /// SO_LoadOrder - Or this in to search libraries in the ordered loaded. + /// The default bahaviour is to search loaded libraries in reverse. 
+ SO_LoadOrder = 4 + }; + static SearchOrdering SearchOrder; // = SO_Linker + /// This function will search through all previously loaded dynamic /// libraries for the symbol \p symbolName. If it is found, the address of /// that symbol is returned. If not, null is returned. Note that this will diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h index 7c1edd801571..b45f6348390e 100644 --- a/include/llvm/Support/ErrorHandling.h +++ b/include/llvm/Support/ErrorHandling.h @@ -78,12 +78,48 @@ LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason, LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason, bool gen_crash_diag = true); - /// This function calls abort(), and prints the optional message to stderr. - /// Use the llvm_unreachable macro (that adds location info), instead of - /// calling this function directly. - LLVM_ATTRIBUTE_NORETURN void - llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, - unsigned line=0); +/// Installs a new bad alloc error handler that should be used whenever a +/// bad alloc error, e.g. failing malloc/calloc, is encountered by LLVM. +/// +/// The user can install a bad alloc handler, in order to define the behavior +/// in case of failing allocations, e.g. throwing an exception. Note that this +/// handler must not trigger any additional allocations itself. +/// +/// If no error handler is installed the default is to print the error message +/// to stderr, and call exit(1). If an error handler is installed then it is +/// the handler's responsibility to log the message, it will no longer be +/// printed to stderr. If the error handler returns, then exit(1) will be +/// called. +/// +/// +/// \param user_data - An argument which will be passed to the installed error +/// handler. +void install_bad_alloc_error_handler(fatal_error_handler_t handler, + void *user_data = nullptr); + +/// Restores default bad alloc error handling behavior. +void remove_bad_alloc_error_handler(); + +/// Reports a bad alloc error, calling any user defined bad alloc +/// error handler. In contrast to the generic 'report_fatal_error' +/// functions, this function is expected to return, e.g. the user +/// defined error handler throws an exception. +/// +/// Note: When throwing an exception in the bad alloc handler, make sure that +/// the following unwind succeeds, e.g. do not trigger additional allocations +/// in the unwind chain. +/// +/// If no error handler is installed (default), then a bad_alloc exception +/// is thrown if LLVM is compiled with exception support, otherwise an assertion +/// is called. +void report_bad_alloc_error(const char *Reason, bool GenCrashDiag = true); + +/// This function calls abort(), and prints the optional message to stderr. +/// Use the llvm_unreachable macro (that adds location info), instead of +/// calling this function directly. +LLVM_ATTRIBUTE_NORETURN void +llvm_unreachable_internal(const char *msg = nullptr, const char *file = nullptr, + unsigned line = 0); } /// Marks that the current location is not supposed to be reachable. 
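[Usage sketch for the bad-alloc handler hooks introduced in the ErrorHandling.h hunk above. This is illustrative only and not part of the patch: the handler name and driver are hypothetical, and the callback shape (user_data pointer, reason string, crash-diagnostics flag) is assumed to match the existing llvm::fatal_error_handler_t typedef, which this hunk does not show.]

#include "llvm/Support/ErrorHandling.h"
#include <cstdio>
#include <cstdlib>
#include <string>

// Hypothetical handler. Assumed to match llvm::fatal_error_handler_t:
// (user_data, reason, gen_crash_diag). Per the documentation above it must
// not allocate, so it only writes to stderr and aborts.
static void HandleBadAlloc(void *UserData, const std::string &Reason,
                           bool GenCrashDiag) {
  (void)UserData;
  (void)GenCrashDiag;
  std::fputs("LLVM allocation failure: ", stderr);
  std::fputs(Reason.c_str(), stderr);
  std::fputc('\n', stderr);
  std::abort();
}

int main() {
  llvm::install_bad_alloc_error_handler(HandleBadAlloc);
  // ... run LLVM work here; an allocation failure reported through
  // llvm::report_bad_alloc_error() now reaches HandleBadAlloc instead of the
  // default behavior documented above.
  llvm::remove_bad_alloc_error_handler();
  return 0;
}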
diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h index 9edf03aa3621..a0fec668e05c 100644 --- a/include/llvm/Support/GenericDomTreeConstruction.h +++ b/include/llvm/Support/GenericDomTreeConstruction.h @@ -32,6 +32,20 @@ namespace llvm { namespace DomTreeBuilder { +template +struct ChildrenGetter { + static auto Get(NodePtr N) -> decltype(reverse(children(N))) { + return reverse(children(N)); + } +}; + +template +struct ChildrenGetter { + static auto Get(NodePtr N) -> decltype(inverse_children(N)) { + return inverse_children(N); + } +}; + // Information record used by Semi-NCA during tree construction. template struct SemiNCAInfo { @@ -45,6 +59,7 @@ struct SemiNCAInfo { unsigned Semi = 0; NodePtr Label = nullptr; NodePtr IDom = nullptr; + SmallVector ReverseChildren; }; std::vector NumToNode; @@ -79,66 +94,49 @@ struct SemiNCAInfo { .get(); } - // External storage for depth first iterator that reuses the info lookup map - // SemiNCAInfo already has. We don't have a set, but a map instead, so we are - // converting the one argument insert calls. - struct df_iterator_dom_storage { - public: - using BaseSet = decltype(NodeToInfo); - df_iterator_dom_storage(BaseSet &Storage) : Storage(Storage) {} + static bool AlwaysDescend(NodePtr, NodePtr) { return true; } - using iterator = typename BaseSet::iterator; - std::pair insert(NodePtr N) { - return Storage.insert({N, InfoRec()}); - } - void completed(NodePtr) {} + // Custom DFS implementation which can skip nodes based on a provided + // predicate. It also collects ReverseChildren so that we don't have to spend + // time getting predecessors in SemiNCA. + template + unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition, + unsigned AttachToNum) { + assert(V); + SmallVector WorkList = {V}; + if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum; - private: - BaseSet &Storage; - }; - - df_iterator_dom_storage getStorage() { return {NodeToInfo}; } - - unsigned runReverseDFS(NodePtr V, unsigned N) { - auto DFStorage = getStorage(); - - bool IsChildOfArtificialExit = (N != 0); - for (auto I = idf_ext_begin(V, DFStorage), E = idf_ext_end(V, DFStorage); - I != E; ++I) { - NodePtr BB = *I; + while (!WorkList.empty()) { + const NodePtr BB = WorkList.pop_back_val(); auto &BBInfo = NodeToInfo[BB]; - BBInfo.DFSNum = BBInfo.Semi = ++N; + + // Visited nodes always have positive DFS numbers. + if (BBInfo.DFSNum != 0) continue; + BBInfo.DFSNum = BBInfo.Semi = ++LastNum; BBInfo.Label = BB; - // Set the parent to the top of the visited stack. The stack includes us, - // and is 1 based, so we subtract to account for both of these. - if (I.getPathLength() > 1) - BBInfo.Parent = NodeToInfo[I.getPath(I.getPathLength() - 2)].DFSNum; - NumToNode.push_back(BB); // NumToNode[n] = V; + NumToNode.push_back(BB); - if (IsChildOfArtificialExit) - BBInfo.Parent = 1; + for (const NodePtr Succ : ChildrenGetter::Get(BB)) { + const auto SIT = NodeToInfo.find(Succ); + // Don't visit nodes more than once but remember to collect + // RerverseChildren. + if (SIT != NodeToInfo.end() && SIT->second.DFSNum != 0) { + if (Succ != BB) SIT->second.ReverseChildren.push_back(BB); + continue; + } - IsChildOfArtificialExit = false; + if (!Condition(BB, Succ)) continue; + + // It's fine to add Succ to the map, because we know that it will be + // visited later. 
+ auto &SuccInfo = NodeToInfo[Succ]; + WorkList.push_back(Succ); + SuccInfo.Parent = LastNum; + SuccInfo.ReverseChildren.push_back(BB); + } } - return N; - } - unsigned runForwardDFS(NodePtr V, unsigned N) { - auto DFStorage = getStorage(); - - for (auto I = df_ext_begin(V, DFStorage), E = df_ext_end(V, DFStorage); - I != E; ++I) { - NodePtr BB = *I; - auto &BBInfo = NodeToInfo[BB]; - BBInfo.DFSNum = BBInfo.Semi = ++N; - BBInfo.Label = BB; - // Set the parent to the top of the visited stack. The stack includes us, - // and is 1 based, so we subtract to account for both of these. - if (I.getPathLength() > 1) - BBInfo.Parent = NodeToInfo[I.getPath(I.getPathLength() - 2)].DFSNum; - NumToNode.push_back(BB); // NumToNode[n] = V; - } - return N; + return LastNum; } NodePtr eval(NodePtr VIn, unsigned LastLinked) { @@ -181,31 +179,14 @@ struct SemiNCAInfo { template void runSemiNCA(DomTreeT &DT, unsigned NumBlocks) { - unsigned N = 0; - NumToNode.push_back(nullptr); - - bool MultipleRoots = (DT.Roots.size() > 1); - if (MultipleRoots) { - auto &BBInfo = NodeToInfo[nullptr]; - BBInfo.DFSNum = BBInfo.Semi = ++N; - BBInfo.Label = nullptr; - - NumToNode.push_back(nullptr); // NumToNode[n] = V; - } - // Step #1: Number blocks in depth-first order and initialize variables used // in later stages of the algorithm. - if (DT.isPostDominator()){ - for (unsigned i = 0, e = static_cast(DT.Roots.size()); - i != e; ++i) - N = runReverseDFS(DT.Roots[i], N); - } else { - N = runForwardDFS(DT.Roots[0], N); - } + const unsigned N = doFullDFSWalk(DT, AlwaysDescend); // It might be that some blocks did not get a DFS number (e.g., blocks of // infinite loops). In these cases an artificial exit node is required. - MultipleRoots |= (DT.isPostDominator() && N != NumBlocks); + const bool MultipleRoots = + DT.Roots.size() > 1 || (DT.isPostDominator() && N != NumBlocks); // Initialize IDoms to spanning tree parents. for (unsigned i = 1; i <= N; ++i) { @@ -221,7 +202,7 @@ struct SemiNCAInfo { // Initialize the semi dominator to point to the parent node. WInfo.Semi = WInfo.Parent; - for (const auto &N : inverse_children(W)) + for (const auto &N : WInfo.ReverseChildren) if (NodeToInfo.count(N)) { // Only if this predecessor is reachable! unsigned SemiU = NodeToInfo[eval(N, i + 1)].Semi; if (SemiU < WInfo.Semi) @@ -279,14 +260,27 @@ struct SemiNCAInfo { } } - void doFullDFSWalk(const DomTreeT &DT) { - NumToNode.push_back(nullptr); + template + unsigned doFullDFSWalk(const DomTreeT &DT, DescendCondition DC) { unsigned Num = 0; - for (auto *Root : DT.Roots) - if (!DT.isPostDominator()) - Num = runForwardDFS(Root, Num); - else - Num = runReverseDFS(Root, Num); + NumToNode.push_back(nullptr); + + if (DT.Roots.size() > 1) { + auto &BBInfo = NodeToInfo[nullptr]; + BBInfo.DFSNum = BBInfo.Semi = ++Num; + BBInfo.Label = nullptr; + + NumToNode.push_back(nullptr); // NumToNode[n] = V; + } + + if (DT.isPostDominator()) { + for (auto *Root : DT.Roots) Num = runDFS(Root, Num, DC, 1); + } else { + assert(DT.Roots.size() == 1); + Num = runDFS(DT.Roots[0], Num, DC, Num); + } + + return Num; } static void PrintBlockOrNullptr(raw_ostream &O, NodePtr Obj) { @@ -299,7 +293,7 @@ struct SemiNCAInfo { // Checks if the tree contains all reachable nodes in the input graph. bool verifyReachability(const DomTreeT &DT) { clear(); - doFullDFSWalk(DT); + doFullDFSWalk(DT, AlwaysDescend); for (auto &NodeToTN : DT.DomTreeNodes) { const TreeNodePtr TN = NodeToTN.second.get(); @@ -356,7 +350,7 @@ struct SemiNCAInfo { // NCD(From, To) == IDom(To) or To. 
bool verifyNCD(const DomTreeT &DT) { clear(); - doFullDFSWalk(DT); + doFullDFSWalk(DT, AlwaysDescend); for (auto &BlockToInfo : NodeToInfo) { auto &Info = BlockToInfo.second; @@ -440,8 +434,9 @@ struct SemiNCAInfo { if (!BB || TN->getChildren().empty()) continue; clear(); - NodeToInfo.insert({BB, {}}); - doFullDFSWalk(DT); + doFullDFSWalk(DT, [BB](NodePtr From, NodePtr To) { + return From != BB && To != BB; + }); for (TreeNodePtr Child : TN->getChildren()) if (NodeToInfo.count(Child->getBlock()) != 0) { @@ -473,8 +468,10 @@ struct SemiNCAInfo { const auto &Siblings = TN->getChildren(); for (const TreeNodePtr N : Siblings) { clear(); - NodeToInfo.insert({N->getBlock(), {}}); - doFullDFSWalk(DT); + NodePtr BBN = N->getBlock(); + doFullDFSWalk(DT, [BBN](NodePtr From, NodePtr To) { + return From != BBN && To != BBN; + }); for (const TreeNodePtr S : Siblings) { if (S == N) continue; diff --git a/include/llvm/Support/ReverseIteration.h b/include/llvm/Support/ReverseIteration.h new file mode 100644 index 000000000000..cb97b60f06dd --- /dev/null +++ b/include/llvm/Support/ReverseIteration.h @@ -0,0 +1,17 @@ +#ifndef LLVM_SUPPORT_REVERSEITERATION_H +#define LLVM_SUPPORT_REVERSEITERATION_H + +#include "llvm/Config/abi-breaking.h" + +namespace llvm { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS +template struct ReverseIterate { static bool value; }; +#if LLVM_ENABLE_REVERSE_ITERATION +template bool ReverseIterate::value = true; +#else +template bool ReverseIterate::value = false; +#endif +#endif +} + +#endif diff --git a/include/llvm/Support/UnicodeCharRanges.h b/include/llvm/Support/UnicodeCharRanges.h index d4d4d8eb84a4..4c655833b396 100644 --- a/include/llvm/Support/UnicodeCharRanges.h +++ b/include/llvm/Support/UnicodeCharRanges.h @@ -18,11 +18,11 @@ #include "llvm/Support/raw_ostream.h" #include +#define DEBUG_TYPE "unicode" + namespace llvm { namespace sys { -#define DEBUG_TYPE "unicode" - /// \brief Represents a closed range of Unicode code points [Lower, Upper]. struct UnicodeCharRange { uint32_t Lower; @@ -99,10 +99,9 @@ class UnicodeCharSet { const CharRanges Ranges; }; -#undef DEBUG_TYPE // "unicode" - } // namespace sys } // namespace llvm +#undef DEBUG_TYPE // "unicode" #endif // LLVM_SUPPORT_UNICODECHARRANGES_H diff --git a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 3a3118139bcb..178b08d7b8b7 100644 --- a/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -64,6 +64,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 2fc3ec996e7f..1843a2eed9bf 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -1545,6 +1545,16 @@ class TargetInstrInfo : public MCInstrInfo { return None; } + /// Return an array that contains the MMO target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the MMO target flags that are + /// defined by this method. + virtual ArrayRef> + getSerializableMachineMemOperandTargetFlags() const { + return None; + } + /// Determines whether \p Inst is a tail call instruction. Override this /// method on targets that do not properly set MCID::Return and MCID::Call on /// tail call instructions." 
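[Usage sketch for the new getSerializableMachineMemOperandTargetFlags() hook added in the TargetInstrInfo.h hunk above: a target that defines private MachineMemOperand flags would return a static flag/name table from its override so the MIR printer and parser can round-trip those flags. Illustrative only and not part of the patch: the "foo" name is hypothetical, and the MOTargetFlag1 enumerator and the std::pair<MachineMemOperand::Flags, const char *> element type are assumptions, since the declaration above elides its template arguments.]

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include <utility>

using namespace llvm;

// What a target's override of the new hook would typically return: a static
// table pairing each target-private MMO flag bit with the name MIR should
// print for it. "foo-no-pair" and the use of MOTargetFlag1 are illustrative.
static ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
fooSerializableMMOTargetFlags() {
  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = {
      {MachineMemOperand::MOTargetFlag1, "foo-no-pair"}};
  return makeArrayRef(TargetFlags);
}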
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 964d6314b127..60a03bdc182d 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -415,7 +415,8 @@ class TargetLoweringBase { virtual bool mergeStoresAfterLegalization() const { return false; } /// Returns if it's reasonable to merge stores to MemVT size. - virtual bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const { + virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { return true; } @@ -2726,6 +2727,18 @@ class TargetLowering : public TargetLoweringBase { return true; } + // Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE. + // Example of such a combine: + // v4i32 build_vector((extract_elt V, 0), + // (extract_elt V, 2), + // (extract_elt V, 4), + // (extract_elt V, 6)) + // --> + // v4i32 truncate (bitcast V to v4i64) + virtual bool isDesirableToCombineBuildVectorToTruncate() const { + return false; + } + /// Return true if the target has native support for the specified value type /// and it is 'desirable' to use the type for the given node type. e.g. On x86 /// i16 is legal, but undesirable since i16 instruction encodings are longer @@ -2815,6 +2828,9 @@ class TargetLowering : public TargetLoweringBase { // TargetLowering::LowerCall that perform tail call conversions. bool IsTailCall = false; + // Is Call lowering done post SelectionDAG type legalization. + bool IsPostTypeLegalization = false; + unsigned NumFixedArgs = -1; CallingConv::ID CallConv = CallingConv::C; SDValue Callee; @@ -2937,6 +2953,11 @@ class TargetLowering : public TargetLoweringBase { return *this; } + CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { + IsPostTypeLegalization = Value; + return *this; + } + ArgListTy &getArgs() { return Args; } @@ -3055,6 +3076,13 @@ class TargetLowering : public TargetLoweringBase { return Chain; } + /// This callback is used to inspect load/store instructions and add + /// target-specific MachineMemOperand flags to them. The default + /// implementation does nothing. + virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const { + return MachineMemOperand::MONone; + } + /// This callback is invoked by the type legalizer to legalize nodes with an /// illegal operand type but legal result types. It replaces the /// LowerOperation callback in the type Legalizer. The reason we can not do diff --git a/include/llvm/Transforms/Scalar/GVN.h b/include/llvm/Transforms/Scalar/GVN.h index 4c585a20021c..f25ab40640df 100644 --- a/include/llvm/Transforms/Scalar/GVN.h +++ b/include/llvm/Transforms/Scalar/GVN.h @@ -68,21 +68,6 @@ class GVN : public PassInfoMixin { class ValueTable { DenseMap valueNumbering; DenseMap expressionNumbering; - - // Expressions is the vector of Expression. ExprIdx is the mapping from - // value number to the index of Expression in Expressions. We use it - // instead of a DenseMap because filling such mapping is faster than - // filling a DenseMap and the compile time is a little better. - uint32_t nextExprNumber; - std::vector Expressions; - std::vector ExprIdx; - // Value number to PHINode mapping. Used for phi-translate in scalarpre. - DenseMap NumberingPhi; - // Cache for phi-translate in scalarpre. 
- typedef DenseMap, uint32_t> - PhiTranslateMap; - PhiTranslateMap PhiTranslateTable; - AliasAnalysis *AA; MemoryDependenceResults *MD; DominatorTree *DT; @@ -94,10 +79,6 @@ class GVN : public PassInfoMixin { Value *LHS, Value *RHS); Expression createExtractvalueExpr(ExtractValueInst *EI); uint32_t lookupOrAddCall(CallInst *C); - uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn); - std::pair assignExpNewValueNum(Expression &exp); - bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn); public: ValueTable(); @@ -106,11 +87,9 @@ class GVN : public PassInfoMixin { ~ValueTable(); uint32_t lookupOrAdd(Value *V); - uint32_t lookup(Value *V, bool Verify = true) const; + uint32_t lookup(Value *V) const; uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred, Value *LHS, Value *RHS); - uint32_t phiTranslate(const BasicBlock *BB, const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn); bool exists(Value *V) const; void add(Value *V, uint32_t num); void clear(); @@ -152,10 +131,6 @@ class GVN : public PassInfoMixin { SmallMapVector ReplaceWithConstMap; SmallVector InstrsToErase; - // Map the block to reversed postorder traversal number. It is used to - // find back edge easily. - DenseMap BlockRPONumber; - typedef SmallVector LoadDepVect; typedef SmallVector AvailValInBlkVect; typedef SmallVector UnavailBlkVect; @@ -239,7 +214,7 @@ class GVN : public PassInfoMixin { bool performPRE(Function &F); bool performScalarPRE(Instruction *I); bool performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, - BasicBlock *Curr, unsigned int ValNo); + unsigned int ValNo); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -251,7 +226,6 @@ class GVN : public PassInfoMixin { bool processFoldableCondBr(BranchInst *BI); void addDeadBlock(BasicBlock *BB); void assignValNumForDeadCode(); - void assignBlockRPONumber(Function &F); }; /// Create a legacy GVN pass. This also allows parameterizing whether or not diff --git a/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/include/llvm/Transforms/Utils/LowerMemIntrinsics.h index e4906b709e4b..4554b5cbc644 100644 --- a/include/llvm/Transforms/Utils/LowerMemIntrinsics.h +++ b/include/llvm/Transforms/Utils/LowerMemIntrinsics.h @@ -17,21 +17,39 @@ namespace llvm { +class ConstantInt; class Instruction; class MemCpyInst; class MemMoveInst; class MemSetInst; +class TargetTransformInfo; class Value; /// Emit a loop implementing the semantics of llvm.memcpy with the equivalent /// arguments at \p InsertBefore. -void createMemCpyLoop(Instruction *InsertBefore, - Value *SrcAddr, Value *DstAddr, Value *CopyLen, - unsigned SrcAlign, unsigned DestAlign, +void createMemCpyLoop(Instruction *InsertBefore, Value *SrcAddr, Value *DstAddr, + Value *CopyLen, unsigned SrcAlign, unsigned DestAlign, bool SrcIsVolatile, bool DstIsVolatile); +/// Emit a loop implementing the semantics of llvm.memcpy where the size is not +/// a compile-time constant. Loop will be insterted at \p InsertBefore. +void createMemCpyLoopUnknownSize(Instruction *InsertBefore, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile, + const TargetTransformInfo &TTI); + +/// Emit a loop implementing the semantics of an llvm.memcpy whose size is a +/// compile time constant. Loop is inserted at \p InsertBefore. 
+void createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, + Value *DstAddr, ConstantInt *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile, + const TargetTransformInfo &TTI); + + /// Expand \p MemCpy as a loop. \p MemCpy is not deleted. -void expandMemCpyAsLoop(MemCpyInst *MemCpy); +void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI); /// Expand \p MemMove as a loop. \p MemMove is not deleted. void expandMemMoveAsLoop(MemMoveInst *MemMove); diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index b0448fed9f4d..2dd205d8b2af 100644 --- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -22,10 +22,10 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" -namespace llvm { - #define DEBUG_TYPE "ssaupdater" +namespace llvm { + class CastInst; class PHINode; template class SSAUpdaterTraits; @@ -453,8 +453,8 @@ class SSAUpdaterImpl { } }; +} // end llvm namespace + #undef DEBUG_TYPE // "ssaupdater" -} // End llvm namespace - -#endif +#endif // LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index dd419e861316..766198bbc5de 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -23,6 +23,7 @@ module LLVM_Backend { exclude header "CodeGen/CommandFlags.h" exclude header "CodeGen/LinkAllAsmWriterComponents.h" exclude header "CodeGen/LinkAllCodegenComponents.h" + exclude header "CodeGen/GlobalISel/InstructionSelectorImpl.h" // These are intended for (repeated) textual inclusion. textual header "CodeGen/DIEValue.def" diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index b52a1d7b24d6..e682a644ef2c 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -1006,7 +1006,7 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, // Because they cannot partially overlap and because fields in an array // cannot overlap, if we can prove the final indices are different between // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias. - + // If the last indices are constants, we've already checked they don't // equal each other so we can exit early. if (C1 && C2) diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 23d5a887c34a..a329e5ad48c9 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -538,7 +538,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, // InstCombine canonicalizes X <= 0 into X < 1. // X <= 0 -> Unlikely isProb = false; - } else if (CV->isAllOnesValue()) { + } else if (CV->isMinusOne()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == -1 -> Unlikely diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp index 9d4521221f47..3ddefc6520a7 100644 --- a/lib/Analysis/CGSCCPassManager.cpp +++ b/lib/Analysis/CGSCCPassManager.cpp @@ -196,18 +196,117 @@ FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C, bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( LazyCallGraph::SCC &C, const PreservedAnalyses &PA, CGSCCAnalysisManager::Invalidator &Inv) { - for (LazyCallGraph::Node &N : C) - FAM->invalidate(N.getFunction(), PA); + // If literally everything is preserved, we're done. 
+ if (PA.areAllPreserved()) + return false; // This is still a valid proxy. - // This proxy doesn't need to handle invalidation itself. Instead, the - // module-level CGSCC proxy handles it above by ensuring that if the - // module-level FAM proxy becomes invalid the entire SCC layer, which - // includes this proxy, is cleared. + // If this proxy isn't marked as preserved, then even if the result remains + // valid, the key itself may no longer be valid, so we clear everything. + // + // Note that in order to preserve this proxy, a module pass must ensure that + // the FAM has been completely updated to handle the deletion of functions. + // Specifically, any FAM-cached results for those functions need to have been + // forcibly cleared. When preserved, this proxy will only invalidate results + // cached on functions *still in the module* at the end of the module pass. + auto PAC = PA.getChecker(); + if (!PAC.preserved() && !PAC.preservedSet>()) { + for (LazyCallGraph::Node &N : C) + FAM->clear(N.getFunction()); + + return true; + } + + // Directly check if the relevant set is preserved. + bool AreFunctionAnalysesPreserved = + PA.allAnalysesInSetPreserved>(); + + // Now walk all the functions to see if any inner analysis invalidation is + // necessary. + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + Optional FunctionPA; + + // Check to see whether the preserved set needs to be pruned based on + // SCC-level analysis invalidation that triggers deferred invalidation + // registered with the outer analysis manager proxy for this function. + if (auto *OuterProxy = + FAM->getCachedResult(F)) + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first; + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + if (Inv.invalidate(OuterAnalysisID, C, PA)) { + if (!FunctionPA) + FunctionPA = PA; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + FunctionPA->abandon(InnerAnalysisID); + } + } + + // Check if we needed a custom PA set, and if so we'll need to run the + // inner invalidation. + if (FunctionPA) { + FAM->invalidate(F, *FunctionPA); + continue; + } + + // Otherwise we only need to do invalidation if the original PA set didn't + // preserve all function analyses. + if (!AreFunctionAnalysesPreserved) + FAM->invalidate(F, PA); + } + + // Return false to indicate that this result is still a valid proxy. return false; } } // End llvm namespace +/// When a new SCC is created for the graph and there might be function +/// analysis results cached for the functions now in that SCC two forms of +/// updates are required. +/// +/// First, a proxy from the SCC to the FunctionAnalysisManager needs to be +/// created so that any subsequent invalidation events to the SCC are +/// propagated to the function analysis results cached for functions within it. +/// +/// Second, if any of the functions within the SCC have analysis results with +/// outer analysis dependencies, then those dependencies would point to the +/// *wrong* SCC's analysis result. We forcibly invalidate the necessary +/// function analyses so that they don't retain stale handles. +static void updateNewSCCFunctionAnalyses(LazyCallGraph::SCC &C, + LazyCallGraph &G, + CGSCCAnalysisManager &AM) { + // Get the relevant function analysis manager. 
+ auto &FAM = + AM.getResult(C, G).getManager(); + + // Now walk the functions in this SCC and invalidate any function analysis + // results that might have outer dependencies on an SCC analysis. + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + + auto *OuterProxy = + FAM.getCachedResult(F); + if (!OuterProxy) + // No outer analyses were queried, nothing to do. + continue; + + // Forcibly abandon all the inner analyses with dependencies, but + // invalidate nothing else. + auto PA = PreservedAnalyses::all(); + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + PA.abandon(InnerAnalysisID); + } + + // Now invalidate anything we found. + FAM.invalidate(F, PA); + } +} + namespace { /// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c /// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly @@ -236,7 +335,6 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n"; SCC *OldC = C; - (void)OldC; // Update the current SCC. Note that if we have new SCCs, this must actually // change the SCC. @@ -245,6 +343,26 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, C = &*NewSCCRange.begin(); assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); + // If we had a cached FAM proxy originally, we will want to create more of + // them for each SCC that was split off. + bool NeedFAMProxy = + AM.getCachedResult(*OldC) != nullptr; + + // We need to propagate an invalidation call to all but the newly current SCC + // because the outer pass manager won't do that for us after splitting them. + // FIXME: We should accept a PreservedAnalysis from the CG updater so that if + // there are preserved ananalyses we can avoid invalidating them here for + // split-off SCCs. + // We know however that this will preserve any FAM proxy so go ahead and mark + // that. + PreservedAnalyses PA; + PA.preserve(); + AM.invalidate(*OldC, PA); + + // Ensure the now-current SCC's function analyses are updated. + if (NeedFAMProxy) + updateNewSCCFunctionAnalyses(*C, G, AM); + for (SCC &NewC : reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) { assert(C != &NewC && "No need to re-visit the current SCC!"); @@ -252,6 +370,14 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, UR.CWorklist.insert(&NewC); if (DebugLogging) dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"; + + // Ensure new SCCs' function analyses are updated. + if (NeedFAMProxy) + updateNewSCCFunctionAnalyses(NewC, G, AM); + + // Also propagate a normal invalidation to the new SCC as only the current + // will get one from the pass manager infrastructure. + AM.invalidate(NewC, PA); } return C; } @@ -349,14 +475,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // For separate SCCs this is trivial. RC->switchTrivialInternalEdgeToRef(N, TargetN); } else { - // Otherwise we may end up re-structuring the call graph. First, - // invalidate any SCC analyses. We have to do this before we split - // functions into new SCCs and lose track of where their analyses are - // cached. - // FIXME: We should accept a more precise preserved set here. For - // example, it might be possible to preserve some function analyses - // even as the SCC structure is changed. 
- AM.invalidate(*C, PreservedAnalyses::none()); // Now update the call graph. C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, N, C, AM, UR, DebugLogging); @@ -424,13 +542,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( continue; } - // Otherwise we may end up re-structuring the call graph. First, invalidate - // any SCC analyses. We have to do this before we split functions into new - // SCCs and lose track of where their analyses are cached. - // FIXME: We should accept a more precise preserved set here. For example, - // it might be possible to preserve some function analyses even as the SCC - // structure is changed. - AM.invalidate(*C, PreservedAnalyses::none()); // Now update the call graph. C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N, C, AM, UR, DebugLogging); @@ -459,25 +570,48 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // Otherwise we are switching an internal ref edge to a call edge. This // may merge away some SCCs, and we add those to the UpdateResult. We also // need to make sure to update the worklist in the event SCCs have moved - // before the current one in the post-order sequence. + // before the current one in the post-order sequence + bool HasFunctionAnalysisProxy = false; auto InitialSCCIndex = RC->find(*C) - RC->begin(); - auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, *CallTarget); - if (!InvalidatedSCCs.empty()) { + bool FormedCycle = RC->switchInternalEdgeToCall( + N, *CallTarget, [&](ArrayRef MergedSCCs) { + for (SCC *MergedC : MergedSCCs) { + assert(MergedC != &TargetC && "Cannot merge away the target SCC!"); + + HasFunctionAnalysisProxy |= + AM.getCachedResult( + *MergedC) != nullptr; + + // Mark that this SCC will no longer be valid. + UR.InvalidatedSCCs.insert(MergedC); + + // FIXME: We should really do a 'clear' here to forcibly release + // memory, but we don't have a good way of doing that and + // preserving the function analyses. + auto PA = PreservedAnalyses::allInSet>(); + PA.preserve(); + AM.invalidate(*MergedC, PA); + } + }); + + // If we formed a cycle by creating this call, we need to update more data + // structures. + if (FormedCycle) { C = &TargetC; assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); + // If one of the invalidated SCCs had a cached proxy to a function + // analysis manager, we need to create a proxy in the new current SCC as + // the invaliadted SCCs had their functions moved. + if (HasFunctionAnalysisProxy) + AM.getResult(*C, G); + // Any analyses cached for this SCC are no longer precise as the shape - // has changed by introducing this cycle. - AM.invalidate(*C, PreservedAnalyses::none()); - - for (SCC *InvalidatedC : InvalidatedSCCs) { - assert(InvalidatedC != C && "Cannot invalidate the current SCC!"); - UR.InvalidatedSCCs.insert(InvalidatedC); - - // Also clear any cached analyses for the SCCs that are dead. This - // isn't really necessary for correctness but can release memory. - AM.clear(*InvalidatedC); - } + // has changed by introducing this cycle. However, we have taken care to + // update the proxies so it remains valide. 
+ auto PA = PreservedAnalyses::allInSet>(); + PA.preserve(); + AM.invalidate(*C, PA); } auto NewSCCIndex = RC->find(*C) - RC->begin(); if (InitialSCCIndex < NewSCCIndex) { diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 2093f0fdec12..3b0026ba10e9 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -94,8 +94,8 @@ namespace { // guarantee that 'I' never reaches 'BeforeHere' through a back-edge or // by its successors, i.e, prune if: // - // (1) BB is an entry block or have no sucessors. - // (2) There's no path coming back through BB sucessors. + // (1) BB is an entry block or have no successors. + // (2) There's no path coming back through BB successors. if (BB == &BB->getParent()->getEntryBlock() || !BB->getTerminator()->getNumSuccessors()) return true; diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 926b28d6094a..9c53f9140ca3 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -143,9 +143,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::Shl: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.lshr(ShiftAmt); // If the shift is nuw/nsw, then the high bits are not dead @@ -159,9 +158,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::LShr: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.shl(ShiftAmt); // If the shift is exact, then the low bits are not dead @@ -172,9 +170,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::AShr: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.shl(ShiftAmt); // Because the high input bit is replicated into the // high-order bits of the result, if we need any of those diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index e4d58bf1b4eb..34eccc07f265 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -3342,7 +3342,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && - (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); + (SrcGEP->getNumOperands() == DstGEP->getNumOperands()) && + isKnownPredicate(CmpInst::ICMP_EQ, SrcPtrSCEV, DstPtrSCEV); } unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector Pair(Pairs); @@ -3371,7 +3372,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (Delinearize && CommonLevels > 1) { if (tryDelinearize(Src, Dst, Pair)) { - DEBUG(dbgs() << " delinerized GEP\n"); + DEBUG(dbgs() << " delinearized GEP\n"); Pairs = Pair.size(); } } @@ -3796,7 +3797,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, if (Delinearize && CommonLevels > 1) { if (tryDelinearize(Src, Dst, Pair)) { - DEBUG(dbgs() << " delinerized GEP\n"); + DEBUG(dbgs() << " delinearized GEP\n"); Pairs = Pair.size(); } } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index d9e32a3c417e..f6632020b8fc 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -560,7 +560,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return Y; /// i1 add -> xor. - if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -598,7 +598,7 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// folding. static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, bool AllowNonInbounds = false) { - assert(V->getType()->getScalarType()->isPointerTy()); + assert(V->getType()->isPtrOrPtrVectorTy()); Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); @@ -627,8 +627,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, } break; } - assert(V->getType()->getScalarType()->isPointerTy() && - "Unexpected operand type!"); + assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); @@ -771,7 +770,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // i1 sub -> xor. - if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -902,7 +901,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return X; // i1 mul -> and. - if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -998,7 +997,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { // X % 1 -> 0 // If this is a boolean op (single-bit element type), we can't have // division-by-zero or remainder-by-zero, so assume the divisor is 1. - if (match(Op1, m_One()) || Ty->getScalarType()->isIntegerTy(1)) + if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1)) return IsDiv ? Op0 : Constant::getNullValue(Ty); return nullptr; @@ -2251,7 +2250,7 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q) { Type *ITy = GetCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. 
- if (!OpTy->getScalarType()->isIntegerTy(1)) + if (!OpTy->isIntOrIntVectorTy(1)) return nullptr; // A boolean compared to true/false can be simplified in 14 out of the 20 diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index b6a9436cc1ec..a4c3e43b4b0c 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -456,8 +456,10 @@ updatePostorderSequenceForEdgeInsertion( return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx); } -SmallVector -LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { +bool +LazyCallGraph::RefSCC::switchInternalEdgeToCall( + Node &SourceN, Node &TargetN, + function_ref MergeSCCs)> MergeCB) { assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!"); SmallVector DeletedSCCs; @@ -475,7 +477,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // we've just added more connectivity. if (&SourceSCC == &TargetSCC) { SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. } // At this point we leverage the postorder list of SCCs to detect when the @@ -488,7 +490,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { int TargetIdx = SCCIndices[&TargetSCC]; if (TargetIdx < SourceIdx) { SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. } // Compute the SCCs which (transitively) reach the source. @@ -555,12 +557,16 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet, ComputeTargetConnectedSet); + // Run the user's callback on the merged SCCs before we actually merge them. + if (MergeCB) + MergeCB(makeArrayRef(MergeRange.begin(), MergeRange.end())); + // If the merge range is empty, then adding the edge didn't actually form any // new cycles. We're done. if (MergeRange.begin() == MergeRange.end()) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. } #ifndef NDEBUG @@ -596,8 +602,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); - // And we're done! - return DeletedSCCs; + // And we're done, but we did form a new cycle. 
+ return true; } void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN, diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 9713588537b3..ada600a69b87 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -405,7 +405,7 @@ void Lint::visitMemoryReference(Instruction &I, Assert(!isa(UnderlyingObject), "Undefined behavior: Undef pointer dereference", &I); Assert(!isa(UnderlyingObject) || - !cast(UnderlyingObject)->isAllOnesValue(), + !cast(UnderlyingObject)->isMinusOne(), "Unusual: All-ones pointer dereference", &I); Assert(!isa(UnderlyingObject) || !cast(UnderlyingObject)->isOne(), diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index ff68810abb82..baf932432a0a 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -131,13 +131,13 @@ PHINode *Loop::getCanonicalInductionVariable() const { PHINode *PN = cast(I); if (ConstantInt *CI = dyn_cast(PN->getIncomingValueForBlock(Incoming))) - if (CI->isNullValue()) + if (CI->isZero()) if (Instruction *Inc = dyn_cast(PN->getIncomingValueForBlock(Backedge))) if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN) if (ConstantInt *CI = dyn_cast(Inc->getOperand(1))) - if (CI->equalsInt(1)) + if (CI->isOne()) return PN; } return nullptr; @@ -460,7 +460,7 @@ class UnloopUpdater { void UnloopUpdater::updateBlockParents() { if (Unloop.getNumBlocks()) { // Perform a post order CFG traversal of all blocks within this loop, - // propagating the nearest loop from sucessors to predecessors. + // propagating the nearest loop from successors to predecessors. LoopBlocksTraversal Traversal(DFS, LI); for (BasicBlock *POI : Traversal) { diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index f88d54b21e1e..7327c07499be 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -505,6 +505,22 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return unknown(); } +/// When we're compiling N-bit code, and the user uses parameters that are +/// greater than N bits (e.g. uint64_t on a 32-bit build), we can run into +/// trouble with APInt size issues. This function handles resizing + overflow +/// checks for us. Check and zext or trunc \p I depending on IntTyBits and +/// I's value. +bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) { + // More bits than we can handle. Checking the bit width isn't necessary, but + // it's faster than checking active bits, and should give `false` in the + // vast majority of cases. + if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) + return false; + if (I.getBitWidth() != IntTyBits) + I = I.zextOrTrunc(IntTyBits); + return true; +} + SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) return unknown(); @@ -515,8 +531,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { Value *ArraySize = I.getArraySize(); if (const ConstantInt *C = dyn_cast(ArraySize)) { - Size *= C->getValue().zextOrSelf(IntTyBits); - return std::make_pair(align(Size, I.getAlignment()), Zero); + APInt NumElems = C->getValue(); + if (!CheckedZextOrTrunc(NumElems)) + return unknown(); + + bool Overflow; + Size = Size.umul_ov(NumElems, Overflow); + return Overflow ? 
unknown() : std::make_pair(align(Size, I.getAlignment()), + Zero); } return unknown(); } @@ -561,21 +583,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { if (!Arg) return unknown(); - // When we're compiling N-bit code, and the user uses parameters that are - // greater than N bits (e.g. uint64_t on a 32-bit build), we can run into - // trouble with APInt size issues. This function handles resizing + overflow - // checks for us. - auto CheckedZextOrTrunc = [&](APInt &I) { - // More bits than we can handle. Checking the bit width isn't necessary, but - // it's faster than checking active bits, and should give `false` in the - // vast majority of cases. - if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) - return false; - if (I.getBitWidth() != IntTyBits) - I = I.zextOrTrunc(IntTyBits); - return true; - }; - APInt Size = Arg->getValue(); if (!CheckedZextOrTrunc(Size)) return unknown(); diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 095647e1bd20..e9e354ebb88f 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -266,7 +266,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // sample PGO, to enable the same inlines as the profiled optimized binary. for (auto &I : F.getImportGUIDs()) CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness( - CalleeInfo::HotnessType::Hot); + CalleeInfo::HotnessType::Critical); bool NonRenamableLocal = isNonRenamableLocal(F); bool NotEligibleForImport = diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 678ad3af5e85..3fb1ab980add 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -326,7 +326,7 @@ bool SCEV::isOne() const { bool SCEV::isAllOnesValue() const { if (const SCEVConstant *SC = dyn_cast(this)) - return SC->getValue()->isAllOnesValue(); + return SC->getValue()->isMinusOne(); return false; } @@ -2743,7 +2743,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, } // If we are left with a constant one being multiplied, strip it off. - if (cast(Ops[0])->getValue()->equalsInt(1)) { + if (cast(Ops[0])->getValue()->isOne()) { Ops.erase(Ops.begin()); --Idx; } else if (cast(Ops[0])->getValue()->isZero()) { @@ -2939,7 +2939,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, "SCEVUDivExpr operand types don't match!"); if (const SCEVConstant *RHSC = dyn_cast(RHS)) { - if (RHSC->getValue()->equalsInt(1)) + if (RHSC->getValue()->isOne()) return LHS; // X udiv 1 --> x // If the denominator is zero, the result of the udiv is undefined. Don't // try to analyze it, because the resolution chosen here may differ from @@ -5421,9 +5421,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. if (ConstantInt *CI = dyn_cast(BO->RHS)) { - if (CI->isNullValue()) + if (CI->isZero()) return getSCEV(BO->RHS); - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return getSCEV(BO->LHS); const APInt &A = CI->getValue(); @@ -5498,7 +5498,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Xor: if (ConstantInt *CI = dyn_cast(BO->RHS)) { // If the RHS of xor is -1, then this is a not operation. - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return getNotSCEV(getSCEV(BO->LHS)); // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. 
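
For context on the ObjectSizeOffsetVisitor change above: visitAllocaInst now widens the element count through the hoisted CheckedZextOrTrunc helper and multiplies with an explicit overflow check instead of zextOrSelf. A minimal stand-alone sketch of that pattern using only APInt — the function name and parameters here are illustrative, not part of the patch:

  #include "llvm/ADT/APInt.h"

  // Sketch: widen an allocation size and element count to a common width,
  // rejecting values that do not fit, then multiply with an overflow check.
  // Mirrors the CheckedZextOrTrunc + umul_ov logic used by visitAllocaInst.
  static bool checkedAllocSize(llvm::APInt Size, llvm::APInt NumElems,
                               unsigned IntTyBits, llvm::APInt &Out) {
    auto FitsAndResize = [&](llvm::APInt &I) {
      // More bits than we can handle -> bail out, as the helper above does.
      if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
        return false;
      if (I.getBitWidth() != IntTyBits)
        I = I.zextOrTrunc(IntTyBits);
      return true;
    };
    if (!FitsAndResize(Size) || !FitsAndResize(NumElems))
      return false;
    bool Overflow = false;
    Out = Size.umul_ov(NumElems, Overflow);
    return !Overflow; // On overflow the caller falls back to unknown().
  }

Returning false on overflow is what lets the visitor report unknown() rather than a wrapped-around size.
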
@@ -5577,7 +5577,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { if (CI->getValue().uge(BitWidth)) break; - if (CI->isNullValue()) + if (CI->isZero()) return getSCEV(BO->LHS); // shift by zero --> noop uint64_t AShrAmt = CI->getZExtValue(); @@ -7626,7 +7626,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast(Step); - if (!StepC || StepC->getValue()->equalsInt(0)) + if (!StepC || StepC->getValue()->isZero()) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -7640,7 +7640,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // Handle unitary steps, which cannot wraparound. // 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) - if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { + if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) { APInt MaxBECount = getUnsignedRangeMax(Distance); // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, @@ -7696,7 +7696,7 @@ ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) { // If the value is a constant, check to see if it is known to be non-zero // already. If so, the backedge will execute zero times. if (const SCEVConstant *C = dyn_cast(V)) { - if (!C->getValue()->isNullValue()) + if (!C->getValue()->isZero()) return getZero(C->getType()); return getCouldNotCompute(); // Otherwise it will loop infinitely. } diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index f938a9a52065..94bbc58541a7 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include @@ -23,6 +24,11 @@ using namespace llvm; #define DEBUG_TYPE "tti" +static cl::opt UseWideMemcpyLoopLowering( + "use-wide-memcpy-loop-lowering", cl::init(false), + cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."), + cl::Hidden); + namespace { /// \brief No-op implementation of the TTI interface using the utility base /// classes. 
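
The flag registered just above is an ordinary hidden command-line option (presumably cl::opt<bool>), and passes are expected to query it through TargetTransformInfo::useWideIRMemcpyLoopLowering() defined further down rather than read it directly. A minimal sketch of an option of the same shape, given an illustrative name so it does not collide with the real flag:

  #include "llvm/Support/CommandLine.h"

  namespace {
  // Illustrative stand-in for the hidden boolean flag above; real callers go
  // through TargetTransformInfo::useWideIRMemcpyLoopLowering() so the default
  // lives in exactly one place.
  llvm::cl::opt<bool> DemoWideMemcpyLowering(
      "demo-use-wide-memcpy-loop-lowering", llvm::cl::init(false),
      llvm::cl::desc("Illustrative stand-in for use-wide-memcpy-loop-lowering."),
      llvm::cl::Hidden);
  } // end anonymous namespace

  bool demoUseWideMemcpyLowering() { return DemoWideMemcpyLowering; }
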
@@ -482,6 +488,25 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } +Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context, + Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const { + return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign, + DestAlign); +} + +void TargetTransformInfo::getMemcpyLoopResidualLoweringType( + SmallVectorImpl &OpsOut, LLVMContext &Context, + unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const { + TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, + SrcAlign, DestAlign); +} + +bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const { + return UseWideMemcpyLoopLowering; +} + bool TargetTransformInfo::areInlineCompatible(const Function *Caller, const Function *Callee) const { return TTIImpl->areInlineCompatible(Caller, Callee); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index fd6e3a643bf0..9e042da8801d 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1500,12 +1500,10 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Known.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarType()->isPointerTy()) && + assert((V->getType()->isIntOrIntVectorTy(BitWidth) || + V->getType()->isPtrOrPtrVectorTy()) && "Not integer or pointer type!"); - assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && - (!V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarSizeInBits() == BitWidth) && + assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; @@ -1952,7 +1950,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } // Check if all incoming values are non-zero constant. bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) { - return isa(V) && !cast(V)->isZeroValue(); + return isa(V) && !cast(V)->isZero(); }); if (AllNonZeroConstants) return true; @@ -4393,7 +4391,7 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS, } Optional llvm::isImpliedCondition(const Value *LHS, const Value *RHS, - const DataLayout &DL, bool InvertAPred, + const DataLayout &DL, bool LHSIsFalse, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -4402,26 +4400,51 @@ Optional llvm::isImpliedCondition(const Value *LHS, const Value *RHS, return None; Type *OpTy = LHS->getType(); - assert(OpTy->getScalarType()->isIntegerTy(1)); + assert(OpTy->isIntOrIntVectorTy(1)); // LHS ==> RHS by definition - if (!InvertAPred && LHS == RHS) - return true; + if (LHS == RHS) + return !LHSIsFalse; if (OpTy->isVectorTy()) // TODO: extending the code below to handle vectors return None; assert(OpTy->isIntegerTy(1) && "implied by above"); - ICmpInst::Predicate APred, BPred; - Value *ALHS, *ARHS; Value *BLHS, *BRHS; - - if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) || - !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) + ICmpInst::Predicate BPred; + // We expect the RHS to be an icmp. + if (!match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) return None; - if (InvertAPred) + Value *ALHS, *ARHS; + ICmpInst::Predicate APred; + // The LHS can be an 'or', 'and', or 'icmp'. 
+ if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS)))) { + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth == MaxDepth) + return None; + // If the result of an 'or' is false, then we know both legs of the 'or' are + // false. Similarly, if the result of an 'and' is true, then we know both + // legs of the 'and' are true. + if ((LHSIsFalse && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) || + (!LHSIsFalse && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) { + if (Optional Implication = isImpliedCondition( + ALHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT)) + return Implication; + if (Optional Implication = isImpliedCondition( + ARHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT)) + return Implication; + return None; + } + return None; + } + // All of the below logic assumes both LHS and RHS are icmps. + assert(isa(LHS) && isa(RHS) && "Expected icmps."); + + // The rest of the logic assumes the LHS condition is true. If that's not the + // case, invert the predicate to make it so. + if (LHSIsFalse) APred = CmpInst::getInversePredicate(APred); // Can we infer anything when the two compares have matching operands? diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 0ace8fa382bc..554d132c2ab7 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -301,7 +301,7 @@ const llvm::Value *llvm::getSplatValue(const Value *V) { auto *InsertEltInst = dyn_cast(ShuffleInst->getOperand(0)); if (!InsertEltInst || !isa(InsertEltInst->getOperand(2)) || - !cast(InsertEltInst->getOperand(2))->isNullValue()) + !cast(InsertEltInst->getOperand(2))->isZero()) return nullptr; return InsertEltInst->getOperand(1); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index a49276099f19..428bb21fbf51 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -542,7 +542,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(release); KEYWORD(acq_rel); KEYWORD(seq_cst); - KEYWORD(singlethread); + KEYWORD(syncscope); KEYWORD(nnan); KEYWORD(ninf); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 9ad31125f4b8..717eb0e00f4f 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1919,20 +1919,42 @@ bool LLParser::parseAllocSizeArguments(unsigned &BaseSizeArg, } /// ParseScopeAndOrdering -/// if isAtomic: ::= 'singlethread'? AtomicOrdering +/// if isAtomic: ::= SyncScope? AtomicOrdering /// else: ::= /// /// This sets Scope and Ordering to the parsed values. -bool LLParser::ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, +bool LLParser::ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering) { if (!isAtomic) return false; - Scope = CrossThread; - if (EatIfPresent(lltok::kw_singlethread)) - Scope = SingleThread; + return ParseScope(SSID) || ParseOrdering(Ordering); +} - return ParseOrdering(Ordering); +/// ParseScope +/// ::= syncscope("singlethread" | "")? +/// +/// This sets synchronization scope ID to the ID of the parsed value. 
+bool LLParser::ParseScope(SyncScope::ID &SSID) { + SSID = SyncScope::System; + if (EatIfPresent(lltok::kw_syncscope)) { + auto StartParenAt = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(StartParenAt, "Expected '(' in syncscope"); + + std::string SSN; + auto SSNAt = Lex.getLoc(); + if (ParseStringConstant(SSN)) + return Error(SSNAt, "Expected synchronization scope name"); + + auto EndParenAt = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return Error(EndParenAt, "Expected ')' in syncscope"); + + SSID = Context.getOrInsertSyncScopeID(SSN); + } + + return false; } /// ParseOrdering @@ -3061,7 +3083,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); if (!Val0->getType()->isIntOrIntVectorTy() && - !Val0->getType()->getScalarType()->isPointerTy()) + !Val0->getType()->isPtrOrPtrVectorTy()) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1); } @@ -3210,7 +3232,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Opc == Instruction::GetElementPtr) { if (Elts.size() == 0 || - !Elts[0]->getType()->getScalarType()->isPointerTy()) + !Elts[0]->getType()->isPtrOrPtrVectorTy()) return Error(ID.Loc, "base of getelementptr must be a pointer"); Type *BaseType = Elts[0]->getType(); @@ -3226,7 +3248,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ArrayRef Indices(Elts.begin() + 1, Elts.end()); for (Constant *Val : Indices) { Type *ValTy = Val->getType(); - if (!ValTy->getScalarType()->isIntegerTy()) + if (!ValTy->isIntOrIntVectorTy()) return Error(ID.Loc, "getelementptr index must be an integer"); if (ValTy->isVectorTy()) { unsigned ValNumEl = ValTy->getVectorNumElements(); @@ -5697,7 +5719,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); if (!LHS->getType()->isIntOrIntVectorTy() && - !LHS->getType()->getScalarType()->isPointerTy()) + !LHS->getType()->isPtrOrPtrVectorTy()) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); } @@ -6100,7 +6122,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; if (Lex.getKind() == lltok::kw_atomic) { isAtomic = true; @@ -6118,7 +6140,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { if (ParseType(Ty) || ParseToken(lltok::comma, "expected comma after load's type") || ParseTypeAndValue(Val, Loc, PFS) || - ParseScopeAndOrdering(isAtomic, Scope, Ordering) || + ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -6134,7 +6156,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { return Error(ExplicitTypeLoc, "explicit pointee type doesn't match operand's pointee type"); - Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, Scope); + Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, SSID); return AteExtraComma ? 
InstExtraComma : InstNormal; } @@ -6149,7 +6171,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; if (Lex.getKind() == lltok::kw_atomic) { isAtomic = true; @@ -6165,7 +6187,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Val, Loc, PFS) || ParseToken(lltok::comma, "expected ',' after store operand") || ParseTypeAndValue(Ptr, PtrLoc, PFS) || - ParseScopeAndOrdering(isAtomic, Scope, Ordering) || + ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -6181,7 +6203,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Ordering == AtomicOrdering::AcquireRelease) return Error(Loc, "atomic store cannot use Acquire ordering"); - Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, Scope); + Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, SSID); return AteExtraComma ? InstExtraComma : InstNormal; } @@ -6193,7 +6215,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; AtomicOrdering SuccessOrdering = AtomicOrdering::NotAtomic; AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; bool isVolatile = false; bool isWeak = false; @@ -6208,7 +6230,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { ParseTypeAndValue(Cmp, CmpLoc, PFS) || ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") || ParseTypeAndValue(New, NewLoc, PFS) || - ParseScopeAndOrdering(true /*Always atomic*/, Scope, SuccessOrdering) || + ParseScopeAndOrdering(true /*Always atomic*/, SSID, SuccessOrdering) || ParseOrdering(FailureOrdering)) return true; @@ -6231,7 +6253,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { if (!New->getType()->isFirstClassType()) return Error(NewLoc, "cmpxchg operand must be a first class value"); AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst( - Ptr, Cmp, New, SuccessOrdering, FailureOrdering, Scope); + Ptr, Cmp, New, SuccessOrdering, FailureOrdering, SSID); CXI->setVolatile(isVolatile); CXI->setWeak(isWeak); Inst = CXI; @@ -6245,7 +6267,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { Value *Ptr, *Val; LocTy PtrLoc, ValLoc; bool AteExtraComma = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; bool isVolatile = false; AtomicRMWInst::BinOp Operation; @@ -6271,7 +6293,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Ptr, PtrLoc, PFS) || ParseToken(lltok::comma, "expected ',' after atomicrmw address") || ParseTypeAndValue(Val, ValLoc, PFS) || - ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering)) return true; if (Ordering == AtomicOrdering::Unordered) @@ -6288,7 +6310,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { " integer"); AtomicRMWInst *RMWI = - new AtomicRMWInst(Operation, Ptr, Val, Ordering, Scope); + new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID); RMWI->setVolatile(isVolatile); Inst = RMWI; return AteExtraComma ? 
InstExtraComma : InstNormal; @@ -6298,8 +6320,8 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'fence' 'singlethread'? AtomicOrdering int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; - if (ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + SyncScope::ID SSID = SyncScope::System; + if (ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering)) return true; if (Ordering == AtomicOrdering::Unordered) @@ -6307,7 +6329,7 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { if (Ordering == AtomicOrdering::Monotonic) return TokError("fence cannot be monotonic"); - Inst = new FenceInst(Context, Ordering, Scope); + Inst = new FenceInst(Context, Ordering, SSID); return InstNormal; } @@ -6349,7 +6371,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { break; } if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; - if (!Val->getType()->getScalarType()->isIntegerTy()) + if (!Val->getType()->isIntOrIntVectorTy()) return Error(EltLoc, "getelementptr index must be an integer"); if (Val->getType()->isVectorTy()) { diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 4616c2e86947..d5b059355c42 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -241,8 +241,9 @@ namespace llvm { bool ParseOptionalCallingConv(unsigned &CC); bool ParseOptionalAlignment(unsigned &Alignment); bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); - bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, + bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering); + bool ParseScope(SyncScope::ID &SSID); bool ParseOrdering(AtomicOrdering &Ordering); bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 6c8ed7da495d..9c7a06de81b4 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -93,7 +93,7 @@ enum Kind { kw_release, kw_acq_rel, kw_seq_cst, - kw_singlethread, + kw_syncscope, kw_nnan, kw_ninf, kw_nsz, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 1ebef3173135..2b4970a80cdd 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -513,6 +513,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { TBAAVerifier TBAAVerifyHelper; std::vector BundleTags; + SmallVector SSIDs; public: BitcodeReader(BitstreamCursor Stream, StringRef Strtab, @@ -648,6 +649,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { Error parseTypeTable(); Error parseTypeTableBody(); Error parseOperandBundleTags(); + Error parseSyncScopeNames(); Expected recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT); @@ -668,6 +670,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { Error findFunctionInStream( Function *F, DenseMap::iterator DeferredFunctionInfoIterator); + + SyncScope::ID getDecodedSyncScopeID(unsigned Val); }; /// Class to manage reading and parsing function summary index bitcode @@ -998,14 +1002,6 @@ static AtomicOrdering getDecodedOrdering(unsigned Val) { } } -static SynchronizationScope getDecodedSynchScope(unsigned Val) { - switch (Val) { - case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread; - 
default: // Map unknown scopes to cross-thread. - case bitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread; - } -} - static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) { switch (Val) { default: // Map unknown selection kinds to any. @@ -1745,6 +1741,44 @@ Error BitcodeReader::parseOperandBundleTags() { } } +Error BitcodeReader::parseSyncScopeNames() { + if (Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID)) + return error("Invalid record"); + + if (!SSIDs.empty()) + return error("Invalid multiple synchronization scope names blocks"); + + SmallVector Record; + while (true) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + if (SSIDs.empty()) + return error("Invalid empty synchronization scope names block"); + return Error::success(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Synchronization scope names are implicitly mapped to synchronization + // scope IDs by their order. + + if (Stream.readRecord(Entry.ID, Record) != bitc::SYNC_SCOPE_NAME) + return error("Invalid record"); + + SmallString<16> SSN; + if (convertToString(Record, 0, SSN)) + return error("Invalid record"); + + SSIDs.push_back(Context.getOrInsertSyncScopeID(SSN)); + Record.clear(); + } +} + /// Associate a value with its name from the given index in the provided record. Expected BitcodeReader::recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT) { @@ -3132,6 +3166,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, if (Error Err = parseOperandBundleTags()) return Err; break; + case bitc::SYNC_SCOPE_NAMES_BLOCK_ID: + if (Error Err = parseSyncScopeNames()) + return Err; + break; } continue; @@ -4204,7 +4242,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_LOADATOMIC: { - // LOADATOMIC: [opty, op, align, vol, ordering, synchscope] + // LOADATOMIC: [opty, op, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || @@ -4226,12 +4264,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Invalid record"); if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope); + I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; @@ -4260,7 +4298,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_STOREATOMIC: case bitc::FUNC_CODE_INST_STOREATOMIC_OLD: { - // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope] + // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || @@ -4280,20 +4318,20 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Ordering == AtomicOrdering::Acquire || Ordering == AtomicOrdering::AcquireRelease) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); if (Ordering != 
AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SynchScope); + I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMPXCHG_OLD: case bitc::FUNC_CODE_INST_CMPXCHG: { - // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope, + // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, ssid, // failureordering?, isweak?] unsigned OpNum = 0; Value *Ptr, *Cmp, *New; @@ -4310,7 +4348,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (SuccessOrdering == AtomicOrdering::NotAtomic || SuccessOrdering == AtomicOrdering::Unordered) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]); if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return Err; @@ -4322,7 +4360,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { FailureOrdering = getDecodedOrdering(Record[OpNum + 3]); I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering, - SynchScope); + SSID); cast(I)->setVolatile(Record[OpNum]); if (Record.size() < 8) { @@ -4339,7 +4377,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_ATOMICRMW: { - // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope] + // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid] unsigned OpNum = 0; Value *Ptr, *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || @@ -4356,13 +4394,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Unordered) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); - I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); + I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID); cast(I)->setVolatile(Record[OpNum+1]); InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope] + case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, ssid] if (2 != Record.size()) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[0]); @@ -4370,8 +4408,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Ordering == AtomicOrdering::Unordered || Ordering == AtomicOrdering::Monotonic) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[1]); - I = new FenceInst(Context, Ordering, SynchScope); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[1]); + I = new FenceInst(Context, Ordering, SSID); InstructionList.push_back(I); break; } @@ -4567,6 +4605,14 @@ Error BitcodeReader::findFunctionInStream( return Error::success(); } +SyncScope::ID BitcodeReader::getDecodedSyncScopeID(unsigned Val) { + if (Val == SyncScope::SingleThread || Val == SyncScope::System) + return SyncScope::ID(Val); + if (Val >= SSIDs.size()) + return SyncScope::System; // Map unknown synchronization scopes to system. 
+ return SSIDs[Val]; +} + //===----------------------------------------------------------------------===// // GVMaterializer implementation //===----------------------------------------------------------------------===// diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index b2b1ea6de374..0e518d2bbc8f 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -114,6 +114,8 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { /// True if a module hash record should be written. bool GenerateHash; + SHA1 Hasher; + /// If non-null, when GenerateHash is true, the resulting hash is written /// into ModHash. When GenerateHash is false, that specified value /// is used as the hash instead of computing from the generated bitcode. @@ -176,6 +178,8 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { private: uint64_t bitcodeStartBit() { return BitcodeStartBit; } + size_t addToStrtab(StringRef Str); + void writeAttributeGroupTable(); void writeAttributeTable(); void writeTypeTable(); @@ -262,6 +266,7 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { const GlobalObject &GO); void writeModuleMetadataKinds(); void writeOperandBundleTags(); + void writeSyncScopeNames(); void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal); void writeModuleConstants(); bool pushValueAndType(const Value *V, unsigned InstID, @@ -312,6 +317,10 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { return VE.getValueID(VI.getValue()); } std::map &valueIds() { return GUIDToValueIdMap; } + + unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { + return unsigned(SSID); + } }; /// Class to manage the bitcode writing for a combined index. @@ -481,14 +490,6 @@ static unsigned getEncodedOrdering(AtomicOrdering Ordering) { llvm_unreachable("Invalid ordering"); } -static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) { - switch (SynchScope) { - case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD; - case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD; - } - llvm_unreachable("Invalid synch scope"); -} - static void writeStringRecord(BitstreamWriter &Stream, unsigned Code, StringRef Str, unsigned AbbrevToUse) { SmallVector Vals; @@ -947,11 +948,17 @@ static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) { llvm_unreachable("Invalid unnamed_addr"); } +size_t ModuleBitcodeWriter::addToStrtab(StringRef Str) { + if (GenerateHash) + Hasher.update(Str); + return StrtabBuilder.add(Str); +} + void ModuleBitcodeWriter::writeComdats() { SmallVector Vals; for (const Comdat *C : VE.getComdats()) { // COMDAT: [strtab offset, strtab size, selection_kind] - Vals.push_back(StrtabBuilder.add(C->getName())); + Vals.push_back(addToStrtab(C->getName())); Vals.push_back(C->getName().size()); Vals.push_back(getEncodedComdatSelectionKind(*C)); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); @@ -1122,7 +1129,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, // comdat, attributes] - Vals.push_back(StrtabBuilder.add(GV.getName())); + Vals.push_back(addToStrtab(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant()); @@ -1161,7 +1168,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // linkage, paramattrs, alignment, section, visibility, gc, // unnamed_addr, 
prologuedata, dllstorageclass, comdat, // prefixdata, personalityfn] - Vals.push_back(StrtabBuilder.add(F.getName())); + Vals.push_back(addToStrtab(F.getName())); Vals.push_back(F.getName().size()); Vals.push_back(VE.getTypeID(F.getFunctionType())); Vals.push_back(F.getCallingConv()); @@ -1191,7 +1198,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { for (const GlobalAlias &A : M.aliases()) { // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage, // visibility, dllstorageclass, threadlocal, unnamed_addr] - Vals.push_back(StrtabBuilder.add(A.getName())); + Vals.push_back(addToStrtab(A.getName())); Vals.push_back(A.getName().size()); Vals.push_back(VE.getTypeID(A.getValueType())); Vals.push_back(A.getType()->getAddressSpace()); @@ -1210,7 +1217,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { for (const GlobalIFunc &I : M.ifuncs()) { // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver // val#, linkage, visibility] - Vals.push_back(StrtabBuilder.add(I.getName())); + Vals.push_back(addToStrtab(I.getName())); Vals.push_back(I.getName().size()); Vals.push_back(VE.getTypeID(I.getValueType())); Vals.push_back(I.getType()->getAddressSpace()); @@ -2032,6 +2039,24 @@ void ModuleBitcodeWriter::writeOperandBundleTags() { Stream.ExitBlock(); } +void ModuleBitcodeWriter::writeSyncScopeNames() { + SmallVector SSNs; + M.getContext().getSyncScopeNames(SSNs); + if (SSNs.empty()) + return; + + Stream.EnterSubblock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID, 2); + + SmallVector Record; + for (auto SSN : SSNs) { + Record.append(SSN.begin(), SSN.end()); + Stream.EmitRecord(bitc::SYNC_SCOPE_NAME, Record, 0); + Record.clear(); + } + + Stream.ExitBlock(); +} + static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); @@ -2648,7 +2673,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast(I).isVolatile()); if (cast(I).isAtomic()) { Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast(I).getSynchScope())); + Vals.push_back(getEncodedSyncScopeID(cast(I).getSyncScopeID())); } break; case Instruction::Store: @@ -2662,7 +2687,8 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast(I).isVolatile()); if (cast(I).isAtomic()) { Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast(I).getSynchScope())); + Vals.push_back( + getEncodedSyncScopeID(cast(I).getSyncScopeID())); } break; case Instruction::AtomicCmpXchg: @@ -2674,7 +2700,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back( getEncodedOrdering(cast(I).getSuccessOrdering())); Vals.push_back( - getEncodedSynchScope(cast(I).getSynchScope())); + getEncodedSyncScopeID(cast(I).getSyncScopeID())); Vals.push_back( getEncodedOrdering(cast(I).getFailureOrdering())); Vals.push_back(cast(I).isWeak()); @@ -2688,12 +2714,12 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast(I).isVolatile()); Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); Vals.push_back( - getEncodedSynchScope(cast(I).getSynchScope())); + getEncodedSyncScopeID(cast(I).getSyncScopeID())); break; case Instruction::Fence: Code = bitc::FUNC_CODE_INST_FENCE; Vals.push_back(getEncodedOrdering(cast(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast(I).getSynchScope())); + Vals.push_back(getEncodedSyncScopeID(cast(I).getSyncScopeID())); break; case Instruction::Call: { const CallInst 
&CI = cast(I); @@ -3648,7 +3674,6 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) { // Emit the module's hash. // MODULE_CODE_HASH: [5*i32] if (GenerateHash) { - SHA1 Hasher; uint32_t Vals[5]; Hasher.update(ArrayRef((const uint8_t *)&(Buffer)[BlockStartPos], Buffer.size() - BlockStartPos)); @@ -3707,6 +3732,7 @@ void ModuleBitcodeWriter::write() { writeUseListBlock(nullptr); writeOperandBundleTags(); + writeSyncScopeNames(); // Emit function bodies. DenseMap FunctionToBitcodeIndex; diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 344136b1f195..aa9c8e94d08a 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -361,7 +361,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *NewLI = Builder.CreateLoad(NewAddr); NewLI->setAlignment(LI->getAlignment()); NewLI->setVolatile(LI->isVolatile()); - NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope()); + NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); @@ -444,7 +444,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); NewSI->setAlignment(SI->getAlignment()); NewSI->setVolatile(SI->isVolatile()); - NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope()); + NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); SI->eraseFromParent(); return NewSI; @@ -801,7 +801,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(), - CI->getFailureOrdering(), CI->getSynchScope()); + CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); // When we're building a strong cmpxchg, we need a loop, so you // might think we could use a weak cmpxchg inside. 
But, using strong @@ -924,7 +924,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal, CI->getSuccessOrdering(), CI->getFailureOrdering(), - CI->getSynchScope()); + CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); NewCI->setWeak(CI->isWeak()); DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index faa5f139cf7b..b7fd45a3f6a6 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -78,6 +78,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRABasicPass(Registry); + initializeRAFastPass(Registry); initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index b50e76f2e3ba..b7155ac2480a 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -4270,6 +4270,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *Consensus = nullptr; unsigned NumUsesConsensus = 0; bool IsNumUsesConsensusValid = false; + bool PhiSeen = false; SmallVector AddrModeInsts; ExtAddrMode AddrMode; TypePromotionTransaction TPT(RemovedInsts); @@ -4289,6 +4290,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (PHINode *P = dyn_cast(V)) { for (Value *IncValue : P->incoming_values()) worklist.push_back(IncValue); + PhiSeen = true; continue; } @@ -4342,9 +4344,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, TPT.commit(); // If all the instructions matched are already in this BB, don't do anything. - if (none_of(AddrModeInsts, [&](Value *V) { + // If we saw Phi node then it is not local definitely. + if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) { return IsNonLocalValue(V, MemoryInst->getParent()); - })) { + })) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -4390,6 +4393,20 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, AddrMode.Scale = 0; } + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + // + // (See below for code to add the scale.) + if (AddrMode.Scale) { + Type *ScaledRegTy = AddrMode.ScaledReg->getType(); + if (cast(IntPtrTy)->getBitWidth() > + cast(ScaledRegTy)->getBitWidth()) + return false; + } + if (AddrMode.BaseGV) { if (ResultPtr) return false; @@ -4440,19 +4457,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *V = AddrMode.ScaledReg; if (V->getType() == IntPtrTy) { // done. - } else if (cast(IntPtrTy)->getBitWidth() < - cast(V->getType())->getBitWidth()) { - V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { - // It is only safe to sign extend the BaseReg if we know that the math - // required to create it did not overflow before we extend it. 
Since - // the original IR value was tossed in favor of a constant back when - // the AddrMode was created we need to bail out gracefully if widths - // do not match instead of extending it. - Instruction *I = dyn_cast_or_null(ResultIndex); - if (I && (ResultIndex != AddrMode.BaseReg)) - I->eraseFromParent(); - return false; + assert(cast(IntPtrTy)->getBitWidth() < + cast(V->getType())->getBitWidth() && + "We can't transform if ScaledReg is too narrow"); + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } if (AddrMode.Scale != 1) diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 521037f9d206..ed1bd995e60b 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -345,7 +345,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()), Flags, DL->getTypeStoreSize(LI.getType()), getMemOpAlignment(LI), AAMDNodes(), nullptr, - LI.getSynchScope(), LI.getOrdering())); + LI.getSyncScopeID(), LI.getOrdering())); return true; } @@ -363,7 +363,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { *MF->getMachineMemOperand( MachinePointerInfo(SI.getPointerOperand()), Flags, DL->getTypeStoreSize(SI.getValueOperand()->getType()), - getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSynchScope(), + getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(), SI.getOrdering())); return true; } diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 860fc9a4f8b6..bf427225d6a9 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -16,7 +16,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/IR/Constants.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -26,6 +30,9 @@ using namespace llvm; +InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) + : Renderers(MaxRenderers, nullptr), MIs() {} + InstructionSelector::InstructionSelector() = default; bool InstructionSelector::constrainOperandRegToRegClass( diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 84b0a0ac4157..49fb5e8f075b 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -99,23 +99,19 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } -LegalizerHelper::LegalizeResult llvm::replaceWithLibcall( - MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, - const CallLowering::ArgInfo &Result, ArrayRef Args) { +LegalizerHelper::LegalizeResult +llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, + ArrayRef Args) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); + MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - MIRBuilder.setInstr(MI); if (!CLI.lowerCall(MIRBuilder, 
TLI.getLibcallCallingConv(Libcall), MachineOperand::CreateES(Name), Result, Args)) return LegalizerHelper::UnableToLegalize; - // We're about to remove MI, so move the insert point after it. - MIRBuilder.setInsertPt(MIRBuilder.getMBB(), - std::next(MIRBuilder.getInsertPt())); - - MI.eraseFromParent(); return LegalizerHelper::Legalized; } @@ -123,10 +119,9 @@ static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); - return replaceWithLibcall(MI, MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), OpType}, - {{MI.getOperand(1).getReg(), OpType}, - {MI.getOperand(2).getReg(), OpType}}); + return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); } LegalizerHelper::LegalizeResult @@ -135,6 +130,8 @@ LegalizerHelper::libcall(MachineInstr &MI) { unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -143,15 +140,24 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { Type *HLTy = Type::getInt32Ty(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; } } + + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 47c6214c0552..4636806c3f08 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -166,19 +166,24 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, .addGlobalAddress(GV); } -MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, +MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, unsigned Op1) { assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && "invalid operand type"); assert(MRI->getType(Res) == MRI->getType(Op0) && MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - return buildInstr(TargetOpcode::G_ADD) + return buildInstr(Opcode) .addDef(Res) .addUse(Op0) .addUse(Op1); } +MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, + unsigned Op1) { + return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1); +} + MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, unsigned Op1) { assert(MRI->getType(Res).isPointer() && @@ -222,41 +227,22 @@ MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(TargetOpcode::G_SUB) - 
.addDef(Res) - .addUse(Op0) - .addUse(Op1); + return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(TargetOpcode::G_MUL) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); + return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1); +} - return buildInstr(TargetOpcode::G_AND) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); +MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0, + unsigned Op1) { + return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index 3746b74e0528..f9ba4ffa6527 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -67,7 +67,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { } } -void LiveRegUnits::accumulateBackward(const MachineInstr &MI) { +void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 1f1ce6e8d725..58a655a4dee4 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -365,6 +365,14 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token, return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); } +static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '"') + return None; + return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, + ErrorCallback); +} + static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { auto Range = C; C.advance(); // Skip '%' @@ -630,6 +638,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) return R.remaining(); + if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) + return R.remaining(); Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 3e9513111bf4..08b82e59c4fc 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -127,7 +127,8 @@ struct MIToken { NamedIRValue, IRValue, QuotedIRValue, // `` - SubRegisterIndex + SubRegisterIndex, + StringConstant }; private: @@ -168,7 +169,8 @@ struct MIToken { bool isMemoryOperandFlag() const { return Kind == kw_volatile || Kind == kw_non_temporal || - Kind == kw_dereferenceable || Kind == kw_invariant; + Kind == kw_dereferenceable || Kind == kw_invariant || + Kind == StringConstant; } bool is(TokenKind K) const { return Kind == K; } diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index 
c58d192284dd..c68d87b15a31 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -141,6 +141,8 @@ class MIParser { StringMap Names2DirectTargetFlags; /// Maps from direct target flag names to the bitmask target flag values. StringMap Names2BitmaskTargetFlags; + /// Maps from MMO target flag names to MMO target flag values. + StringMap Names2MMOTargetFlags; public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, @@ -229,6 +231,7 @@ class MIParser { bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID); bool parseOptionalAtomicOrdering(AtomicOrdering &Order); bool parseMachineMemoryOperand(MachineMemOperand *&Dest); @@ -318,6 +321,18 @@ class MIParser { /// /// Return true if the name isn't a name of a bitmask target flag. bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); + + void initNames2MMOTargetFlags(); + + /// Try to convert a name of a MachineMemOperand target flag to the + /// corresponding target flag. + /// + /// Return true if the name isn't a name of a target MMO flag. + bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag); + + /// parseStringConstant + /// ::= StringConstant + bool parseStringConstant(std::string &Result); }; } // end anonymous namespace @@ -2034,7 +2049,14 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { case MIToken::kw_invariant: Flags |= MachineMemOperand::MOInvariant; break; - // TODO: parse the target specific memory operand flags. + case MIToken::StringConstant: { + MachineMemOperand::Flags TF; + if (getMMOTargetFlag(Token.stringValue(), TF)) + return error("use of undefined target MMO flag '" + Token.stringValue() + + "'"); + Flags |= TF; + break; + } default: llvm_unreachable("The current token should be a memory operand flag"); } @@ -2135,6 +2157,26 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { return false; } +bool MIParser::parseOptionalScope(LLVMContext &Context, + SyncScope::ID &SSID) { + SSID = SyncScope::System; + if (Token.is(MIToken::Identifier) && Token.stringValue() == "syncscope") { + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected '(' in syncscope"); + + std::string SSN; + if (parseStringConstant(SSN)) + return true; + + SSID = Context.getOrInsertSyncScopeID(SSN); + if (expectAndConsume(MIToken::rparen)) + return error("expected ')' in syncscope"); + } + + return false; +} + bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) { Order = AtomicOrdering::NotAtomic; if (Token.isNot(MIToken::Identifier)) @@ -2174,12 +2216,10 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { Flags |= MachineMemOperand::MOStore; lex(); - // Optional "singlethread" scope. - SynchronizationScope Scope = SynchronizationScope::CrossThread; - if (Token.is(MIToken::Identifier) && Token.stringValue() == "singlethread") { - Scope = SynchronizationScope::SingleThread; - lex(); - } + // Optional synchronization scope. + SyncScope::ID SSID; + if (parseOptionalScope(MF.getFunction()->getContext(), SSID)) + return true; // Up to two atomic orderings (cmpxchg provides guarantees on failure). 
AtomicOrdering Order, FailureOrder; @@ -2244,7 +2284,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (expectAndConsume(MIToken::rparen)) return true; Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range, - Scope, Order, FailureOrder); + SSID, Order, FailureOrder); return false; } @@ -2457,6 +2497,35 @@ bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) { return false; } +void MIParser::initNames2MMOTargetFlags() { + if (!Names2MMOTargetFlags.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) + Names2MMOTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getMMOTargetFlag(StringRef Name, + MachineMemOperand::Flags &Flag) { + initNames2MMOTargetFlags(); + auto FlagInfo = Names2MMOTargetFlags.find(Name); + if (FlagInfo == Names2MMOTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +bool MIParser::parseStringConstant(std::string &Result) { + if (Token.isNot(MIToken::StringConstant)) + return error("expected string constant"); + Result = Token.stringValue(); + lex(); + return false; +} + bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, StringRef Src, SMDiagnostic &Error) { diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index c524a9835f33..ddeacf1d1bfb 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" @@ -139,6 +140,8 @@ class MIPrinter { ModuleSlotTracker &MST; const DenseMap &RegisterMaskIds; const DenseMap &StackObjectOperandMapping; + /// Synchronization scope names registered with LLVMContext. 
+ SmallVector SSNs; bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const; bool canPredictSuccessors(const MachineBasicBlock &MBB) const; @@ -162,7 +165,9 @@ class MIPrinter { void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool IsDef = false); - void print(const MachineMemOperand &Op); + void print(const LLVMContext &Context, const TargetInstrInfo &TII, + const MachineMemOperand &Op); + void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID); void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); }; @@ -731,11 +736,12 @@ void MIPrinter::print(const MachineInstr &MI) { if (!MI.memoperands_empty()) { OS << " :: "; + const LLVMContext &Context = MF->getFunction()->getContext(); bool NeedComma = false; for (const auto *Op : MI.memoperands()) { if (NeedComma) OS << ", "; - print(*Op); + print(Context, *TII, *Op); NeedComma = true; } } @@ -1031,9 +1037,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, } } -void MIPrinter::print(const MachineMemOperand &Op) { +static const char *getTargetMMOFlagName(const TargetInstrInfo &TII, + unsigned TMMOFlag) { + auto Flags = TII.getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TMMOFlag) { + return I.second; + } + } + return nullptr; +} + +void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, + const MachineMemOperand &Op) { OS << '('; - // TODO: Print operand's target specific flags. if (Op.isVolatile()) OS << "volatile "; if (Op.isNonTemporal()) @@ -1042,6 +1059,15 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "dereferenceable "; if (Op.isInvariant()) OS << "invariant "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag1) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1) + << "\" "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag2) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2) + << "\" "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag3) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3) + << "\" "; if (Op.isLoad()) OS << "load "; else { @@ -1049,8 +1075,7 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "store "; } - if (Op.getSynchScope() == SynchronizationScope::SingleThread) - OS << "singlethread "; + printSyncScope(Context, Op.getSyncScopeID()); if (Op.getOrdering() != AtomicOrdering::NotAtomic) OS << toIRString(Op.getOrdering()) << ' '; @@ -1119,6 +1144,23 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << ')'; } +void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) { + switch (SSID) { + case SyncScope::System: { + break; + } + default: { + if (SSNs.empty()) + Context.getSyncScopeNames(SSNs); + + OS << "syncscope(\""; + PrintEscapedString(SSNs[SSID], OS); + OS << "\") "; + break; + } + } +} + static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, const TargetRegisterInfo *TRI) { int Reg = TRI->getLLVMRegNum(DwarfReg, true); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 2d4b95974cc6..447ad629885b 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1917,6 +1917,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, return; MachineBasicBlock *Top = *LoopChain.begin(); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); + + // If ExitingBB is 
already the last one in the chain, there is nothing to do. + if (Bottom == ExitingBB) + return; + bool ViableTopFallthrough = false; for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred];
@@ -1931,7 +1937,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock *Succ : Bottom->successors()) { BlockChain *SuccChain = BlockToChain[Succ]; if (!LoopBlockSet.count(Succ) &&
@@ -1944,6 +1949,36 @@ if (ExitIt == LoopChain.end()) return; + // Rotating a loop exit to the bottom when there is a fallthrough to the top + // trades the entry fallthrough for an exit fallthrough. + // If there is no bottom->top edge, but the chosen exit block does have + // a fallthrough, we break that fallthrough for nothing in return. + + // Consider an example: we have a built chain of basic blocks + // B1, B2, ..., Bn, where Bk is ExitingBB, the chosen exit block. + // After rotation we get + // Bk+1, ..., Bn, B1, ..., Bk + // The broken fallthrough into B1 is compensated by the new fallthrough out of Bk. + // If we had a fallthrough Bk -> Bk+1, it is broken now. + // It might be compensated by the fallthrough Bn -> B1. + // This gives a condition under which rotation would create an extra branch. + // Rotation is avoided only if all of the following hold: + // There is a fallthrough to the top (B1); + // There was a fallthrough from the chosen exit block (Bk) to the next one (Bk+1); + // There is no fallthrough from the bottom (Bn) to the top (B1). + // Note that there is no exit fallthrough from Bn because we checked that + // above.
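As a side illustration of the std::rotate call that follows (a self-contained sketch using plain integers, not code from this change): rotating at the element after the chosen exit block moves that exit to the bottom of the chain.

  #include <algorithm>
  #include <cassert>
  #include <vector>

  int main() {
    // A chain B1, B2, B3, B4 where B2 is the chosen exit block (ExitIt points at B2).
    std::vector<int> Chain = {1, 2, 3, 4};
    // Mirrors std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()).
    std::rotate(Chain.begin(), Chain.begin() + 2, Chain.end());
    // The chain is now B3, B4, B1, B2; the exit block ends up last.
    assert((Chain == std::vector<int>{3, 4, 1, 2}));
    return 0;
  }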
+ if (ViableTopFallthrough) { + assert(std::next(ExitIt) != LoopChain.end() && + "Exit should not be last BB"); + MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); + if (ExitingBB->isSuccessor(NextBlockInChain)) + if (!Bottom->isSuccessor(Top)) + return; + } + + DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) + << " at bottom\n"); std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index bbdae6e1a49e..f88e175a9776 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -305,11 +305,11 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand *MachineFunction::getMachineMemOperand( MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, - SynchronizationScope SynchScope, AtomicOrdering Ordering, + SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges, - SynchScope, Ordering, FailureOrdering); + SSID, Ordering, FailureOrdering); } MachineMemOperand * @@ -320,13 +320,13 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineMemOperand(MachinePointerInfo(MMO->getValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSynchScope(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSynchScope(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } @@ -359,7 +359,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, (*I)->getFlags() & ~MachineMemOperand::MOStore, (*I)->getSize(), (*I)->getBaseAlignment(), (*I)->getAAInfo(), nullptr, - (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getSyncScopeID(), (*I)->getOrdering(), (*I)->getFailureOrdering()); Result[Index] = JustLoad; } @@ -393,7 +393,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, (*I)->getFlags() & ~MachineMemOperand::MOLoad, (*I)->getSize(), (*I)->getBaseAlignment(), (*I)->getAAInfo(), nullptr, - (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getSyncScopeID(), (*I)->getOrdering(), (*I)->getFailureOrdering()); Result[Index] = JustStore; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 81c6dace92e0..afea5575a3ae 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -447,6 +447,14 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, SmallString<16> Str; getFPImm()->getValueAPF().toString(Str); OS << "quad " << Str; + } else if (getFPImm()->getType()->isX86_FP80Ty()) { + APFloat APF = getFPImm()->getValueAPF(); + OS << "x86_fp80 0xK"; + APInt API = APF.bitcastToAPInt(); + OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, + /*Upper=*/true); + OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); } else { OS << getFPImm()->getValueAPF().convertToDouble(); } @@ -606,7 +614,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, uint64_t s, unsigned int a, const AAMDNodes &AAInfo, const MDNode *Ranges, - 
SynchronizationScope SynchScope, + SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), @@ -617,8 +625,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); - AtomicInfo.SynchScope = static_cast(SynchScope); - assert(getSynchScope() == SynchScope && "Value truncated"); + AtomicInfo.SSID = static_cast(SSID); + assert(getSyncScopeID() == SSID && "Value truncated"); AtomicInfo.Ordering = static_cast(Ordering); assert(getOrdering() == Ordering && "Value truncated"); AtomicInfo.FailureOrdering = static_cast(FailureOrdering); @@ -744,6 +752,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { OS << "(dereferenceable)"; if (isInvariant()) OS << "(invariant)"; + if (getFlags() & MOTargetFlag1) + OS << "(flag1)"; + if (getFlags() & MOTargetFlag2) + OS << "(flag2)"; + if (getFlags() & MOTargetFlag3) + OS << "(flag3)"; } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index e65c256c1bb5..fcb544806dda 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -985,6 +985,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Operand should be tied", MO, MONum); else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) report("Tied def doesn't match MCInstrDesc", MO, MONum); + else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) { + const MachineOperand &MOTied = MI->getOperand(TiedTo); + if (!MOTied.isReg()) + report("Tied counterpart must be a register", &MOTied, TiedTo); + else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) && + MO->getReg() != MOTied.getReg()) + report("Tied physical registers must match.", &MOTied, TiedTo); + } } else if (MO->isReg() && MO->isTied()) report("Explicit operand should not be tied", MO, MONum); } else { diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 5e279b065bbd..633a853b2c74 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -24,7 +24,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" STATISTIC(NumFused, "Number of instr pairs fused"); diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 425a59dc0375..4a50d895340a 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -23,7 +23,7 @@ /// This pass traverses all the instructions in a program in top-down order. /// In contrast to the instruction scheduling passes, this pass never resets /// the hazard recognizer to ensure it can correctly handles noop hazards at -/// the begining of blocks. +/// the beginning of blocks. 
// //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index c606b7b83310..d5538be4bba2 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -203,6 +203,8 @@ namespace { char RAFast::ID = 0; } +INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false) + /// getStackSpaceFor - This allocates space for the specified virtual register /// to be held on the stack. int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { @@ -244,8 +246,15 @@ void RAFast::addKillFlag(const LiveReg &LR) { if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { if (MO.getReg() == LR.PhysReg) MO.setIsKill(); - else - LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); + // Else, do nothing: we are probably redefining a + // subreg of this register and, given that we don't track which + // lanes are actually dead, we cannot insert a kill flag here. + // Otherwise we may end up in a situation like this: + // ... = (MO) physreg:sub1, physreg + // ... <== Here we would allow a later pass to reuse physreg:sub1, + // which is potentially wrong. + // LR:sub0 = ... + // ... = LR.sub1 <== This is going to use physreg:sub1 } }
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 9562652556ac..020e81eca2dd 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -2458,7 +2458,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { do { Reg = RecoloringCandidates.pop_back_val(); - // We cannot recolor physcal register. + // We cannot recolor physical register. if (TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index e3baff4be4bc..9778103575fa 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -924,5 +924,3 @@ FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { return createPBQPRegisterAllocator(); } - -#undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index ff9bca092dbe..a67d07b36474 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1227,6 +1227,34 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SR->createDeadDef(DefIndex, Alloc); } } + + // Make sure that the subrange for the resultant undef is removed. + // For example: + // vreg1:sub1 = LOAD CONSTANT 1 + // vreg2 = COPY vreg1 + // ==> + // vreg2:sub1 = LOAD CONSTANT 1 + // ; Correct, but we need to remove the subrange for vreg2:sub0 + // ; as it is now undef + if (NewIdx != 0 && DstInt.hasSubRanges()) { + // The affected subregister segments can be removed.
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); + bool UpdatedSubRanges = false; + for (LiveInterval::SubRange &SR : DstInt.subranges()) { + if ((SR.LaneMask & DstMask).none()) { + DEBUG(dbgs() << "Removing undefined SubRange " + << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); + // VNI is in ValNo - remove any segments in this SubRange that have this ValNo + if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) { + SR.removeValNo(RmValNo); + UpdatedSubRanges = true; + } + } + } + if (UpdatedSubRanges) + DstInt.removeEmptySubRanges(); + } } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 05e641d9489d..fc5105aadbff 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -375,7 +375,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, static std::pair findSurvivorBackwards(const MachineRegisterInfo &MRI, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, - const LiveRegUnits &LiveOut, ArrayRef AllocationOrder) { + const LiveRegUnits &LiveOut, ArrayRef AllocationOrder, + bool RestoreAfter) { bool FoundTo = false; MCPhysReg Survivor = 0; MachineBasicBlock::iterator Pos; @@ -388,7 +389,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, for (MachineBasicBlock::iterator I = From;; --I) { const MachineInstr &MI = *I; - Used.accumulateBackward(MI); + Used.accumulate(MI); if (I == To) { // See if one of the registers in RC wasn't used so far. @@ -401,6 +402,11 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, // the register which is not defined/used for the longest time. FoundTo = true; Pos = To; + // Note: It was fine so far to start our search at From, however now that + // we have to spill, and can only place the restore after From then + // add the regs used/defed by std::next(From) to the set. + if (RestoreAfter) + Used.accumulate(*std::next(From)); } if (FoundTo) { if (Survivor == 0 || !Used.available(Survivor)) { @@ -575,7 +581,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator UseMI; ArrayRef AllocationOrder = RC.getRawAllocationOrder(MF); std::pair P = - findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder); + findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder, + RestoreAfter); MCPhysReg Reg = P.first; MachineBasicBlock::iterator SpillBefore = P.second; assert(Reg != 0 && "No register left to scavenge!"); @@ -626,7 +633,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, assert(RealDef != nullptr && "Must have at least 1 Def"); #endif - // We should only have one definition of the register. However to accomodate + // We should only have one definition of the register. However to accommodate // the requirements of two address code we also allow definitions in // subsequent instructions provided they also read the register. That way // we get a single contiguous lifetime. 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 3cd270cec3a6..5e95f760aaa2 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -67,6 +67,41 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { return &TII->get(Node->getMachineOpcode()); } +LLVM_DUMP_METHOD +raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + switch (getKind()) { + case Data: OS << "Data"; break; + case Anti: OS << "Anti"; break; + case Output: OS << "Out "; break; + case Order: OS << "Ord "; break; + } + + switch (getKind()) { + case Data: + OS << " Latency=" << getLatency(); + if (TRI && isAssignedRegDep()) + OS << " Reg=" << PrintReg(getReg(), TRI); + break; + case Anti: + case Output: + OS << " Latency=" << getLatency(); + break; + case Order: + OS << " Latency=" << getLatency(); + switch(Contents.OrdKind) { + case Barrier: OS << " Barrier"; break; + case MayAliasMem: + case MustAliasMem: OS << " Memory"; break; + case Artificial: OS << " Artificial"; break; + case Weak: OS << " Weak"; break; + case Cluster: OS << " Cluster"; break; + } + break; + } + + return OS; +} + bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this dependence, don't add a redundant one. for (SDep &PredDep : Preds) { @@ -302,16 +337,24 @@ void SUnit::biasCriticalPath() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD -void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const { - if (this == &DAG->ExitSU) - OS << "ExitSU"; - else if (this == &DAG->EntrySU) +raw_ostream &SUnit::print(raw_ostream &OS, + const SUnit *Entry, const SUnit *Exit) const { + if (this == Entry) OS << "EntrySU"; + else if (this == Exit) + OS << "ExitSU"; else OS << "SU(" << NodeNum << ")"; + return OS; } -LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const { +LLVM_DUMP_METHOD +raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const { + return print(OS, &G->EntrySU, &G->ExitSU); +} + +LLVM_DUMP_METHOD +void SUnit::dump(const ScheduleDAG *G) const { print(dbgs(), G); dbgs() << ": "; G->dumpNode(this); @@ -333,40 +376,18 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const { if (Preds.size() != 0) { dbgs() << " Predecessors:\n"; - for (const SDep &SuccDep : Preds) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : Preds) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } if (Succs.size() != 0) { dbgs() << " Successors:\n"; - for (const SDep &SuccDep : Succs) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : 
Succs) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 0f70b0e9ca07..ccd937950a74 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -63,7 +63,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" static cl::opt EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d901af727686..71382c18fdf9 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -400,6 +400,7 @@ namespace { SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); + SDValue reduceBuildVecToTrunc(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); @@ -5267,14 +5268,40 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { } SDValue DAGCombiner::visitRotate(SDNode *N) { + SDLoc dl(N); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // fold (rot x, 0) -> x + if (isNullConstantOrNullSplatConstant(N1)) + return N0; + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). - if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && - N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { - if (SDValue NewOp1 = - distributeTruncateThroughAnd(N->getOperand(1).getNode())) - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - N->getOperand(0), NewOp1); + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) + return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1); } + + unsigned NextOp = N0.getOpcode(); + // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) + if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) + if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) + if (SDNode *C2 = + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + bool SameSide = (N->getOpcode() == NextOp); + unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; + if (SDValue CombinedShift = + DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) { + unsigned Bitsize = VT.getScalarSizeInBits(); + SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT); + SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( + ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode()); + return DAG.getNode( + N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm); + } + } return SDValue(); } @@ -6091,19 +6118,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); + SDLoc DL(N); // fold (select C, X, X) -> X if (N1 == N2) return N1; + if (const ConstantSDNode *N0C = dyn_cast(N0)) { // fold (select true, X, Y) -> X // fold (select false, X, Y) -> Y return !N0C->isNullValue() ? 
N1 : N2; } + // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or C, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); + return DAG.getNode(ISD::OR, DL, VT, N0, N2); if (SDValue V = foldSelectOfConstants(N)) return V; @@ -6112,22 +6142,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); + return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); - return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); + return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1); } // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::AND, DL, VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) - return SDValue(N, 0); // Don't revisit N. + return SDValue(N, 0); // Don't revisit N. if (VT0 == MVT::i1) { // The code in this block deals with the following 2 equivalences: @@ -6138,27 +6168,27 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further // optimize the combination of the conditions. - bool normalizeToSequence - = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + bool normalizeToSequence = + TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); // select (and Cond0, Cond1), X, Y // -> select Cond0, (select Cond1, X, Y), Y if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, InnerSelect, N2); } // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1, InnerSelect); } @@ -6170,15 +6200,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. 
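The two select equivalences handled in this block reduce to a simple boolean identity; a small self-contained check (illustrative only, not taken from the patch) for the and/select form:

  #include <cassert>

  int main() {
    // select (and c0, c1), x, y  ==  select c0, (select c1, x, y), y
    for (int c0 = 0; c0 <= 1; ++c0)
      for (int c1 = 0; c1 <= 1; ++c1)
        for (int x = 0; x <= 1; ++x)
          for (int y = 0; y <= 1; ++y)
            assert(((c0 && c1) ? x : y) == (c0 ? (c1 ? x : y) : y));
    return 0;
  }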
if (!normalizeToSequence) { - SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), - N0, N1_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, - N1_1, N2); + SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2); } // Otherwise see if we can optimize the "and" to a better pattern. if (SDValue Combined = visitANDLike(N0, N1_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1_1, N2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1, + N2); } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y @@ -6189,15 +6217,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { - SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), - N0, N2_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, - N1, N2_2); + SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2); } // Otherwise see if we can optimize to a better pattern. if (SDValue Combined = visitORLike(N0, N2_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1, N2_2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, + N2_2); } } } @@ -6208,8 +6234,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (auto *C = dyn_cast(N0->getOperand(1))) { SDValue Cond0 = N0->getOperand(0); if (C->isOne()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), - Cond0, N2, N1); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1); } } } @@ -6226,24 +6251,21 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // FIXME: Instead of testing for UnsafeFPMath, this should be checking for // no signed zeros as well as no nans. const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && - VT.isFloatingPoint() && N0.hasOneUse() && + if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { ISD::CondCode CC = cast(N0.getOperand(2))->get(); - if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), - N0.getOperand(1), N1, N2, CC, - TLI, DAG)) + if (SDValue FMinMax = combineMinNumMaxNum( + DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) return FMinMax; } if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || TLI.isOperationLegal(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - N1, N2, N0.getOperand(2)); - return SimplifySelect(SDLoc(N), N0, N1, N2); + return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, N0.getOperand(2)); + return SimplifySelect(DL, N0, N1, N2); } return SDValue(); @@ -11045,7 +11067,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the - // indexed load/store and the expresion that needs to be re-written. + // indexed load/store and the expression that needs to be re-written. 
// // Therefore, we have: // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 @@ -11379,7 +11401,7 @@ namespace { /// Shift = srl Ty1 Origin, CstTy Amount /// Inst = trunc Shift to Ty2 /// -/// Then, it will be rewriten into: +/// Then, it will be rewritten into: /// Slice = load SliceTy, Base + SliceOffset /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 /// @@ -12694,7 +12716,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); bool IsFast = false; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12706,7 +12728,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12723,7 +12745,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { !NoVectors) { // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12781,7 +12804,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast; - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12898,7 +12922,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -12912,7 +12936,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -12926,7 +12950,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, @@ -14228,6 +14252,73 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { return Shuffles[0]; } +// 
Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT +// operations which can be matched to a truncate. +SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) { + // TODO: Add support for big-endian. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + if (N->getNumOperands() < 2) + return SDValue(); + SDLoc DL(N); + EVT VT = N->getValueType(0); + unsigned NumElems = N->getNumOperands(); + + if (!isTypeLegal(VT)) + return SDValue(); + + // If the input is something other than an EXTRACT_VECTOR_ELT with a constant + // index, bail out. + // TODO: Allow undef elements in some cases? + if (any_of(N->ops(), [VT](SDValue Op) { + return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa(Op.getOperand(1)) || + Op.getValueType() != VT.getVectorElementType(); + })) + return SDValue(); + + // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index + auto GetExtractIdx = [](SDValue Extract) { + return cast(Extract.getOperand(1))->getSExtValue(); + }; + + // The first BUILD_VECTOR operand must be an an extract from index zero + // (assuming no undef and little-endian). + if (GetExtractIdx(N->getOperand(0)) != 0) + return SDValue(); + + // Compute the stride from the first index. + int Stride = GetExtractIdx(N->getOperand(1)); + SDValue ExtractedFromVec = N->getOperand(0).getOperand(0); + + // Proceed only if the stride and the types can be matched to a truncate. + if ((Stride == 1 || !isPowerOf2_32(Stride)) || + (ExtractedFromVec.getValueType().getVectorNumElements() != + Stride * NumElems) || + (VT.getScalarSizeInBits() * Stride > 64)) + return SDValue(); + + // Check remaining operands are consistent with the computed stride. + for (unsigned i = 1; i != NumElems; ++i) { + SDValue Op = N->getOperand(i); + + if ((Op.getOperand(0) != ExtractedFromVec) || + (GetExtractIdx(Op) != Stride * i)) + return SDValue(); + } + + // All checks were ok, construct the truncate. + LLVMContext &Ctx = *DAG.getContext(); + EVT NewVT = VT.getVectorVT( + Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems); + EVT TruncVT = + VT.isFloatingPoint() ? 
VT.changeVectorElementTypeToInteger() : VT; + + SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec); + Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res); + return DAG.getBitcast(VT, Res); +} + SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -14270,6 +14361,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; + if (TLI.isDesirableToCombineBuildVectorToTruncate()) + if (SDValue V = reduceBuildVecToTrunc(N)) + return V; + if (SDValue V = reduceBuildVecToShuffle(N)) return V; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index b235e19aaab2..b96c96f0b4df 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -589,7 +589,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } else AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - // Add the subregster being inserted + // Add the subregister being inserted AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MIB.addImm(SubIdx); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 873b2bd48f1e..7e4bc3ccb5d3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1991,7 +1991,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::move(Args)) .setTailCall(isTailCall) .setSExtResult(isSigned) - .setZExtResult(!isSigned); + .setZExtResult(!isSigned) + .setIsPostTypeLegalization(true); std::pair CallInfo = TLI.LowerCallTo(CLI); @@ -2029,7 +2030,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned) - .setZExtResult(!isSigned); + .setZExtResult(!isSigned) + .setIsPostTypeLegalization(true); std::pair CallInfo = TLI.LowerCallTo(CLI); @@ -3565,16 +3567,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); } - BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(0, dl)); - TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(1, dl)); - // Ret is a node with an illegal type. Because such things are not - // generally permitted during this phase of legalization, make sure the - // node has no more uses. The above EXTRACT_ELEMENT nodes should have been - // folded. - assert(Ret->use_empty() && - "Unexpected uses of illegally type from expanded lib call."); + assert(Ret.getOpcode() == ISD::MERGE_VALUES && + "Ret value is a collection of constituent nodes holding result."); + BottomHalf = Ret.getOperand(0); + TopHalf = Ret.getOperand(1); } if (isSigned) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c1cb5d9b5235..eaf177d0661b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -112,15 +112,15 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - // If R is null, the sub-method took care of registering the result. 
- if (R.getNode()) { + if (R.getNode() && R.getNode() != N) { SetSoftenedFloat(SDValue(N, ResNo), R); - ReplaceSoftenFloatResult(N, ResNo, R); + // Return true only if the node is changed, assuming that the operands + // are also converted when necessary. + return true; } - // Return true only if the node is changed, - // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. - return R.getNode() && R.getNode() != N; + return false; } SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { @@ -753,12 +753,17 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; + case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; + case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; + case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: @@ -791,9 +796,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) return false; - // When the operand type can be kept in registers, SoftenFloatResult - // will call ReplaceValueWith to replace all references and we can - // skip softening this operand. + + // When the operand type can be kept in registers there is nothing to do for + // the following opcodes. switch (N->getOperand(OpNo).getOpcode()) { case ISD::BITCAST: case ISD::ConstantFP: @@ -807,18 +812,12 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: return true; } - // For some opcodes, SoftenFloatResult handles all conversion of softening - // and replacing operands, so that there is no need to soften operands - // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: + case ISD::ConstantFP: // Leaf node. + case ISD::CopyFromReg: // Operand is a register that we know to be left + // unchanged by SoftenFloatResult(). + case ISD::Register: // Leaf node. 
return true; } return false; @@ -829,6 +828,21 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); + + if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) + return SDValue(); + + if (N->getNumOperands() == 3) + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); + + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, + N->getOperand(3)), + 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { // If we get here, the result must be legal but the source illegal. EVT SVT = N->getOperand(0).getValueType(); @@ -884,6 +898,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (Op == N->getOperand(0)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + + if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (Op == N->getOperand(0)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -913,6 +955,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); + + if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), + 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 154af46c9446..001eed9fb8f6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -80,6 +80,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { SDValue Res(&Node, i); + EVT VT = Res.getValueType(); bool Failed = false; unsigned Mapped = 0; @@ -129,13 +130,17 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { + } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { - if (Mapped == 0) { + // If the value can be kept in HW registers, softening machinery can + // leave it unchanged and don't put it to any map. 
+ if (Mapped == 0 && + !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat && + isLegalInHWReg(VT))) { dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { @@ -331,11 +336,6 @@ bool DAGTypeLegalizer::run() { if (NeedsReanalyzing) { assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); - // Remove any result values from SoftenedFloats as N will be revisited - // again. - for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) - SoftenedFloats.erase(SDValue(N, i)); - N->setNodeId(NewNode); // Recompute the NodeId and correct processed operands, adding the node to // the worklist if ready. @@ -754,8 +754,6 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // new uses of From due to CSE. If this happens, replace the new uses of // From with To. } while (!From.use_empty()); - - SoftenedFloats.erase(From); } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8e999188d8e1..e102df5e913d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -416,16 +416,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. - void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { - // When the result type can be kept in HW registers, the converted - // NewRes node could have the same type. We can save the effort in - // cloning every user of N in SoftenFloatOperand or other legalization functions, - // by calling ReplaceValueWith here to update all users. - if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) - ReplaceValueWith(SDValue(N, ResNo), NewRes); - } - // Convert Float Results to Integer for Non-HW-supported Operations. bool SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); @@ -471,17 +461,23 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); // Return true if we can skip softening the given operand or SDNode because - // it was soften before by SoftenFloatResult and references to the operand - // were replaced by ReplaceValueWith. + // either it was soften before by SoftenFloatResult and references to the + // operand were replaced by ReplaceValueWith or it's value type is legal in HW + // registers and the operand can be left unchanged. bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); // Convert Float Operand to Integer for Non-HW-supported Operations. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); + SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FABS(SDNode *N); + SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatOp_FNEG(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); + SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index aa69e0e2adfc..f3306151d864 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -57,7 +57,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the floating point operand only if it was converted to integers. // Otherwise, it is a legal type like f128 that can be saved in a register. auto SoftenedOp = GetSoftenedFloat(InOp); - if (SoftenedOp == InOp) + if (isLegalInHWReg(SoftenedOp.getValueType())) break; SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ff0e609803d8..d41054b15bbc 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2977,7 +2977,11 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled. unsigned InMaskOpc = InMask->getOpcode(); + + // FIXME: This code seems to be too restrictive, we might consider + // generalizing it or dropping it. 
assert((InMaskOpc == ISD::SETCC || + ISD::isBuildVectorOfConstantSDNodes(InMask.getNode()) || (isLogicalMaskOp(InMaskOpc) && isSETCCorConvertedSETCC(InMask->getOperand(0)) && isSETCCorConvertedSETCC(InMask->getOperand(1)))) && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 98553152117d..823e77850c4b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" @@ -5442,7 +5443,7 @@ SDValue SelectionDAG::getAtomicCmpSwap( unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { + AtomicOrdering FailureOrdering, SyncScope::ID SSID) { assert(Opcode == ISD::ATOMIC_CMP_SWAP || Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); @@ -5458,7 +5459,7 @@ SDValue SelectionDAG::getAtomicCmpSwap( MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - AAMDNodes(), nullptr, SynchScope, SuccessOrdering, + AAMDNodes(), nullptr, SSID, SuccessOrdering, FailureOrdering); return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO); @@ -5480,7 +5481,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); @@ -5500,7 +5501,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, MemVT.getStoreSize(), Alignment, AAMDNodes(), - nullptr, SynchScope, Ordering); + nullptr, SSID, Ordering); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); } @@ -7630,45 +7631,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, SDValue Loc = LD->getOperand(1); SDValue BaseLoc = Base->getOperand(1); - if (Loc.getOpcode() == ISD::FrameIndex) { - if (BaseLoc.getOpcode() != ISD::FrameIndex) - return false; - const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - int FI = cast(Loc)->getIndex(); - int BFI = cast(BaseLoc)->getIndex(); - int FS = MFI.getObjectSize(FI); - int BFS = MFI.getObjectSize(BFI); - if (FS != BFS || FS != (int)Bytes) return false; - return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); - } - // Handle X + C. - if (isBaseWithConstantOffset(Loc)) { - int64_t LocOffset = cast(Loc.getOperand(1))->getSExtValue(); - if (Loc.getOperand(0) == BaseLoc) { - // If the base location is a simple address with no offset itself, then - // the second load's first add operand should be the base address. 
- if (LocOffset == Dist * (int)Bytes) - return true; - } else if (isBaseWithConstantOffset(BaseLoc)) { - // The base location itself has an offset, so subtract that value from the - // second load's offset before comparing to distance * size. - int64_t BOffset = - cast(BaseLoc.getOperand(1))->getSExtValue(); - if (Loc.getOperand(0) == BaseLoc.getOperand(0)) { - if ((LocOffset - BOffset) == Dist * (int)Bytes) - return true; - } - } - } - const GlobalValue *GV1 = nullptr; - const GlobalValue *GV2 = nullptr; - int64_t Offset1 = 0; - int64_t Offset2 = 0; - bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); - if (isGA1 && isGA2 && GV1 == GV2) - return Offset1 == (Offset2 + Dist*Bytes); + auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); + auto LocDecomp = BaseIndexOffset::match(Loc, *this); + + int64_t Offset = 0; + if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) + return (Dist * Bytes == Offset); return false; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 4e899ae6668e..0d69441ebb7f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -37,13 +37,13 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - // Match non-equal FrameIndexes - a FrameIndex stemming from an - // alloca will not have it's ObjectOffset set until post-DAG and - // as such we must assume the two framesIndices are incomparable. + // Match non-equal FrameIndexes - If both frame indices are fixed + // we know their relative offsets and can compare them. Otherwise + // we must be conservative. if (auto *A = dyn_cast(Base)) if (auto *B = dyn_cast(Other.Base)) - if (!MFI.getObjectAllocation(A->getIndex()) && - !MFI.getObjectAllocation(B->getIndex())) { + if (MFI.isFixedObjectIndex(A->getIndex()) && + MFI.isFixedObjectIndex(B->getIndex())) { Off += MFI.getObjectOffset(B->getIndex()) - MFI.getObjectOffset(A->getIndex()); return true; @@ -60,12 +60,18 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { int64_t Offset = 0; bool IsIndexSignExt = false; - // Consume constant adds - while (Base->getOpcode() == ISD::ADD && - isa(Base->getOperand(1))) { - int64_t POffset = cast(Base->getOperand(1))->getSExtValue(); - Offset += POffset; - Base = Base->getOperand(0); + // Consume constant adds & ors with appropriate masking. + while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { + if (auto *C = dyn_cast(Base->getOperand(1))) { + // Only consider ORs which act as adds. 
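// Illustrative sketch (standalone C++, not part of the vendored diff): the
// rewritten areNonVolatileConsecutiveLoads above now decomposes each address
// into a (base, constant offset) pair via BaseIndexOffset and compares the
// offset delta against Dist * Bytes. A reduced stand-in for that comparison;
// the real decomposition also understands frame indexes, globals, etc.
#include <cstdint>
#include <optional>

struct AddrDecomp {
  const void *Base;  // symbolic base of the address
  int64_t Offset;    // constant byte offset folded off the address
};

// Byte distance Loc - BaseLoc, if both decompose to the same base.
static std::optional<int64_t> equalBaseIndex(const AddrDecomp &BaseLoc,
                                             const AddrDecomp &Loc) {
  if (BaseLoc.Base != Loc.Base)
    return std::nullopt;
  return Loc.Offset - BaseLoc.Offset;
}

static bool areConsecutive(const AddrDecomp &BaseLoc, const AddrDecomp &Loc,
                           int64_t Bytes, int64_t Dist) {
  if (auto Delta = equalBaseIndex(BaseLoc, Loc))
    return *Delta == Dist * Bytes;
  return false;
}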
+ if (Base->getOpcode() == ISD::OR && + !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) + break; + Offset += C->getSExtValue(); + Base = Base->getOperand(0); + continue; + } + break; } if (Base->getOpcode() == ISD::ADD) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index acf68fbbdedf..41c3f5f235ea 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3220,7 +3220,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } -void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(const User &I) { + ArrayRef Indices; + if (const InsertValueInst *IV = dyn_cast(&I)) + Indices = IV->getIndices(); + else + Indices = cast(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); @@ -3228,7 +3234,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa(Op0); bool FromUndef = isa(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector AggValueVTs; @@ -3268,13 +3274,19 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { DAG.getVTList(AggValueVTs), Values)); } -void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const User &I) { + ArrayRef Indices; + if (const ExtractValueInst *EV = dyn_cast(&I)) + Indices = EV->getIndices(); + else + Indices = cast(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool OutOfUndef = isa(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector ValValueVTs; @@ -3559,6 +3571,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { MMOFlags |= MachineMemOperand::MOInvariant; if (isDereferenceable) MMOFlags |= MachineMemOperand::MODereferenceable; + MMOFlags |= TLI.getMMOFlags(I); SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -3688,6 +3701,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { MMOFlags |= MachineMemOperand::MOVolatile; if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) MMOFlags |= MachineMemOperand::MONonTemporal; + MMOFlags |= TLI.getMMOFlags(I); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. 
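// Illustrative sketch (standalone C++, not part of the vendored diff): the
// address-matching loop above also folds ORs, but only when the OR provably
// behaves like an ADD, i.e. every bit set in the constant is known to be zero
// in the other operand (MaskedValueIsZero in the real code), so no carry can
// occur. With a known-zero mask the test is just:
#include <cassert>
#include <cstdint>

static bool orActsAsAdd(uint64_t KnownZeroOfLhs, uint64_t OredConstant) {
  // (lhs | c) == (lhs + c) when c's set bits all fall in known-zero positions.
  return (OredConstant & ~KnownZeroOfLhs) == 0;
}

int main() {
  uint64_t KnownZero = 0xF;                // e.g. a 16-byte-aligned base pointer
  assert(orActsAsAdd(KnownZero, 0x8));     // or 8  behaves as  add 8
  assert(!orActsAsAdd(KnownZero, 0x18));   // bit 4 overlaps, a carry is possible
}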
@@ -3978,7 +3992,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); AtomicOrdering FailureOrder = I.getFailureOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3988,7 +4002,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID); SDValue OutChain = L.getValue(2); @@ -4014,7 +4028,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4025,7 +4039,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), - /* Alignment=*/ 0, Order, Scope); + /* Alignment=*/ 0, Order, SSID); SDValue OutChain = L.getValue(1); @@ -4040,7 +4054,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, + Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -4048,7 +4062,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4066,7 +4080,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { VT.getStoreSize(), I.getAlignment() ? I.getAlignment() : DAG.getEVTAlignment(VT), - AAMDNodes(), nullptr, Scope, Order); + AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = @@ -4083,7 +4097,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4100,7 +4114,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - Order, Scope); + Order, SSID); DAG.setRoot(OutChain); } @@ -4982,6 +4996,83 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(CallResult.second); return nullptr; } + case Intrinsic::memmove_element_unordered_atomic: { + auto &MI = cast(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memset_element_unordered_atomic: { + auto &MI = cast(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Val = getValue(MI.getValue()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*DAG.getContext()); + Entry.Node = Val; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast(I); DILocalVariable *Variable = DI.getVariable(); @@ -7842,6 +7933,22 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + if (CLI.IsPostTypeLegalization) { + // If we are lowering a libcall after legalization, split the return type. 
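// Illustrative sketch (standalone C++, not part of the vendored diff): both
// unordered-atomic intrinsic cases above funnel into a runtime call whose
// symbol name encodes the element size, and abort on unsupported sizes. A
// reduced stand-in for that name selection (the memset case is analogous):
#include <cstdint>
#include <string>

static std::string memmoveElementAtomicLibcall(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1: case 2: case 4: case 8: case 16:
    return "__llvm_memmove_element_unordered_atomic_" +
           std::to_string(ElementSize);
  default:
    return "";  // UNKNOWN_LIBCALL -> report_fatal_error in the real lowering
  }
}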
+ SmallVector OldRetTys = std::move(RetTys); + SmallVector OldOffsets = std::move(Offsets); + for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { + EVT RetVT = OldRetTys[i]; + uint64_t Offset = OldOffsets[i]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); + unsigned RegisterVTSize = RegisterVT.getSizeInBits(); + RetTys.append(NumRegs, RegisterVT); + for (unsigned j = 0; j != NumRegs; ++j) + Offsets.push_back(Offset + j * RegisterVTSize); + } + } + SmallVector Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); @@ -7924,6 +8031,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); + // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; if (Args[i].IsByVal) FinalType = cast(Args[i].Ty)->getElementType(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 431d52b4b9b9..ac1d6aae65a5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -38,7 +38,6 @@ class BranchInst; class CallInst; class DbgValueInst; class ExtractElementInst; -class ExtractValueInst; class FCmpInst; class FPExtInst; class FPToSIInst; @@ -53,7 +52,6 @@ class IntToPtrInst; class IndirectBrInst; class InvokeInst; class InsertElementInst; -class InsertValueInst; class Instruction; class LoadInst; class MachineBasicBlock; @@ -859,8 +857,8 @@ class SelectionDAGBuilder { void visitInsertElement(const User &I); void visitShuffleVector(const User &I); - void visitExtractValue(const ExtractValueInst &I); - void visitInsertValue(const InsertValueInst &I); + void visitExtractValue(const User &I); + void visitInsertValue(const User &I); void visitLandingPad(const LandingPadInst &I); void visitGetElementPtr(const User &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index f711ca71f79f..bdf57e805842 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1483,7 +1483,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->selectInstruction(Inst)) { - FastISelFailed = true; --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and @@ -1506,8 +1505,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } + FastISelFailed = true; + // Then handle certain instructions as single-LLVM-Instruction blocks. - if (isa(Inst)) { + // We cannot separate out GCrelocates to their own blocks since we need + // to keep track of gc-relocates for a particular gc-statepoint. This is + // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before + // visitGCRelocate. + if (isa(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) { OptimizationRemarkMissed R("sdagisel", "FastISelFailure", Inst->getDebugLoc(), LLVMBB); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 7886737b879c..17a3a84ecda5 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -125,8 +125,11 @@ static void MarkBlocksLiveIn(BasicBlock *BB, if (!LiveBBs.insert(BB).second) return; // already been here. 
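// Illustrative sketch (standalone C++, not part of the vendored diff): the
// post-type-legalization path above re-expresses each return value as NumRegs
// copies of the legal register type and records one offset per piece,
// advancing by the register size each iteration. Simplified shape:
#include <cstdint>
#include <vector>

struct RetPiece { unsigned RegBits; uint64_t Offset; };

static std::vector<RetPiece> splitReturnValue(unsigned ValueBits,
                                              uint64_t StartOffset,
                                              unsigned RegBits) {
  std::vector<RetPiece> Pieces;
  unsigned NumRegs = (ValueBits + RegBits - 1) / RegBits;  // round up
  for (unsigned J = 0; J != NumRegs; ++J)
    Pieces.push_back({RegBits, StartOffset + J * RegBits});
  return Pieces;  // e.g. a 128-bit value with 64-bit registers -> two pieces
}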
- for (BasicBlock *PredBB : predecessors(BB)) - MarkBlocksLiveIn(PredBB, LiveBBs); + df_iterator_default_set Visited; + + for (BasicBlock *B : inverse_depth_first_ext(BB, Visited)) + LiveBBs.insert(B); + } /// substituteLPadValues - Substitute the values returned by the landingpad diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 008b984dd961..323045fd2aaa 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -53,10 +53,10 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, std::pair &LIP = LastInsertPoint[Num]; SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB); - SmallVector EHPadSucessors; + SmallVector EHPadSuccessors; for (const MachineBasicBlock *SMBB : MBB.successors()) if (SMBB->isEHPad()) - EHPadSucessors.push_back(SMBB); + EHPadSuccessors.push_back(SMBB); // Compute insert points on the first call. The pair is independent of the // current live interval. @@ -68,7 +68,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, LIP.first = LIS.getInstructionIndex(*FirstTerm); // If there is a landing pad successor, also find the call instruction. - if (EHPadSucessors.empty()) + if (EHPadSuccessors.empty()) return LIP.first; // There may not be a call instruction (?) in which case we ignore LPad. LIP.second = LIP.first; @@ -87,7 +87,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, if (!LIP.second) return LIP.first; - if (none_of(EHPadSucessors, [&](const MachineBasicBlock *EHPad) { + if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) { return LIS.isLiveInToMBB(CurLI, EHPad); })) return LIP.first; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index e9d38c10c860..3914ee514712 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -384,6 +384,26 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { "__llvm_memcpy_element_unordered_atomic_8"; Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = "__llvm_memcpy_element_unordered_atomic_16"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memmove_element_unordered_atomic_1"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memmove_element_unordered_atomic_2"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memmove_element_unordered_atomic_4"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memmove_element_unordered_atomic_8"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memmove_element_unordered_atomic_16"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memset_element_unordered_atomic_1"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memset_element_unordered_atomic_2"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memset_element_unordered_atomic_4"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memset_element_unordered_atomic_8"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memset_element_unordered_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -803,6 +823,40 @@ RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { } } +RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1; + case 2: + 
return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2; + case 4: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4; + case 8: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8; + case 16: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } +} + +RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_1; + case 2: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_2; + case 4: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_4; + case 8: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_8; + case 16: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. /// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index b9fa9b6a6ad7..c2c02f8de03f 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -62,6 +62,18 @@ class CVSymbolDumperImpl : public SymbolVisitorCallbacks { }; } +static StringRef getSymbolKindName(SymbolKind Kind) { + switch (Kind) { +#define SYMBOL_RECORD(EnumName, EnumVal, Name) \ + case EnumName: \ + return #Name; +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + default: + break; + } + return "UnknownSym"; +} + void CVSymbolDumperImpl::printLocalVariableAddrRange( const LocalVariableAddrRange &Range, uint32_t RelocationOffset) { DictScope S(W, "LocalVariableAddrRange"); @@ -86,18 +98,23 @@ void CVSymbolDumperImpl::printTypeIndex(StringRef FieldName, TypeIndex TI) { } Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) { + W.startLine() << getSymbolKindName(CVR.Type); + W.getOStream() << " {\n"; + W.indent(); + W.printEnum("Kind", unsigned(CVR.Type), getSymbolTypeNames()); return Error::success(); } Error CVSymbolDumperImpl::visitSymbolEnd(CVSymbol &CVR) { if (PrintRecordBytes && ObjDelegate) ObjDelegate->printBinaryBlockWithRelocs("SymData", CVR.content()); + + W.unindent(); + W.startLine() << "}\n"; return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) { - DictScope S(W, "BlockStart"); - StringRef LinkageName; W.printHex("PtrParent", Block.Parent); W.printHex("PtrEnd", Block.End); @@ -113,7 +130,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) { - DictScope S(W, "Thunk32"); W.printNumber("Parent", Thunk.Parent); W.printNumber("End", Thunk.End); W.printNumber("Next", Thunk.Next); @@ -126,7 +142,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, TrampolineSym &Tramp) { - DictScope S(W, "Trampoline"); W.printEnum("Type", uint16_t(Tramp.Type), getTrampolineNames()); W.printNumber("Size", Tramp.Size); W.printNumber("ThunkOff", Tramp.ThunkOffset); @@ -137,7 +152,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) { - DictScope S(W, "Section"); W.printNumber("SectionNumber", Section.SectionNumber); W.printNumber("Alignment", Section.Alignment); W.printNumber("Rva", Section.Rva); @@ -152,7 +166,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CoffGroupSym &CoffGroup) { - DictScope S(W, "COFF Group"); W.printNumber("Size", 
CoffGroup.Size); W.printFlags("Characteristics", CoffGroup.Characteristics, getImageSectionCharacteristicNames(), @@ -165,8 +178,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BPRelativeSym &BPRel) { - DictScope S(W, "BPRelativeSym"); - W.printNumber("Offset", BPRel.Offset); printTypeIndex("Type", BPRel.Type); W.printString("VarName", BPRel.Name); @@ -175,16 +186,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BuildInfoSym &BuildInfo) { - DictScope S(W, "BuildInfo"); - W.printNumber("BuildId", BuildInfo.BuildId); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallSiteInfoSym &CallSiteInfo) { - DictScope S(W, "CallSiteInfo"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -200,8 +207,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, EnvBlockSym &EnvBlock) { - DictScope S(W, "EnvBlock"); - ListScope L(W, "Entries"); for (auto Entry : EnvBlock.Fields) { W.printString(Entry); @@ -211,7 +216,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FileStaticSym &FileStatic) { - DictScope S(W, "FileStatic"); printTypeIndex("Index", FileStatic.Index); W.printNumber("ModFilenameOffset", FileStatic.ModFilenameOffset); W.printFlags("Flags", uint16_t(FileStatic.Flags), getLocalFlagNames()); @@ -220,7 +224,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) { - DictScope S(W, "Export"); W.printNumber("Ordinal", Export.Ordinal); W.printFlags("Flags", uint16_t(Export.Flags), getExportSymFlagNames()); W.printString("Name", Export.Name); @@ -229,8 +232,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Compile2Sym &Compile2) { - DictScope S(W, "CompilerFlags2"); - W.printEnum("Language", Compile2.getLanguage(), getSourceLanguageNames()); W.printFlags("Flags", Compile2.getFlags(), getCompileSym2FlagNames()); W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames()); @@ -254,8 +255,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Compile3Sym &Compile3) { - DictScope S(W, "CompilerFlags3"); - W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames()); W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames()); W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames()); @@ -281,8 +280,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ConstantSym &Constant) { - DictScope S(W, "Constant"); - printTypeIndex("Type", Constant.Type); W.printNumber("Value", Constant.Value); W.printString("Name", Constant.Name); @@ -290,9 +287,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) { - DictScope S(W, "DataSym"); - - W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(), @@ -308,15 +302,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) { Error 
CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelFullScopeSym &DefRangeFramePointerRelFullScope) { - DictScope S(W, "DefRangeFramePointerRelFullScope"); W.printNumber("Offset", DefRangeFramePointerRelFullScope.Offset); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) { - DictScope S(W, "DefRangeFramePointerRel"); - W.printNumber("Offset", DefRangeFramePointerRel.Offset); printLocalVariableAddrRange(DefRangeFramePointerRel.Range, DefRangeFramePointerRel.getRelocationOffset()); @@ -326,8 +317,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) { - DictScope S(W, "DefRangeRegisterRel"); - W.printNumber("BaseRegister", DefRangeRegisterRel.Hdr.Register); W.printBoolean("HasSpilledUDTMember", DefRangeRegisterRel.hasSpilledUDTMember()); @@ -341,8 +330,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) { - DictScope S(W, "DefRangeRegister"); - W.printNumber("Register", DefRangeRegister.Hdr.Register); W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName); printLocalVariableAddrRange(DefRangeRegister.Range, @@ -353,8 +340,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) { - DictScope S(W, "DefRangeSubfieldRegister"); - W.printNumber("Register", DefRangeSubfieldRegister.Hdr.Register); W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName); W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent); @@ -366,8 +351,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeSubfieldSym &DefRangeSubfield) { - DictScope S(W, "DefRangeSubfield"); - if (ObjDelegate) { DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable(); auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program); @@ -387,8 +370,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DefRangeSym &DefRange) { - DictScope S(W, "DefRange"); - if (ObjDelegate) { DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable(); auto ExpectedProgram = Strings.getString(DefRange.Program); @@ -406,8 +387,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FrameCookie) { - DictScope S(W, "FrameCookie"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -423,8 +402,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FrameProc) { - DictScope S(W, "FrameProc"); - W.printHex("TotalFrameBytes", FrameProc.TotalFrameBytes); W.printHex("PaddingFrameBytes", FrameProc.PaddingFrameBytes); W.printHex("OffsetToPadding", FrameProc.OffsetToPadding); @@ -440,8 +417,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, HeapAllocationSiteSym &HeapAllocSite) { - DictScope S(W, "HeapAllocationSite"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -458,8 +433,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error 
CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &InlineSite) { - DictScope S(W, "InlineSite"); - W.printHex("PtrParent", InlineSite.Parent); W.printHex("PtrEnd", InlineSite.End); printTypeIndex("Inlinee", InlineSite.Inlinee); @@ -515,7 +488,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, RegisterSym &Register) { - DictScope S(W, "RegisterSym"); printTypeIndex("Type", Register.Index); W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames()); W.printString("Name", Register.Name); @@ -523,7 +495,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { - DictScope S(W, "PublicSym"); W.printFlags("Flags", uint32_t(Public.Flags), getPublicSymFlagNames()); W.printNumber("Seg", Public.Segment); W.printNumber("Off", Public.Offset); @@ -532,7 +503,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) { - DictScope S(W, "ProcRef"); W.printNumber("SumName", ProcRef.SumName); W.printNumber("SymOffset", ProcRef.SymOffset); W.printNumber("Mod", ProcRef.Module); @@ -541,8 +511,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) { - DictScope S(W, "Label"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", Label.getRelocationOffset(), @@ -558,8 +526,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) { - DictScope S(W, "Local"); - printTypeIndex("Type", Local.Type); W.printFlags("Flags", uint16_t(Local.Flags), getLocalFlagNames()); W.printString("VarName", Local.Name); @@ -567,16 +533,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ObjNameSym &ObjName) { - DictScope S(W, "ObjectName"); - W.printHex("Signature", ObjName.Signature); W.printString("ObjectName", ObjName.Name); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { - DictScope S(W, "ProcStart"); - if (InFunctionScope) return llvm::make_error( "Visiting a ProcSym while inside function scope!"); @@ -584,7 +546,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { InFunctionScope = true; StringRef LinkageName; - W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); W.printHex("PtrParent", Proc.Parent); W.printHex("PtrEnd", Proc.End); W.printHex("PtrNext", Proc.Next); @@ -607,13 +568,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ScopeEndSym &ScopeEnd) { - if (CVR.kind() == SymbolKind::S_END) - DictScope S(W, "BlockEnd"); - else if (CVR.kind() == SymbolKind::S_PROC_ID_END) - DictScope S(W, "ProcEnd"); - else if (CVR.kind() == SymbolKind::S_INLINESITE_END) - DictScope S(W, "InlineSiteEnd"); - InFunctionScope = false; return Error::success(); } @@ -627,8 +581,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, RegRelativeSym &RegRel) { - DictScope S(W, "RegRelativeSym"); - W.printHex("Offset", RegRel.Offset); printTypeIndex("Type", RegRel.Type); 
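// Illustrative sketch (standalone C++, not part of the vendored diff): the
// dumper changes throughout this file drop the per-record DictScope and let
// visitSymbolBegin/visitSymbolEnd open and close one "KindName { ... }" scope
// for every record. The shape of that centralized scope printing:
#include <iostream>
#include <string>

struct ScopedDumper {
  int Indent = 0;
  void beginRecord(const std::string &KindName) {   // visitSymbolBegin
    std::cout << std::string(Indent, ' ') << KindName << " {\n";
    Indent += 2;
  }
  void endRecord() {                                // visitSymbolEnd
    Indent -= 2;
    std::cout << std::string(Indent, ' ') << "}\n";
  }
};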
W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames()); @@ -638,8 +590,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ThreadLocalDataSym &Data) { - DictScope S(W, "ThreadLocalDataSym"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(), @@ -653,15 +603,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) { - DictScope S(W, "UDT"); printTypeIndex("Type", UDT.Type); W.printString("UDTName", UDT.Name); return Error::success(); } Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) { - DictScope S(W, "UnknownSym"); - W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); W.printNumber("Length", CVR.length()); return Error::success(); } diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp index 72cb9e2e3544..0d935c4472ae 100644 --- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -382,6 +382,13 @@ static bool discoverTypeIndices(ArrayRef Content, SymbolKind Kind, case SymbolKind::S_BUILDINFO: Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags break; + case SymbolKind::S_LTHREAD32: + case SymbolKind::S_GTHREAD32: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type + break; + case SymbolKind::S_FILESTATIC: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type + break; case SymbolKind::S_LOCAL: Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type break; @@ -403,6 +410,10 @@ static bool discoverTypeIndices(ArrayRef Content, SymbolKind Kind, case SymbolKind::S_INLINESITE: Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee break; + case SymbolKind::S_HEAPALLOCSITE: + // FIXME: It's not clear if this is a type or item reference. + Refs.push_back({TiRefKind::IndexRef, 8, 1}); // signature + break; // Defranges don't have types, just registers and code offsets. 
case SymbolKind::S_DEFRANGE_REGISTER: @@ -419,6 +430,7 @@ static bool discoverTypeIndices(ArrayRef Content, SymbolKind Kind, case SymbolKind::S_COMPILE: case SymbolKind::S_COMPILE2: case SymbolKind::S_COMPILE3: + case SymbolKind::S_ENVBLOCK: case SymbolKind::S_BLOCK32: case SymbolKind::S_FRAMEPROC: break; diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index a18d4efec07a..495e09fbae35 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -591,10 +591,10 @@ void DWARFContext::parseCompileUnits() { void DWARFContext::parseTypeUnits() { if (!TUs.empty()) return; - for (const auto &I : getTypesSections()) { + forEachTypesSections([&](const DWARFSection &S) { TUs.emplace_back(); - TUs.back().parse(*this, I.second); - } + TUs.back().parse(*this, S); + }); } void DWARFContext::parseDWOCompileUnits() { @@ -604,10 +604,10 @@ void DWARFContext::parseDWOCompileUnits() { void DWARFContext::parseDWOTypeUnits() { if (!DWOTUs.empty()) return; - for (const auto &I : getTypesDWOSections()) { + forEachTypesDWOSections([&](const DWARFSection &S) { DWOTUs.emplace_back(); - DWOTUs.back().parseDWO(*this, I.second); - } + DWOTUs.back().parseDWO(*this, S); + }); } DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { @@ -937,27 +937,23 @@ DWARFContextInMemory::DWARFContextInMemory( : FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()), AddressSize(Obj.getBytesInAddress()) { for (const SectionRef &Section : Obj.sections()) { - StringRef name; - Section.getName(name); + StringRef Name; + Section.getName(Name); // Skip BSS and Virtual sections, they aren't interesting. - bool IsBSS = Section.isBSS(); - if (IsBSS) + if (Section.isBSS() || Section.isVirtual()) continue; - bool IsVirtual = Section.isVirtual(); - if (IsVirtual) - continue; - StringRef data; + StringRef Data; section_iterator RelocatedSection = Section.getRelocatedSection(); // Try to obtain an already relocated version of this section. // Else use the unrelocated section from the object file. We'll have to // apply relocations ourselves later. - if (!L || !L->getLoadedSectionContents(*RelocatedSection, data)) - Section.getContents(data); + if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) + Section.getContents(Data); - if (auto Err = maybeDecompress(Section, name, data)) { + if (auto Err = maybeDecompress(Section, Name, Data)) { ErrorPolicy EP = HandleError( - createError("failed to decompress '" + name + "', ", std::move(Err))); + createError("failed to decompress '" + Name + "', ", std::move(Err))); if (EP == ErrorPolicy::Halt) return; continue; @@ -965,26 +961,26 @@ DWARFContextInMemory::DWARFContextInMemory( // Compressed sections names in GNU style starts from ".z", // at this point section is decompressed and we drop compression prefix. - name = name.substr( - name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes. - - if (StringRef *SectionData = MapSectionToMember(name)) { - *SectionData = data; - if (name == "debug_ranges") { - // FIXME: Use the other dwo range section when we emit it. - RangeDWOSection.Data = data; - } - } else if (name == "debug_types") { - // Find debug_types data by section rather than name as there are - // multiple, comdat grouped, debug_types sections. - TypesSections[Section].Data = data; - } else if (name == "debug_types.dwo") { - TypesDWOSections[Section].Data = data; - } + Name = Name.substr( + Name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes. 
// Map platform specific debug section names to DWARF standard section // names. - name = Obj.mapDebugSectionName(name); + Name = Obj.mapDebugSectionName(Name); + + if (StringRef *SectionData = mapSectionToMember(Name)) { + *SectionData = Data; + if (Name == "debug_ranges") { + // FIXME: Use the other dwo range section when we emit it. + RangeDWOSection.Data = Data; + } + } else if (Name == "debug_types") { + // Find debug_types data by section rather than name as there are + // multiple, comdat grouped, debug_types sections. + TypesSections[Section].Data = Data; + } else if (Name == "debug_types.dwo") { + TypesDWOSections[Section].Data = Data; + } if (RelocatedSection == Obj.section_end()) continue; @@ -1012,21 +1008,8 @@ DWARFContextInMemory::DWARFContextInMemory( // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. - RelocAddrMap *Map = - StringSwitch(RelSecName) - .Case("debug_info", &InfoSection.Relocs) - .Case("debug_loc", &LocSection.Relocs) - .Case("debug_info.dwo", &InfoDWOSection.Relocs) - .Case("debug_line", &LineSection.Relocs) - .Case("debug_str_offsets", &StringOffsetSection.Relocs) - .Case("debug_ranges", &RangeSection.Relocs) - .Case("debug_addr", &AddrSection.Relocs) - .Case("apple_names", &AppleNamesSection.Relocs) - .Case("apple_types", &AppleTypesSection.Relocs) - .Case("apple_namespaces", &AppleNamespacesSection.Relocs) - .Case("apple_namespac", &AppleNamespacesSection.Relocs) - .Case("apple_objc", &AppleObjCSection.Relocs) - .Default(nullptr); + DWARFSection *Sec = mapNameToDWARFSection(RelSecName); + RelocAddrMap *Map = Sec ? &Sec->Relocs : nullptr; if (!Map) { // Find debug_types relocs by section rather than name as there are // multiple, comdat grouped, debug_types sections. 
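// Illustrative sketch (standalone C++, not part of the vendored diff): the
// DWARFContextInMemory refactor in this hunk routes both section data and
// relocation maps through a single name -> section lookup, so the section
// list is spelled once instead of in two diverging StringSwitches. Reduced:
#include <cstdint>
#include <map>
#include <string>

struct DwarfSection {
  std::string Data;
  std::map<uint64_t, uint64_t> Relocs;
};

struct ContextSections {
  DwarfSection Info, Line, Ranges;   // illustrative subset of the real list

  DwarfSection *mapNameToSection(const std::string &Name) {
    if (Name == "debug_info")   return &Info;
    if (Name == "debug_line")   return &Line;
    if (Name == "debug_ranges") return &Ranges;
    return nullptr;              // plain (non-relocated) sections handled elsewhere
  }
  std::string *mapSectionToMember(const std::string &Name) {
    if (DwarfSection *S = mapNameToSection(Name))
      return &S->Data;           // the same table serves the data mapping
    return nullptr;
  }
};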
@@ -1059,10 +1042,10 @@ DWARFContextInMemory::DWARFContextInMemory( object::RelocVisitor V(Obj); uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address); if (V.error()) { - SmallString<32> Name; - Reloc.getTypeName(Name); + SmallString<32> Type; + Reloc.getTypeName(Type); ErrorPolicy EP = HandleError( - createError("failed to compute relocation: " + Name + ", ", + createError("failed to compute relocation: " + Type + ", ", errorCodeToError(object_error::parse_failed))); if (EP == ErrorPolicy::Halt) return; @@ -1079,40 +1062,47 @@ DWARFContextInMemory::DWARFContextInMemory( bool isLittleEndian) : IsLittleEndian(isLittleEndian), AddressSize(AddrSize) { for (const auto &SecIt : Sections) { - if (StringRef *SectionData = MapSectionToMember(SecIt.first())) + if (StringRef *SectionData = mapSectionToMember(SecIt.first())) *SectionData = SecIt.second->getBuffer(); } } -StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { +DWARFSection *DWARFContextInMemory::mapNameToDWARFSection(StringRef Name) { + return StringSwitch(Name) + .Case("debug_info", &InfoSection) + .Case("debug_loc", &LocSection) + .Case("debug_line", &LineSection) + .Case("debug_str_offsets", &StringOffsetSection) + .Case("debug_ranges", &RangeSection) + .Case("debug_info.dwo", &InfoDWOSection) + .Case("debug_loc.dwo", &LocDWOSection) + .Case("debug_line.dwo", &LineDWOSection) + .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_addr", &AddrSection) + .Case("apple_names", &AppleNamesSection) + .Case("apple_types", &AppleTypesSection) + .Case("apple_namespaces", &AppleNamespacesSection) + .Case("apple_namespac", &AppleNamespacesSection) + .Case("apple_objc", &AppleObjCSection) + .Default(nullptr); +} + +StringRef *DWARFContextInMemory::mapSectionToMember(StringRef Name) { + if (DWARFSection *Sec = mapNameToDWARFSection(Name)) + return &Sec->Data; return StringSwitch(Name) - .Case("debug_info", &InfoSection.Data) .Case("debug_abbrev", &AbbrevSection) - .Case("debug_loc", &LocSection.Data) - .Case("debug_line", &LineSection.Data) .Case("debug_aranges", &ARangeSection) .Case("debug_frame", &DebugFrameSection) .Case("eh_frame", &EHFrameSection) .Case("debug_str", &StringSection) - .Case("debug_str_offsets", &StringOffsetSection.Data) - .Case("debug_ranges", &RangeSection.Data) .Case("debug_macinfo", &MacinfoSection) .Case("debug_pubnames", &PubNamesSection) .Case("debug_pubtypes", &PubTypesSection) .Case("debug_gnu_pubnames", &GnuPubNamesSection) .Case("debug_gnu_pubtypes", &GnuPubTypesSection) - .Case("debug_info.dwo", &InfoDWOSection.Data) .Case("debug_abbrev.dwo", &AbbrevDWOSection) - .Case("debug_loc.dwo", &LocDWOSection.Data) - .Case("debug_line.dwo", &LineDWOSection.Data) .Case("debug_str.dwo", &StringDWOSection) - .Case("debug_str_offsets.dwo", &StringOffsetDWOSection.Data) - .Case("debug_addr", &AddrSection.Data) - .Case("apple_names", &AppleNamesSection.Data) - .Case("apple_types", &AppleTypesSection.Data) - .Case("apple_namespaces", &AppleNamespacesSection.Data) - .Case("apple_namespac", &AppleNamespacesSection.Data) - .Case("apple_objc", &AppleObjCSection.Data) .Case("debug_cu_index", &CUIndexSection) .Case("debug_tu_index", &TUIndexSection) .Case("gdb_index", &GdbIndexSection) diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index ef416f72ad17..111f0bbd4444 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -395,7 +395,7 @@ DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) : void 
DWARFDie::attribute_iterator::updateForIndex( const DWARFAbbreviationDeclaration &AbbrDecl, uint32_t I) { Index = I; - // AbbrDecl must be valid befor calling this function. + // AbbrDecl must be valid before calling this function. auto NumAttrs = AbbrDecl.getNumAttributes(); if (Index < NumAttrs) { AttrValue.Attr = AbbrDecl.getAttrByIndex(Index); diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt index e9fd29ccc4ca..ff01c948e099 100644 --- a/lib/DebugInfo/PDB/CMakeLists.txt +++ b/lib/DebugInfo/PDB/CMakeLists.txt @@ -41,6 +41,7 @@ add_pdb_impl_folder(Native Native/InfoStream.cpp Native/InfoStreamBuilder.cpp Native/ModuleDebugStream.cpp + Native/NativeBuiltinSymbol.cpp Native/NativeCompilandSymbol.cpp Native/NativeEnumModules.cpp Native/NativeExeSymbol.cpp @@ -53,6 +54,7 @@ add_pdb_impl_folder(Native Native/PDBStringTableBuilder.cpp Native/PDBTypeServerHandler.cpp Native/PublicsStream.cpp + Native/PublicsStreamBuilder.cpp Native/RawError.cpp Native/SymbolStream.cpp Native/TpiHashing.cpp diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 745dd742aadc..897f78c51032 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -65,6 +65,10 @@ void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) { ObjFileName = Name; } +void DbiModuleDescriptorBuilder::setPdbFilePathNI(uint32_t NI) { + PdbFilePathNI = NI; +} + void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) { Symbols.push_back(Symbol); // Symbols written to a PDB file are required to be 4 byte aligned. The same @@ -111,7 +115,7 @@ void DbiModuleDescriptorBuilder::finalize() { (void)Layout.Mod; // Set in constructor (void)Layout.ModDiStream; // Set in finalizeMsfLayout Layout.NumFiles = SourceFiles.size(); - Layout.PdbFilePathNI = 0; + Layout.PdbFilePathNI = PdbFilePathNI; Layout.SrcFileNameNI = 0; // This value includes both the signature field as well as the record bytes diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index a1f0671dec3e..0eeac7e4c084 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -225,6 +225,10 @@ void DbiStream::visitSectionContributions( } } +Expected DbiStream::getECName(uint32_t NI) const { + return ECNames.getStringForID(NI); +} + Error DbiStream::initializeSectionContributionData() { if (SecContrSubstream.empty()) return Error::success(); @@ -248,6 +252,9 @@ Error DbiStream::initializeSectionHeadersData() { return Error::success(); uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::SectionHdr); + if (StreamNum == kInvalidStreamIndex) + return Error::success(); + if (StreamNum >= Pdb.getNumStreams()) return make_error(raw_error_code::no_stream); diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index aad247ea185f..25076e40fc98 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -49,9 +49,17 @@ void DbiStreamBuilder::setSectionMap(ArrayRef SecMap) { SectionMap = SecMap; } +void DbiStreamBuilder::setSymbolRecordStreamIndex(uint32_t Index) { + SymRecordStreamIndex = Index; +} + +void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) { + PublicsStreamIndex = Index; +} + Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type, ArrayRef Data) { - if (DbgStreams[(int)Type].StreamNumber) + if 
(DbgStreams[(int)Type].StreamNumber != kInvalidStreamIndex) return make_error(raw_error_code::duplicate_entry, "The specified stream type already exists"); auto ExpectedIndex = Msf.addStream(Data.size()); @@ -63,11 +71,16 @@ Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type, return Error::success(); } +uint32_t DbiStreamBuilder::addECName(StringRef Name) { + return ECNamesBuilder.insert(Name); +} + uint32_t DbiStreamBuilder::calculateSerializedLength() const { // For now we only support serializing the header. return sizeof(DbiStreamHeader) + calculateFileInfoSubstreamSize() + calculateModiSubstreamSize() + calculateSectionContribsStreamSize() + - calculateSectionMapStreamSize() + calculateDbgStreamsSize(); + calculateSectionMapStreamSize() + calculateDbgStreamsSize() + + ECNamesBuilder.calculateSerializedSize(); } Expected @@ -247,15 +260,15 @@ Error DbiStreamBuilder::finalize() { H->PdbDllVersion = PdbDllVersion; H->MachineType = static_cast(MachineType); - H->ECSubstreamSize = 0; + H->ECSubstreamSize = ECNamesBuilder.calculateSerializedSize(); H->FileInfoSize = FileInfoBuffer.getLength(); H->ModiSubstreamSize = calculateModiSubstreamSize(); H->OptionalDbgHdrSize = DbgStreams.size() * sizeof(uint16_t); H->SecContrSubstreamSize = calculateSectionContribsStreamSize(); H->SectionMapSize = calculateSectionMapStreamSize(); H->TypeServerSize = 0; - H->SymRecordStreamIndex = kInvalidStreamIndex; - H->PublicSymbolStreamIndex = kInvalidStreamIndex; + H->SymRecordStreamIndex = SymRecordStreamIndex; + H->PublicSymbolStreamIndex = PublicsStreamIndex; H->MFCTypeServerIndex = kInvalidStreamIndex; H->GlobalSymbolStreamIndex = kInvalidStreamIndex; @@ -383,6 +396,9 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = Writer.writeStreamRef(FileInfoBuffer)) return EC; + if (auto EC = ECNamesBuilder.commit(Writer)) + return EC; + for (auto &Stream : DbgStreams) if (auto EC = Writer.writeInteger(Stream.StreamNumber)) return EC; diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp index 354b8c0e07ff..6cdf6dde04d9 100644 --- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp +++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp @@ -86,7 +86,8 @@ Error NamedStreamMap::commit(BinaryStreamWriter &Writer) const { for (const auto &Name : OrderedStreamNames) { auto Item = Mapping.find(Name); - assert(Item != Mapping.end()); + if (Item == Mapping.end()) + continue; if (auto EC = Writer.writeCString(Item->getKey())) return EC; } @@ -108,7 +109,8 @@ uint32_t NamedStreamMap::finalize() { for (const auto &Name : OrderedStreamNames) { auto Item = Mapping.find(Name); - assert(Item != Mapping.end()); + if (Item == Mapping.end()) + continue; FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item->getValue()); FinalizedInfo->StringDataBytes += Item->getKeyLength() + 1; } diff --git a/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp new file mode 100644 index 000000000000..60416f69e137 --- /dev/null +++ b/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp @@ -0,0 +1,48 @@ +//===- NativeBuiltinSymbol.cpp ------------------------------------ C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" + +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +namespace llvm { +namespace pdb { + +NativeBuiltinSymbol::NativeBuiltinSymbol(NativeSession &PDBSession, + SymIndexId Id, PDB_BuiltinType T, + uint64_t L) + : NativeRawSymbol(PDBSession, Id), Session(PDBSession), Type(T), Length(L) { +} + +NativeBuiltinSymbol::~NativeBuiltinSymbol() {} + +std::unique_ptr NativeBuiltinSymbol::clone() const { + return llvm::make_unique(Session, SymbolId, Type, Length); +} + +void NativeBuiltinSymbol::dump(raw_ostream &OS, int Indent) const { + // TODO: Apparently nothing needs this yet. +} + +PDB_SymType NativeBuiltinSymbol::getSymTag() const { + return PDB_SymType::BuiltinType; +} + +PDB_BuiltinType NativeBuiltinSymbol::getBuiltinType() const { return Type; } + +bool NativeBuiltinSymbol::isConstType() const { return false; } + +uint64_t NativeBuiltinSymbol::getLength() const { return Length; } + +bool NativeBuiltinSymbol::isUnalignedType() const { return false; } + +bool NativeBuiltinSymbol::isVolatileType() const { return false; } + +} // namespace pdb +} // namespace llvm diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp index 180c169ec209..7132a99a9f16 100644 --- a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp @@ -15,7 +15,7 @@ namespace llvm { namespace pdb { NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session, - uint32_t SymbolId, + SymIndexId SymbolId, DbiModuleDescriptor MI) : NativeRawSymbol(Session, SymbolId), Module(MI) {} diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index 6206155b9fb6..cb0830f453c8 100644 --- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -18,7 +18,7 @@ namespace llvm { namespace pdb { -NativeExeSymbol::NativeExeSymbol(NativeSession &Session, uint32_t SymbolId) +NativeExeSymbol::NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId) : NativeRawSymbol(Session, SymbolId), File(Session.getPDBFile()) {} std::unique_ptr NativeExeSymbol::clone() const { diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index b4f5c96ce66b..92612bcea4ac 100644 --- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -13,7 +13,7 @@ using namespace llvm; using namespace llvm::pdb; -NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, uint32_t SymbolId) +NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId) : Session(PDBSession), SymbolId(SymbolId) {} void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {} diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp index 93d43d9ef341..76de0d8f9e7e 100644 --- a/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -10,9 +10,11 @@ #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" 
#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" @@ -33,6 +35,28 @@ using namespace llvm; using namespace llvm::msf; using namespace llvm::pdb; +namespace { +// Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary +// to instantiate a NativeBuiltinSymbol for that type. +static const struct BuiltinTypeEntry { + codeview::SimpleTypeKind Kind; + PDB_BuiltinType Type; + uint32_t Size; +} BuiltinTypes[] = { + {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4}, + {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4}, + {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4}, + {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8}, + {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1}, + {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1}, + {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1}, + {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2}, + {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1} + // This table can be grown as necessary, but these are the only types we've + // needed so far. +}; +} // namespace + NativeSession::NativeSession(std::unique_ptr PdbFile, std::unique_ptr Allocator) : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {} @@ -71,19 +95,51 @@ Error NativeSession::createFromExe(StringRef Path, std::unique_ptr NativeSession::createCompilandSymbol(DbiModuleDescriptor MI) { - const auto Id = static_cast(SymbolCache.size()); + const auto Id = static_cast(SymbolCache.size()); SymbolCache.push_back( llvm::make_unique(*this, Id, MI)); return llvm::make_unique( *this, std::unique_ptr(SymbolCache[Id]->clone())); } +SymIndexId NativeSession::findSymbolByTypeIndex(codeview::TypeIndex Index) { + // First see if it's already in our cache. + const auto Entry = TypeIndexToSymbolId.find(Index); + if (Entry != TypeIndexToSymbolId.end()) + return Entry->second; + + // Symbols for built-in types are created on the fly. + if (Index.isSimple()) { + // FIXME: We will eventually need to handle pointers to other simple types, + // which are still simple types in the world of CodeView TypeIndexes. 
+ if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct) + return 0; + const auto Kind = Index.getSimpleKind(); + const auto It = + std::find_if(std::begin(BuiltinTypes), std::end(BuiltinTypes), + [Kind](const BuiltinTypeEntry &Builtin) { + return Builtin.Kind == Kind; + }); + if (It == std::end(BuiltinTypes)) + return 0; + SymIndexId Id = SymbolCache.size(); + SymbolCache.emplace_back( + llvm::make_unique(*this, Id, It->Type, It->Size)); + TypeIndexToSymbolId[Index] = Id; + return Id; + } + + // TODO: Look up PDB type by type index + + return 0; +} + uint64_t NativeSession::getLoadAddress() const { return 0; } void NativeSession::setLoadAddress(uint64_t Address) {} std::unique_ptr NativeSession::getGlobalScope() { - const auto Id = static_cast(SymbolCache.size()); + const auto Id = static_cast(SymbolCache.size()); SymbolCache.push_back(llvm::make_unique(*this, Id)); auto RawSymbol = SymbolCache[Id]->clone(); auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol))); diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 4f6ebb0cb342..0b6492efc70f 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -385,8 +385,11 @@ bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); } bool PDBFile::hasPDBGlobalsStream() { auto DbiS = getPDBDbiStream(); - if (!DbiS) + if (!DbiS) { + consumeError(DbiS.takeError()); return false; + } + return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); } @@ -396,8 +399,10 @@ bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); } bool PDBFile::hasPDBPublicsStream() { auto DbiS = getPDBDbiStream(); - if (!DbiS) + if (!DbiS) { + consumeError(DbiS.takeError()); return false; + } return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); } diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 12b0c3b36c1d..9f35fd73629c 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" @@ -33,6 +34,8 @@ using namespace llvm::support; PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator) : Allocator(Allocator) {} +PDBFileBuilder::~PDBFileBuilder() {} + Error PDBFileBuilder::initialize(uint32_t BlockSize) { auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize); if (!ExpectedMsf) @@ -71,6 +74,12 @@ PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; } +PublicsStreamBuilder &PDBFileBuilder::getPublicsBuilder() { + if (!Publics) + Publics = llvm::make_unique(*Msf); + return *Publics; +} + Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { auto ExpectedStream = Msf->addStream(Size); if (!ExpectedStream) @@ -96,8 +105,6 @@ Expected PDBFileBuilder::finalizeMsfLayout() { return std::move(EC); if (auto EC = addNamedStream("/LinkInfo", 0)) return std::move(EC); - if (auto EC = addNamedStream("/src/headerblock", 0)) - return std::move(EC); if (Info) { if (auto EC = Info->finalizeMsfLayout()) @@ -115,6 +122,14 @@ Expected PDBFileBuilder::finalizeMsfLayout() { if (auto EC = 
Ipi->finalizeMsfLayout()) return std::move(EC); } + if (Publics) { + if (auto EC = Publics->finalizeMsfLayout()) + return std::move(EC); + if (Dbi) { + Dbi->setPublicsStreamIndex(Publics->getStreamIndex()); + Dbi->setSymbolRecordStreamIndex(Publics->getRecordStreamIdx()); + } + } return Msf->build(); } @@ -194,5 +209,13 @@ Error PDBFileBuilder::commit(StringRef Filename) { return EC; } + if (Publics) { + auto PS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, Publics->getStreamIndex(), Allocator); + BinaryStreamWriter PSWriter(*PS); + if (auto EC = Publics->commit(PSWriter)) + return EC; + } + return Buffer.commit(); } diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp index f9f8ac219d35..acd45f7a6219 100644 --- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp +++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -21,7 +21,7 @@ using namespace llvm; using namespace llvm::support; using namespace llvm::pdb; -uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +uint32_t PDBStringTable::getByteSize() const { return Header->ByteSize; } uint32_t PDBStringTable::getNameCount() const { return NameCount; } uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; } uint32_t PDBStringTable::getSignature() const { return Header->Signature; } diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 8f3474b9ce19..9c3e654f808b 100644 --- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -41,19 +41,6 @@ using namespace llvm::msf; using namespace llvm::support; using namespace llvm::pdb; -// This is PSGSIHDR struct defined in -// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h -struct PublicsStream::HeaderInfo { - ulittle32_t SymHash; - ulittle32_t AddrMap; - ulittle32_t NumThunks; - ulittle32_t SizeOfThunk; - ulittle16_t ISectThunkTable; - char Padding[2]; - ulittle32_t OffThunkTable; - ulittle32_t NumSections; -}; - PublicsStream::PublicsStream(PDBFile &File, std::unique_ptr Stream) : Pdb(File), Stream(std::move(Stream)) {} @@ -72,7 +59,8 @@ Error PublicsStream::reload() { BinaryStreamReader Reader(*Stream); // Check stream size. - if (Reader.bytesRemaining() < sizeof(HeaderInfo) + sizeof(GSIHashHeader)) + if (Reader.bytesRemaining() < + sizeof(PublicsStreamHeader) + sizeof(GSIHashHeader)) return make_error(raw_error_code::corrupt_file, "Publics Stream does not contain a header."); diff --git a/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp new file mode 100644 index 000000000000..28c4a8fc35d9 --- /dev/null +++ b/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp @@ -0,0 +1,89 @@ +//===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" + +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" + +#include "GSI.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; + +PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) : Msf(Msf) {} + +PublicsStreamBuilder::~PublicsStreamBuilder() {} + +uint32_t PublicsStreamBuilder::calculateSerializedLength() const { + uint32_t Size = 0; + Size += sizeof(PublicsStreamHeader); + Size += sizeof(GSIHashHeader); + Size += HashRecords.size() * sizeof(PSHashRecord); + size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); + uint32_t NumBitmapEntries = BitmapSizeInBits / 8; + Size += NumBitmapEntries; + + // FIXME: Account for hash buckets. For now since we we write a zero-bitmap + // indicating that no hash buckets are valid, we also write zero byets of hash + // bucket data. + Size += 0; + return Size; +} + +Error PublicsStreamBuilder::finalizeMsfLayout() { + Expected Idx = Msf.addStream(calculateSerializedLength()); + if (!Idx) + return Idx.takeError(); + StreamIdx = *Idx; + + Expected RecordIdx = Msf.addStream(0); + if (!RecordIdx) + return RecordIdx.takeError(); + RecordStreamIdx = *RecordIdx; + return Error::success(); +} + +Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter) { + PublicsStreamHeader PSH; + GSIHashHeader GSH; + + // FIXME: Figure out what to put for these values. + PSH.AddrMap = 0; + PSH.ISectThunkTable = 0; + PSH.NumSections = 0; + PSH.NumThunks = 0; + PSH.OffThunkTable = 0; + PSH.SizeOfThunk = 0; + PSH.SymHash = 0; + + GSH.VerSignature = GSIHashHeader::HdrSignature; + GSH.VerHdr = GSIHashHeader::HdrVersion; + GSH.HrSize = 0; + GSH.NumBuckets = 0; + + if (auto EC = PublicsWriter.writeObject(PSH)) + return EC; + if (auto EC = PublicsWriter.writeObject(GSH)) + return EC; + if (auto EC = PublicsWriter.writeArray(makeArrayRef(HashRecords))) + return EC; + + size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); + uint32_t NumBitmapEntries = BitmapSizeInBits / 8; + std::vector BitmapData(NumBitmapEntries); + // FIXME: Build an actual bitmap + if (auto EC = PublicsWriter.writeBytes(makeArrayRef(BitmapData))) + return EC; + + // FIXME: Write actual hash buckets. 
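// For reference, the size of an empty publics stream can be worked out from
// the structures used above. The publics header width follows the PSGSIHDR
// fields removed from PublicsStream.cpp earlier in this patch; the
// GSIHashHeader field widths and the IPHR_HASH value (4096 in the upstream
// GSI code) are assumptions of this sketch, not taken from the patch.

#include <cstdint>
#include <cstdio>

// Same rounding behaviour as llvm::alignTo for these small positive values.
static uint64_t alignUp(uint64_t X, uint64_t A) { return (X + A - 1) / A * A; }

int main() {
  const uint64_t IPHR_HASH = 4096;                       // assumed constant
  const uint64_t PublicsHdr = 4 * 6 + 2 + 2;             // six u32, one u16, 2 pad bytes = 28
  const uint64_t GsiHashHdr = 4 * 4;                     // VerSignature, VerHdr, HrSize, NumBuckets = 16
  const uint64_t BitmapBits = alignUp(IPHR_HASH + 1, 32); // 4128 bits
  const uint64_t BitmapBytes = BitmapBits / 8;            // 516 bytes, written as zeros for now
  std::printf("empty publics stream: %llu bytes\n",
              (unsigned long long)(PublicsHdr + GsiHashHdr + BitmapBytes)); // 560 with these assumptions
  return 0;
}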
+ return Error::success(); +} diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index ff8749fbfed4..1164d60ffc10 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -317,7 +317,13 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, raw_string_ostream MangledNameStream(MangledName); Mangler::getNameWithPrefix(MangledNameStream, Name, getDataLayout()); } - return findSymbol(MangledName, CheckFunctionsOnly).getAddress(); + if (auto Sym = findSymbol(MangledName, CheckFunctionsOnly)) { + if (auto AddrOrErr = Sym.getAddress()) + return *AddrOrErr; + else + report_fatal_error(AddrOrErr.takeError()); + } else + report_fatal_error(Sym.takeError()); } JITSymbol MCJIT::findSymbol(const std::string &Name, @@ -599,11 +605,12 @@ GenericValue MCJIT::runFunction(Function *F, ArrayRef ArgValues) { void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) { if (!isSymbolSearchingDisabled()) { - void *ptr = - reinterpret_cast( - static_cast(Resolver.findSymbol(Name).getAddress())); - if (ptr) - return ptr; + if (auto Sym = Resolver.findSymbol(Name)) { + if (auto AddrOrErr = Sym.getAddress()) + return reinterpret_cast( + static_cast(*AddrOrErr)); + } else if (auto Err = Sym.takeError()) + report_fatal_error(std::move(Err)); } /// If a LazyFunctionCreator is installed, use it to get/create the function. diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp index 5fe259f80b6f..de80cb1d0dd4 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindings.cpp +++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp @@ -60,12 +60,13 @@ void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName, void LLVMOrcDisposeMangledSymbol(char *MangledName) { delete[] MangledName; } -LLVMOrcTargetAddress +LLVMOrcErrorCode LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) { OrcCBindingsStack &J = *unwrap(JITStack); - return J.createLazyCompileCallback(Callback, CallbackCtx); + return J.createLazyCompileCallback(*RetAddr, Callback, CallbackCtx); } LLVMOrcErrorCode LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack, @@ -82,38 +83,44 @@ LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, return J.setIndirectStubPointer(StubName, NewAddr); } -LLVMOrcModuleHandle +LLVMOrcErrorCode LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx) { OrcCBindingsStack &J = *unwrap(JITStack); std::shared_ptr *M(unwrap(Mod)); - return J.addIRModuleEager(*M, SymbolResolver, SymbolResolverCtx); + return J.addIRModuleEager(*RetHandle, *M, SymbolResolver, SymbolResolverCtx); } -LLVMOrcModuleHandle +LLVMOrcErrorCode LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx) { OrcCBindingsStack &J = *unwrap(JITStack); std::shared_ptr *M(unwrap(Mod)); - return J.addIRModuleLazy(*M, SymbolResolver, SymbolResolverCtx); + return J.addIRModuleLazy(*RetHandle, *M, SymbolResolver, SymbolResolverCtx); } -void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) { +LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle H) { OrcCBindingsStack &J = *unwrap(JITStack); - J.removeModule(H); + return 
J.removeModule(H); } -LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, - const char *SymbolName) { +LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, + const char *SymbolName) { OrcCBindingsStack &J = *unwrap(JITStack); - auto Sym = J.findSymbol(SymbolName, true); - return Sym.getAddress(); + return J.findSymbolAddress(*RetAddr, SymbolName, true); } -void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) { - delete unwrap(JITStack); +LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) { + auto *J = unwrap(JITStack); + auto Err = J->shutdown(); + delete J; + return Err; } diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index 931d0a9eb2ad..e38decf94f3e 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -70,7 +70,7 @@ class OrcCBindingsStack { virtual JITSymbol findSymbolIn(const std::string &Name, bool ExportedSymbolsOnly) = 0; - virtual void removeModule() = 0; + virtual Error removeModule() = 0; }; template class GenericHandleImpl : public GenericHandle { @@ -83,7 +83,7 @@ class OrcCBindingsStack { return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly); } - void removeModule() override { return Layer.removeModule(Handle); } + Error removeModule() override { return Layer.removeModule(Handle); } private: LayerT &Layer; @@ -105,6 +105,10 @@ class OrcCBindingsStack { IndirectStubsManagerBuilder IndirectStubsMgrBuilder) : DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()), CCMgr(std::move(CCMgr)), + ObjectLayer( + []() { + return std::make_shared(); + }), CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)), CODLayer(CompileLayer, [](Function &F) { return std::set({&F}); }, @@ -112,12 +116,14 @@ class OrcCBindingsStack { CXXRuntimeOverrides( [this](const std::string &S) { return mangle(S); }) {} - ~OrcCBindingsStack() { + LLVMOrcErrorCode shutdown() { // Run any destructors registered with __cxa_atexit. CXXRuntimeOverrides.runDestructors(); // Run any IR destructors. for (auto &DtorRunner : IRStaticDestructorRunners) - DtorRunner.runViaLayer(*this); + if (auto Err = DtorRunner.runViaLayer(*this)) + return mapError(std::move(Err)); + return LLVMOrcErrSuccess; } std::string mangle(StringRef Name) { @@ -134,14 +140,17 @@ class OrcCBindingsStack { return reinterpret_cast(static_cast(Addr)); } - JITTargetAddress - createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback, + + LLVMOrcErrorCode + createLazyCompileCallback(JITTargetAddress &RetAddr, + LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) { auto CCInfo = CCMgr->getCompileCallback(); CCInfo.setCompileAction([=]() -> JITTargetAddress { return Callback(wrap(this), CallbackCtx); }); - return CCInfo.getAddress(); + RetAddr = CCInfo.getAddress(); + return LLVMOrcErrSuccess; } LLVMOrcErrorCode createIndirectStub(StringRef StubName, @@ -155,12 +164,12 @@ class OrcCBindingsStack { return mapError(IndirectStubsMgr->updatePointer(Name, Addr)); } - std::unique_ptr + std::shared_ptr createResolver(LLVMOrcSymbolResolverFn ExternalResolver, void *ExternalResolverCtx) { return orc::createLambdaResolver( [this, ExternalResolver, ExternalResolverCtx](const std::string &Name) - -> JITSymbol { + -> JITSymbol { // Search order: // 1. JIT'd symbols. // 2. Runtime overrides. 
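// With this change the ORC C API reports failures through LLVMOrcErrorCode
// and hands results back through out-parameters instead of raw return
// values. A minimal caller sketch; the LLVMOrcGetErrorMsg accessor used for
// the error string is an assumption of this example, not something
// introduced by the patch.

#include "llvm-c/OrcBindings.h"
#include <stdio.h>

static int printMainAddress(LLVMOrcJITStackRef JIT) {
  LLVMOrcTargetAddress Addr = 0;
  if (LLVMOrcGetSymbolAddress(JIT, &Addr, "main") != LLVMOrcErrSuccess) {
    fprintf(stderr, "lookup error: %s\n", LLVMOrcGetErrorMsg(JIT));
    return 1;
  }
  if (Addr == 0) {
    fprintf(stderr, "main not found\n");   /* success, but a null address */
    return 1;
  }
  printf("main @ 0x%llx\n", (unsigned long long)Addr);
  return 0;
}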
@@ -168,6 +177,9 @@ class OrcCBindingsStack { if (auto Sym = CODLayer.findSymbol(Name, true)) return Sym; + else if (auto Err = Sym.takeError()) + return Sym.takeError(); + if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name)) return Sym; @@ -178,16 +190,19 @@ class OrcCBindingsStack { return JITSymbol(nullptr); }, - [](const std::string &Name) { + [](const std::string &Name) -> JITSymbol { return JITSymbol(nullptr); }); } template - ModuleHandleT addIRModule(LayerT &Layer, std::shared_ptr M, - std::unique_ptr MemMgr, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { + LLVMOrcErrorCode + addIRModule(ModuleHandleT &RetHandle, LayerT &Layer, + std::shared_ptr M, + std::unique_ptr MemMgr, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { + // Attach a data-layout if one isn't already present. if (M->getDataLayout().isDefault()) M->setDataLayout(DL); @@ -204,43 +219,52 @@ class OrcCBindingsStack { auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx); // Add the module to the JIT. - auto LH = Layer.addModule(std::move(M), std::move(MemMgr), - std::move(Resolver)); - ModuleHandleT H = createHandle(Layer, LH); + ModuleHandleT H; + if (auto LHOrErr = Layer.addModule(std::move(M), std::move(Resolver))) + H = createHandle(Layer, *LHOrErr); + else + return mapError(LHOrErr.takeError()); // Run the static constructors, and save the static destructor runner for // execution when the JIT is torn down. orc::CtorDtorRunner CtorRunner(std::move(CtorNames), H); - CtorRunner.runViaLayer(*this); + if (auto Err = CtorRunner.runViaLayer(*this)) + return mapError(std::move(Err)); IRStaticDestructorRunners.emplace_back(std::move(DtorNames), H); - return H; + RetHandle = H; + return LLVMOrcErrSuccess; } - ModuleHandleT addIRModuleEager(std::shared_ptr M, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { - return addIRModule(CompileLayer, std::move(M), + LLVMOrcErrorCode addIRModuleEager(ModuleHandleT &RetHandle, + std::shared_ptr M, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { + return addIRModule(RetHandle, CompileLayer, std::move(M), llvm::make_unique(), std::move(ExternalResolver), ExternalResolverCtx); } - ModuleHandleT addIRModuleLazy(std::shared_ptr M, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { - return addIRModule(CODLayer, std::move(M), + LLVMOrcErrorCode addIRModuleLazy(ModuleHandleT &RetHandle, + std::shared_ptr M, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { + return addIRModule(RetHandle, CODLayer, std::move(M), llvm::make_unique(), std::move(ExternalResolver), ExternalResolverCtx); } - void removeModule(ModuleHandleT H) { - GenericHandles[H]->removeModule(); + LLVMOrcErrorCode removeModule(ModuleHandleT H) { + if (auto Err = GenericHandles[H]->removeModule()) + return mapError(std::move(Err)); GenericHandles[H] = nullptr; FreeHandleIndexes.push_back(H); + return LLVMOrcErrSuccess; } - JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { + JITSymbol findSymbol(const std::string &Name, + bool ExportedSymbolsOnly) { if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly)) return Sym; return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly); @@ -251,6 +275,26 @@ class OrcCBindingsStack { return GenericHandles[H]->findSymbolIn(Name, ExportedSymbolsOnly); } + LLVMOrcErrorCode findSymbolAddress(JITTargetAddress &RetAddr, + const std::string &Name, + bool ExportedSymbolsOnly) { + 
RetAddr = 0; + if (auto Sym = findSymbol(Name, ExportedSymbolsOnly)) { + // Successful lookup, non-null symbol: + if (auto AddrOrErr = Sym.getAddress()) { + RetAddr = *AddrOrErr; + return LLVMOrcErrSuccess; + } else + return mapError(AddrOrErr.takeError()); + } else if (auto Err = Sym.takeError()) { + // Lookup failure - report error. + return mapError(std::move(Err)); + } + // Otherwise we had a successful lookup but got a null result. We already + // set RetAddr to '0' above, so just return success. + return LLVMOrcErrSuccess; + } + const std::string &getErrorMessage() const { return ErrMsg; } private: diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp index 9e70c4ac1dbf..df2d320e0f7a 100644 --- a/lib/ExecutionEngine/Orc/OrcError.cpp +++ b/lib/ExecutionEngine/Orc/OrcError.cpp @@ -45,6 +45,8 @@ class OrcErrorCategory : public std::error_category { return "Could not negotiate RPC function"; case OrcErrorCode::RPCResponseAbandoned: return "RPC response abandoned"; + case OrcErrorCode::JITSymbolNotFound: + return "JIT symbol not found"; case OrcErrorCode::UnexpectedRPCCall: return "Unexpected RPC call"; case OrcErrorCode::UnexpectedRPCResponse: @@ -63,10 +65,29 @@ static ManagedStatic OrcErrCat; namespace llvm { namespace orc { +char JITSymbolNotFound::ID = 0; + std::error_code orcError(OrcErrorCode ErrCode) { typedef std::underlying_type::type UT; return std::error_code(static_cast(ErrCode), *OrcErrCat); } +JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName) + : SymbolName(std::move(SymbolName)) {} + +std::error_code JITSymbolNotFound::convertToErrorCode() const { + typedef std::underlying_type::type UT; + return std::error_code(static_cast(OrcErrorCode::JITSymbolNotFound), + *OrcErrCat); +} + +void JITSymbolNotFound::log(raw_ostream &OS) const { + OS << "Could not find symbol '" << SymbolName << "'"; +} + +const std::string &JITSymbolNotFound::getSymbolName() const { + return SymbolName; +} + } } diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 690276232a6f..346a40405ff1 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -172,10 +172,13 @@ class OrcMCJITReplacement : public ExecutionEngine { std::shared_ptr ClientResolver, std::unique_ptr TM) : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)), - MemMgr(*this, std::move(MemMgr)), Resolver(*this), + MemMgr(std::make_shared(*this, + std::move(MemMgr))), + Resolver(std::make_shared(*this)), ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this), NotifyFinalized(*this), - ObjectLayer(NotifyObjectLoaded, NotifyFinalized), + ObjectLayer([this]() { return this->MemMgr; }, NotifyObjectLoaded, + NotifyFinalized), CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)), LazyEmitLayer(CompileLayer) {} @@ -199,20 +202,20 @@ class OrcMCJITReplacement : public ExecutionEngine { delete Mod; }; LocalModules.push_back(std::shared_ptr(MPtr, std::move(Deleter))); - LazyEmitLayer.addModule(LocalModules.back(), &MemMgr, &Resolver); + cantFail(LazyEmitLayer.addModule(LocalModules.back(), Resolver)); } void addObjectFile(std::unique_ptr O) override { auto Obj = std::make_shared>(std::move(O), nullptr); - ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver); + cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); } void addObjectFile(object::OwningBinary O) override { auto Obj = std::make_shared>(std::move(O)); - ObjectLayer.addObject(std::move(Obj), &MemMgr, 
&Resolver); + cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); } void addArchive(object::OwningBinary A) override { @@ -231,7 +234,7 @@ class OrcMCJITReplacement : public ExecutionEngine { } uint64_t getSymbolAddress(StringRef Name) { - return findSymbol(Name).getAddress(); + return cantFail(findSymbol(Name).getAddress()); } JITSymbol findSymbol(StringRef Name) { @@ -320,7 +323,7 @@ class OrcMCJITReplacement : public ExecutionEngine { auto Obj = std::make_shared>( std::move(ChildObj), nullptr); - ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver); + cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); if (auto Sym = ObjectLayer.findSymbol(Name, true)) return Sym; } @@ -341,7 +344,7 @@ class OrcMCJITReplacement : public ExecutionEngine { const LoadedObjectInfo &Info) const { M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad); M.SectionsAllocatedSinceLastLoad = SectionAddrSet(); - M.MemMgr.notifyObjectLoaded(&M, *Obj->getBinary()); + M.MemMgr->notifyObjectLoaded(&M, *Obj->getBinary()); } private: OrcMCJITReplacement &M; @@ -373,8 +376,8 @@ class OrcMCJITReplacement : public ExecutionEngine { using LazyEmitLayerT = LazyEmittingLayer; std::unique_ptr TM; - MCJITReplacementMemMgr MemMgr; - LinkingResolver Resolver; + std::shared_ptr MemMgr; + std::shared_ptr Resolver; std::shared_ptr ClientResolver; Mangler Mang; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 2b69f1a0269f..8198836f7a0c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -128,7 +128,10 @@ void RuntimeDyldImpl::resolveRelocations() { ); // First, resolve relocations associated with external symbols. - resolveExternalSymbols(); + if (auto Err = resolveExternalSymbols()) { + HasError = true; + ErrorStr = toString(std::move(Err)); + } // Iterate over all outstanding relocations for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) { @@ -243,9 +246,11 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { continue; // Then check the symbol resolver to see if there's a definition // elsewhere in this logical dylib. - if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) + if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) { if (Sym.getFlags().isStrongDefinition()) continue; + } else if (auto Err = Sym.takeError()) + return std::move(Err); // else JITSymFlags &= ~JITSymbolFlags::Weak; } @@ -953,7 +958,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, } } -void RuntimeDyldImpl::resolveExternalSymbols() { +Error RuntimeDyldImpl::resolveExternalSymbols() { while (!ExternalSymbolRelocations.empty()) { StringMap::iterator i = ExternalSymbolRelocations.begin(); @@ -971,10 +976,24 @@ void RuntimeDyldImpl::resolveExternalSymbols() { // This is an external symbol, try to get its address from the symbol // resolver. // First search for the symbol in this logical dylib. - Addr = Resolver.findSymbolInLogicalDylib(Name.data()).getAddress(); + if (auto Sym = Resolver.findSymbolInLogicalDylib(Name.data())) { + if (auto AddrOrErr = Sym.getAddress()) + Addr = *AddrOrErr; + else + return AddrOrErr.takeError(); + } else if (auto Err = Sym.takeError()) + return Err; + // If that fails, try searching for an external symbol. 
- if (!Addr) - Addr = Resolver.findSymbol(Name.data()).getAddress(); + if (!Addr) { + if (auto Sym = Resolver.findSymbol(Name.data())) { + if (auto AddrOrErr = Sym.getAddress()) + Addr = *AddrOrErr; + else + return AddrOrErr.takeError(); + } else if (auto Err = Sym.takeError()) + return Err; + } // The call to getSymbolAddress may have caused additional modules to // be loaded, which may have added new entries to the // ExternalSymbolRelocations map. Consquently, we need to update our @@ -1009,6 +1028,8 @@ void RuntimeDyldImpl::resolveExternalSymbols() { ExternalSymbolRelocations.erase(i); } + + return Error::success(); } //===----------------------------------------------------------------------===// diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index 1bd28ef37ed1..1c54ad6fb03f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -27,9 +27,12 @@ using namespace llvm::object; namespace { class LoadedCOFFObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper { + : public LoadedObjectInfoHelper { public: - LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + LoadedCOFFObjectInfo( + RuntimeDyldImpl &RTDyld, + RuntimeDyld::LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap) : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index e45fdc7aee18..5bc7434e703f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -742,7 +742,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const { if (auto InternalSymbol = getRTDyld().getSymbol(Symbol)) return InternalSymbol.getAddress(); - return getRTDyld().Resolver.findSymbol(Symbol).getAddress(); + return cantFail(getRTDyld().Resolver.findSymbol(Symbol).getAddress()); } uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 8b6f9bef66df..77c968401c16 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -123,7 +123,8 @@ void DyldELFObject::updateSymbolAddress(const SymbolRef &SymRef, } class LoadedELFObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper { + : public LoadedObjectInfoHelper { public: LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 5268bc5a1868..95b04fd93251 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -417,7 +417,7 @@ class RuntimeDyldImpl { StubMap &Stubs) = 0; /// \brief Resolve relocations to external symbols. 
- void resolveExternalSymbols(); + Error resolveExternalSymbols(); // \brief Compute an upper bound of the memory that is required to load all // sections diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 00541e8c06fe..80e9c7ac18aa 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -27,7 +27,8 @@ using namespace llvm::object; namespace { class LoadedMachOObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper { + : public LoadedObjectInfoHelper { public: LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt index b886021aee3f..fa743c280e86 100644 --- a/lib/Fuzzer/CMakeLists.txt +++ b/lib/Fuzzer/CMakeLists.txt @@ -13,6 +13,7 @@ if( APPLE ) endif() endif() +set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") if( LLVM_USE_SANITIZE_COVERAGE ) if(NOT "${LLVM_USE_SANITIZER}" STREQUAL "Address") message(FATAL_ERROR @@ -20,7 +21,6 @@ if( LLVM_USE_SANITIZE_COVERAGE ) "LLVM_USE_SANITIZE_COVERAGE=YES to be set." ) endif() - set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") # Disable the coverage and sanitizer instrumentation for the fuzzer itself. set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") diff --git a/lib/Fuzzer/FuzzerCorpus.h b/lib/Fuzzer/FuzzerCorpus.h index 0f0573994a03..218ae5b6ac4d 100644 --- a/lib/Fuzzer/FuzzerCorpus.h +++ b/lib/Fuzzer/FuzzerCorpus.h @@ -34,6 +34,7 @@ struct InputInfo { size_t NumExecutedMutations = 0; size_t NumSuccessfullMutations = 0; bool MayDeleteFile = false; + std::vector FeatureSet; }; class InputCorpus { @@ -68,24 +69,84 @@ class InputCorpus { } bool empty() const { return Inputs.empty(); } const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } - void AddToCorpus(const Unit &U, size_t NumFeatures, - bool MayDeleteFile = false) { + void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, + const std::vector &FeatureSet) { assert(!U.empty()); - uint8_t Hash[kSHA1NumBytes]; if (FeatureDebug) Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); - ComputeSHA1(U.data(), U.size(), Hash); - Hashes.insert(Sha1ToString(Hash)); Inputs.push_back(new InputInfo()); InputInfo &II = *Inputs.back(); II.U = U; II.NumFeatures = NumFeatures; II.MayDeleteFile = MayDeleteFile; - memcpy(II.Sha1, Hash, kSHA1NumBytes); + II.FeatureSet = FeatureSet; + ComputeSHA1(U.data(), U.size(), II.Sha1); + Hashes.insert(Sha1ToString(II.Sha1)); UpdateCorpusDistribution(); + PrintCorpus(); // ValidateFeatureSet(); } + // Debug-only + void PrintUnit(const Unit &U) { + if (!FeatureDebug) return; + for (uint8_t C : U) { + if (C != 'F' && C != 'U' && C != 'Z') + C = '.'; + Printf("%c", C); + } + } + + // Debug-only + void PrintFeatureSet(const std::vector &FeatureSet) { + if (!FeatureDebug) return; + Printf("{"); + for (uint32_t Feature: FeatureSet) + Printf("%u,", Feature); + Printf("}"); + } + + // Debug-only + void PrintCorpus() { + if (!FeatureDebug) return; + Printf("======= CORPUS:\n"); + int i = 0; + for (auto II : Inputs) { + if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) { + Printf("[%2d] ", i); + Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size()); + PrintUnit(II->U); + Printf(" "); + PrintFeatureSet(II->FeatureSet); + Printf("\n"); + } + i++; + } + } + + // If FeatureSet is that same as in II, 
replace II->U with {Data,Size}. + bool TryToReplace(InputInfo *II, const uint8_t *Data, size_t Size, + const std::vector &FeatureSet) { + if (II->U.size() > Size && II->FeatureSet.size() && + II->FeatureSet == FeatureSet) { + if (FeatureDebug) + Printf("Replace: %zd => %zd\n", II->U.size(), Size); + Replace(II, {Data, Data + Size}); + PrintCorpus(); + return true; + } + return false; + } + + void Replace(InputInfo *II, const Unit &U) { + assert(II->U.size()); + Hashes.erase(Sha1ToString(II->Sha1)); + DeleteFile(*II); + ComputeSHA1(U.data(), U.size(), II->Sha1); + Hashes.insert(Sha1ToString(II->Sha1)); + II->U = U; + } + bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } bool HasUnit(const std::string &H) { return Hashes.count(H); } InputInfo &ChooseUnitToMutate(Random &Rand) { @@ -124,10 +185,14 @@ class InputCorpus { Printf("\n"); } - void DeleteInput(size_t Idx) { - InputInfo &II = *Inputs[Idx]; + void DeleteFile(const InputInfo &II) { if (!OutputCorpus.empty() && II.MayDeleteFile) RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); + } + + void DeleteInput(size_t Idx) { + InputInfo &II = *Inputs[Idx]; + DeleteFile(II); Unit().swap(II.U); if (FeatureDebug) Printf("EVICTED %zd\n", Idx); diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 0453a7f443b5..87968893853e 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -265,7 +265,7 @@ int RunOneTest(Fuzzer *F, const char *InputFilePath, size_t MaxLen) { Unit U = FileToVector(InputFilePath); if (MaxLen && MaxLen < U.size()) U.resize(MaxLen); - F->RunOne(U.data(), U.size()); + F->ExecuteCallback(U.data(), U.size()); F->TryDetectingAMemoryLeak(U.data(), U.size(), true); return 0; } @@ -441,7 +441,6 @@ int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) { Printf("INFO: The input is small enough, exiting\n"); exit(0); } - Corpus->AddToCorpus(U, 0); F->SetMaxInputLen(U.size()); F->SetMaxMutationLen(U.size() - 1); F->MinimizeCrashLoop(U); @@ -572,6 +571,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.UseCmp = Flags.use_cmp; Options.UseValueProfile = Flags.use_value_profile; Options.Shrink = Flags.shrink; + Options.ReduceInputs = Flags.reduce_inputs; Options.ShuffleAtStartUp = Flags.shuffle; Options.PreferSmall = Flags.prefer_small; Options.ReloadIntervalSec = Flags.reload; @@ -657,7 +657,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { size_t Size = SMR.ReadByteArraySize(); SMR.WriteByteArray(nullptr, 0); const Unit tmp(SMR.GetByteArray(), SMR.GetByteArray() + Size); - F->RunOne(tmp.data(), tmp.size()); + F->ExecuteCallback(tmp.data(), tmp.size()); SMR.PostServer(); } return 0; diff --git a/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp b/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp index 7b02b6f0b701..503f0395cf8f 100644 --- a/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp +++ b/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp @@ -41,7 +41,8 @@ namespace fuzzer { ExternalFunctions::ExternalFunctions() { #define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ this->NAME = ::NAME; \ - CheckFnPtr((void *)::NAME, #NAME, WARN); + CheckFnPtr(reinterpret_cast(reinterpret_cast(::NAME)), \ + #NAME, WARN); #include "FuzzerExtFunctions.def" diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 7ff196c8fa96..5e70cbad3cf1 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -65,7 +65,9 @@ FUZZER_FLAG_INT(use_memmem, 1, FUZZER_FLAG_INT(use_value_profile, 0, "Experimental. 
Use value profile to guide fuzzing.") FUZZER_FLAG_INT(use_cmp, 1, "Use CMP traces to guide mutations") -FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus elements.") +FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus inputs.") +FUZZER_FLAG_INT(reduce_inputs, 0, "Experimental. " + "Try to reduce the size of inputs wile preserving their full feature sets") FUZZER_FLAG_UNSIGNED(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" " this number of jobs in separate worker processes" " with stdout/stderr redirected to fuzz-JOB.log.") diff --git a/lib/Fuzzer/FuzzerIOWindows.cpp b/lib/Fuzzer/FuzzerIOWindows.cpp index 75d4e3a06071..742520267b73 100644 --- a/lib/Fuzzer/FuzzerIOWindows.cpp +++ b/lib/Fuzzer/FuzzerIOWindows.cpp @@ -182,7 +182,7 @@ static size_t ParseFileName(const std::string &FileName, const size_t Offset) { return Pos - Offset; } -// Parse a directory ending in separator, like: SomeDir\ +// Parse a directory ending in separator, like: `SomeDir\` // Returns number of characters considered if successful. static size_t ParseDir(const std::string &FileName, const size_t Offset) { size_t Pos = Offset; @@ -197,7 +197,7 @@ static size_t ParseDir(const std::string &FileName, const size_t Offset) { return Pos - Offset; } -// Parse a servername and share, like: SomeServer\SomeShare\ +// Parse a servername and share, like: `SomeServer\SomeShare\` // Returns number of characters considered if successful. static size_t ParseServerAndShare(const std::string &FileName, const size_t Offset) { diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index 5f184c2316e2..a732f895375e 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -65,7 +65,8 @@ class Fuzzer { static void StaticFileSizeExceedCallback(); void ExecuteCallback(const uint8_t *Data, size_t Size); - size_t RunOne(const uint8_t *Data, size_t Size); + bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, + InputInfo *II = nullptr); // Merge Corpora[1:] into Corpora[0]. void Merge(const std::vector &Corpora); @@ -95,13 +96,12 @@ class Fuzzer { void InterruptCallback(); void MutateAndTestOne(); void ReportNewCoverage(InputInfo *II, const Unit &U); - size_t RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } + void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size); void WriteToOutputCorpus(const Unit &U); void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0); void PrintStatusForNewUnit(const Unit &U); void ShuffleCorpus(UnitVector *V); - void AddToCorpus(const Unit &U); void CheckExitOnSrcPosOrItem(); // Trace-based fuzzing: we run a unit with some kind of tracing @@ -142,6 +142,8 @@ class Fuzzer { size_t MaxInputLen = 0; size_t MaxMutationLen = 0; + std::vector FeatureSetTmp; + // Need to know our own thread. 
static thread_local bool IsMyThread; }; diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index fbf18357ede6..6816f3af8a6f 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -22,9 +22,6 @@ #include #if defined(__has_include) -#if __has_include() -#include -#endif #if __has_include() #include #endif @@ -348,11 +345,8 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) { if (U.size() > MaxSize) U.resize(MaxSize); if (!Corpus.HasUnit(U)) { - if (size_t NumFeatures = RunOne(U)) { - CheckExitOnSrcPosOrItem(); - Corpus.AddToCorpus(U, NumFeatures); + if (RunOne(U.data(), U.size())) Reloaded = true; - } } } if (Reloaded) @@ -377,10 +371,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { ExecuteCallback(&dummy, 0); for (const auto &U : *InitialCorpus) { - if (size_t NumFeatures = RunOne(U)) { - CheckExitOnSrcPosOrItem(); - Corpus.AddToCorpus(U, NumFeatures); - } + RunOne(U.data(), U.size()); TryDetectingAMemoryLeak(U.data(), U.size(), /*DuringInitialCorpusExecution*/ true); } @@ -392,18 +383,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { } } -size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) { - if (!Size) return 0; - TotalNumberOfRuns++; - - ExecuteCallback(Data, Size); - - size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); - TPC.CollectFeatures([&](size_t Feature) { - Corpus.AddFeature(Feature, Size, Options.Shrink); - }); - size_t NumUpdatesAfter = Corpus.NumFeatureUpdates(); - +void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) { auto TimeOfUnit = duration_cast(UnitStopTime - UnitStartTime).count(); if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && @@ -415,7 +395,34 @@ size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) { Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds); WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-"); } - return NumUpdatesAfter - NumUpdatesBefore; +} + +bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, + InputInfo *II) { + if (!Size) return false; + + ExecuteCallback(Data, Size); + + FeatureSetTmp.clear(); + size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); + TPC.CollectFeatures([&](size_t Feature) { + Corpus.AddFeature(Feature, Size, Options.Shrink); + if (Options.ReduceInputs) + FeatureSetTmp.push_back(Feature); + }); + PrintPulseAndReportSlowInput(Data, Size); + size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; + if (NumNewFeatures) { + Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, + FeatureSetTmp); + CheckExitOnSrcPosOrItem(); + return true; + } + if (II && Corpus.TryToReplace(II, Data, Size, FeatureSetTmp)) { + CheckExitOnSrcPosOrItem(); + return true; + } + return false; } size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const { @@ -443,6 +450,7 @@ static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) { } void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { + TotalNumberOfRuns++; assert(InFuzzingThread()); if (SMR.IsClient()) SMR.WriteByteArray(Data, Size); @@ -595,12 +603,9 @@ void Fuzzer::MutateAndTestOne() { if (i == 0) StartTraceRecording(); II.NumExecutedMutations++; - if (size_t NumFeatures = RunOne(CurrentUnitData, Size)) { - Corpus.AddToCorpus({CurrentUnitData, CurrentUnitData + Size}, NumFeatures, - /*MayDeleteFile=*/true); + if (RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II)) ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size}); - CheckExitOnSrcPosOrItem(); - } + 
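// A reduced model of the corpus-reduction rule this hunk wires into RunOne
// and InputCorpus::TryToReplace: when -reduce_inputs=1 records per-input
// feature sets (see the reduce_inputs.test added by this patch), a smaller
// input that reproduces exactly the same feature set replaces the larger
// corpus element. Types are simplified stand-ins for InputInfo/Unit.

#include <cstdint>
#include <vector>

struct Input {
  std::vector<uint8_t> Data;
  std::vector<uint32_t> Features;   // features observed when running Data
};

// Returns true if Candidate replaced Existing.
bool tryToReplace(Input &Existing, const Input &Candidate) {
  if (Candidate.Data.size() >= Existing.Data.size())
    return false;                   // not smaller: nothing to gain
  if (Existing.Features.empty() || Existing.Features != Candidate.Features)
    return false;                   // coverage would change: keep the original
  Existing.Data = Candidate.Data;   // same features, fewer bytes
  return true;
}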
StopTraceRecording(); TryDetectingAMemoryLeak(CurrentUnitData, Size, /*DuringInitialCorpusExecution*/ false); @@ -638,7 +643,8 @@ void Fuzzer::MinimizeCrashLoop(const Unit &U) { for (int i = 0; i < Options.MutateDepth; i++) { size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen); assert(NewSize > 0 && NewSize <= MaxMutationLen); - RunOne(CurrentUnitData, NewSize); + ExecuteCallback(CurrentUnitData, NewSize); + PrintPulseAndReportSlowInput(CurrentUnitData, NewSize); TryDetectingAMemoryLeak(CurrentUnitData, NewSize, /*DuringInitialCorpusExecution*/ false); } diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h index b1366789be00..9500235e2b1f 100644 --- a/lib/Fuzzer/FuzzerOptions.h +++ b/lib/Fuzzer/FuzzerOptions.h @@ -32,6 +32,7 @@ struct FuzzingOptions { bool UseCmp = false; bool UseValueProfile = false; bool Shrink = false; + bool ReduceInputs = false; int ReloadIntervalSec = 1; bool ShuffleAtStartUp = true; bool PreferSmall = true; diff --git a/lib/Fuzzer/FuzzerUtilDarwin.cpp b/lib/Fuzzer/FuzzerUtilDarwin.cpp index 9674368c355e..2df4872a9206 100644 --- a/lib/Fuzzer/FuzzerUtilDarwin.cpp +++ b/lib/Fuzzer/FuzzerUtilDarwin.cpp @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include // There is no header for this on macOS so declare here @@ -97,11 +99,16 @@ int ExecuteCommand(const std::string &Command) { pid_t Pid; char **Environ = environ; // Read from global const char *CommandCStr = Command.c_str(); - const char *Argv[] = {"sh", "-c", CommandCStr, NULL}; + char *const Argv[] = { + strdup("sh"), + strdup("-c"), + strdup(CommandCStr), + NULL + }; int ErrorCode = 0, ProcessStatus = 0; // FIXME: We probably shouldn't hardcode the shell path. ErrorCode = posix_spawn(&Pid, "/bin/sh", NULL, &SpawnAttributes, - (char *const *)Argv, Environ); + Argv, Environ); (void)posix_spawnattr_destroy(&SpawnAttributes); if (!ErrorCode) { pid_t SavedPid = Pid; @@ -120,6 +127,8 @@ int ExecuteCommand(const std::string &Command) { // Shell execution failure. ProcessStatus = W_EXITCODE(127, 0); } + for (unsigned i = 0, n = sizeof(Argv) / sizeof(Argv[0]); i < n; ++i) + free(Argv[i]); // Restore the signal handlers of the current process when the last thread // using this function finishes. diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index 1cf6c9502a2b..30566bdc87ae 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -118,6 +118,7 @@ set(Tests SingleStrncmpTest SpamyTest ShrinkControlFlowTest + ShrinkControlFlowSimpleTest ShrinkValueProfileTest StrcmpTest StrncmpOOBTest @@ -271,5 +272,5 @@ add_lit_testsuite(check-fuzzer "Running Fuzzer tests" # Don't add dependencies on Windows. The linker step would fail on Windows, # since cmake will use link.exe for linking and won't include compiler-rt libs. if(NOT MSVC) - add_dependencies(check-fuzzer FileCheck sancov not) + add_dependencies(check-fuzzer FileCheck sancov not llvm-symbolizer) endif() diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 812894fd947f..1053c28527bf 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -5,6 +5,9 @@ // with ASan) involving C++ standard library types when using libcxx. #define _LIBCPP_HAS_NO_ASAN +// Do not attempt to use LLVM ostream from gtest. 
+#define GTEST_NO_LLVM_RAW_OSTREAM 1 + #include "FuzzerCorpus.h" #include "FuzzerDictionary.h" #include "FuzzerInternal.h" @@ -590,7 +593,7 @@ TEST(Corpus, Distribution) { size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) - C->AddToCorpus(Unit{ static_cast(i) }, 0); + C->AddToCorpus(Unit{ static_cast(i) }, 0, false, {}); std::vector Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) { diff --git a/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp b/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp new file mode 100644 index 000000000000..0afd26df23a0 --- /dev/null +++ b/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp @@ -0,0 +1,19 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// Test that we can find the minimal item in the corpus (3 bytes: "FUZ"). +#include +#include +#include +#include +#include + +static volatile int Sink; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 2) return 0; + if (Data[0] == 'F' && Data[Size / 2] == 'U' && Data[Size - 1] == 'Z') + Sink++; + return 0; +} + diff --git a/lib/Fuzzer/test/reduce_inputs.test b/lib/Fuzzer/test/reduce_inputs.test new file mode 100644 index 000000000000..a4a5c57123d3 --- /dev/null +++ b/lib/Fuzzer/test/reduce_inputs.test @@ -0,0 +1,13 @@ +# Test -reduce_inputs=1 + +RUN: rm -rf %t/C +RUN: mkdir -p %t/C +RUN: LLVMFuzzer-ShrinkControlFlowSimpleTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -reduce_inputs=1 -runs=1000000 %t/C 2>&1 | FileCheck %s +CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60' + +# Test that reduce_inputs deletes redundant files in the corpus. +RUN: LLVMFuzzer-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT +COUNT: READ units: 3 + + + diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index c7f112887a30..80371780fb6d 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -2119,6 +2119,8 @@ class AssemblyWriter { bool ShouldPreserveUseListOrder; UseListOrderStack UseListOrders; SmallVector MDNames; + /// Synchronization scope names registered with LLVMContext. 
+ SmallVector SSNs; public: /// Construct an AssemblyWriter with an external SlotTracker @@ -2134,10 +2136,15 @@ class AssemblyWriter { void writeOperand(const Value *Op, bool PrintType); void writeParamOperand(const Value *Operand, AttributeSet Attrs); void writeOperandBundles(ImmutableCallSite CS); - void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); - void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, + void writeSyncScope(const LLVMContext &Context, + SyncScope::ID SSID); + void writeAtomic(const LLVMContext &Context, + AtomicOrdering Ordering, + SyncScope::ID SSID); + void writeAtomicCmpXchg(const LLVMContext &Context, + AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); void writeAllMDNodes(); void writeMDNode(unsigned Slot, const MDNode *Node); @@ -2199,30 +2206,42 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule); } -void AssemblyWriter::writeAtomic(AtomicOrdering Ordering, - SynchronizationScope SynchScope) { +void AssemblyWriter::writeSyncScope(const LLVMContext &Context, + SyncScope::ID SSID) { + switch (SSID) { + case SyncScope::System: { + break; + } + default: { + if (SSNs.empty()) + Context.getSyncScopeNames(SSNs); + + Out << " syncscope(\""; + PrintEscapedString(SSNs[SSID], Out); + Out << "\")"; + break; + } + } +} + +void AssemblyWriter::writeAtomic(const LLVMContext &Context, + AtomicOrdering Ordering, + SyncScope::ID SSID) { if (Ordering == AtomicOrdering::NotAtomic) return; - switch (SynchScope) { - case SingleThread: Out << " singlethread"; break; - case CrossThread: break; - } - + writeSyncScope(Context, SSID); Out << " " << toIRString(Ordering); } -void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, +void AssemblyWriter::writeAtomicCmpXchg(const LLVMContext &Context, + AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { assert(SuccessOrdering != AtomicOrdering::NotAtomic && FailureOrdering != AtomicOrdering::NotAtomic); - switch (SynchScope) { - case SingleThread: Out << " singlethread"; break; - case CrossThread: break; - } - + writeSyncScope(Context, SSID); Out << " " << toIRString(SuccessOrdering); Out << " " << toIRString(FailureOrdering); } @@ -3215,21 +3234,22 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Print atomic ordering/alignment for memory operations if (const LoadInst *LI = dyn_cast(&I)) { if (LI->isAtomic()) - writeAtomic(LI->getOrdering(), LI->getSynchScope()); + writeAtomic(LI->getContext(), LI->getOrdering(), LI->getSyncScopeID()); if (LI->getAlignment()) Out << ", align " << LI->getAlignment(); } else if (const StoreInst *SI = dyn_cast(&I)) { if (SI->isAtomic()) - writeAtomic(SI->getOrdering(), SI->getSynchScope()); + writeAtomic(SI->getContext(), SI->getOrdering(), SI->getSyncScopeID()); if (SI->getAlignment()) Out << ", align " << SI->getAlignment(); } else if (const AtomicCmpXchgInst *CXI = dyn_cast(&I)) { - writeAtomicCmpXchg(CXI->getSuccessOrdering(), CXI->getFailureOrdering(), - CXI->getSynchScope()); + writeAtomicCmpXchg(CXI->getContext(), CXI->getSuccessOrdering(), + CXI->getFailureOrdering(), CXI->getSyncScopeID()); } else if (const AtomicRMWInst *RMWI = dyn_cast(&I)) { - writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope()); + writeAtomic(RMWI->getContext(), RMWI->getOrdering(), + RMWI->getSyncScopeID()); } else if (const 
FenceInst *FI = dyn_cast(&I)) { - writeAtomic(FI->getOrdering(), FI->getSynchScope()); + writeAtomic(FI->getContext(), FI->getOrdering(), FI->getSyncScopeID()); } // Print Metadata info. diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index 11259cbe1815..1cc229d68bfc 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -43,6 +43,7 @@ add_llvm_library(LLVMCore Pass.cpp PassManager.cpp PassRegistry.cpp + SafepointIRVerifier.cpp ProfileSummary.cpp Statepoint.cpp Type.cpp diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 3469026ad7ed..23ccd8d4cf42 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -242,7 +242,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, // X | -1 -> -1. if (ConstantInt *RHSC = dyn_cast(RHS)) - if (RHSC->isAllOnesValue()) + if (RHSC->isMinusOne()) return RHSC; Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize); @@ -1015,33 +1015,33 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, if (ConstantInt *CI2 = dyn_cast(C2)) { switch (Opcode) { case Instruction::Add: - if (CI2->equalsInt(0)) return C1; // X + 0 == X + if (CI2->isZero()) return C1; // X + 0 == X break; case Instruction::Sub: - if (CI2->equalsInt(0)) return C1; // X - 0 == X + if (CI2->isZero()) return C1; // X - 0 == X break; case Instruction::Mul: - if (CI2->equalsInt(0)) return C2; // X * 0 == 0 - if (CI2->equalsInt(1)) + if (CI2->isZero()) return C2; // X * 0 == 0 + if (CI2->isOne()) return C1; // X * 1 == X break; case Instruction::UDiv: case Instruction::SDiv: - if (CI2->equalsInt(1)) + if (CI2->isOne()) return C1; // X / 1 == X - if (CI2->equalsInt(0)) + if (CI2->isZero()) return UndefValue::get(CI2->getType()); // X / 0 == undef break; case Instruction::URem: case Instruction::SRem: - if (CI2->equalsInt(1)) + if (CI2->isOne()) return Constant::getNullValue(CI2->getType()); // X % 1 == 0 - if (CI2->equalsInt(0)) + if (CI2->isZero()) return UndefValue::get(CI2->getType()); // X % 0 == undef break; case Instruction::And: if (CI2->isZero()) return C2; // X & 0 == 0 - if (CI2->isAllOnesValue()) + if (CI2->isMinusOne()) return C1; // X & -1 == X if (ConstantExpr *CE1 = dyn_cast(C1)) { @@ -1078,12 +1078,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } break; case Instruction::Or: - if (CI2->equalsInt(0)) return C1; // X | 0 == X - if (CI2->isAllOnesValue()) + if (CI2->isZero()) return C1; // X | 0 == X + if (CI2->isMinusOne()) return C2; // X | -1 == -1 break; case Instruction::Xor: - if (CI2->equalsInt(0)) return C1; // X ^ 0 == X + if (CI2->isZero()) return C1; // X ^ 0 == X if (ConstantExpr *CE1 = dyn_cast(C1)) { switch (CE1->getOpcode()) { @@ -1091,7 +1091,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::ICmp: case Instruction::FCmp: // cmp pred ^ true -> cmp !pred - assert(CI2->equalsInt(1)); + assert(CI2->isOne()); CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate(); pred = CmpInst::getInversePredicate(pred); return ConstantExpr::getCompare(pred, CE1->getOperand(0), @@ -1126,18 +1126,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::Mul: return ConstantInt::get(CI1->getContext(), C1V * C2V); case Instruction::UDiv: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V)); case Instruction::SDiv: - assert(!CI2->isNullValue() && "Div by zero handled above"); 
+ assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V)); case Instruction::URem: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); return ConstantInt::get(CI1->getContext(), C1V.urem(C2V)); case Instruction::SRem: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.srem(C2V)); @@ -1170,7 +1170,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::LShr: case Instruction::AShr: case Instruction::Shl: - if (CI1->equalsInt(0)) return C1; + if (CI1->isZero()) return C1; break; default: break; diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index d387a6f0ecb9..e31779c83e3a 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -512,7 +512,7 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) { } Constant *ConstantInt::getTrue(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1."); ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext()); if (auto *VTy = dyn_cast(Ty)) return ConstantVector::getSplat(VTy->getNumElements(), TrueC); @@ -520,7 +520,7 @@ Constant *ConstantInt::getTrue(Type *Ty) { } Constant *ConstantInt::getFalse(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1."); ConstantInt *FalseC = ConstantInt::getFalse(Ty->getContext()); if (auto *VTy = dyn_cast(Ty)) return ConstantVector::getSplat(VTy->getNumElements(), FalseC); @@ -1635,9 +1635,9 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty, bool OnlyIfReduced) { Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy, bool OnlyIfReduced) { - assert(C->getType()->getScalarType()->isPointerTy() && + assert(C->getType()->isPtrOrPtrVectorTy() && "PtrToInt source must be pointer or pointer vector"); - assert(DstTy->getScalarType()->isIntegerTy() && + assert(DstTy->isIntOrIntVectorTy() && "PtrToInt destination must be integer or integer vector"); assert(isa(C->getType()) == isa(DstTy)); if (isa(C->getType())) @@ -1648,9 +1648,9 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy, Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy, bool OnlyIfReduced) { - assert(C->getType()->getScalarType()->isIntegerTy() && + assert(C->getType()->isIntOrIntVectorTy() && "IntToPtr source must be integer or integer vector"); - assert(DstTy->getScalarType()->isPointerTy() && + assert(DstTy->isPtrOrPtrVectorTy() && "IntToPtr destination must be a pointer or pointer vector"); assert(isa(C->getType()) == isa(DstTy)); if (isa(C->getType())) @@ -1914,8 +1914,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced) { assert(LHS->getType() == RHS->getType()); - assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE && - pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate"); + assert(CmpInst::isIntPredicate((CmpInst::Predicate)pred) && + "Invalid ICmp Predicate"); if (Constant 
*FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... @@ -1939,7 +1939,8 @@ Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced) { assert(LHS->getType() == RHS->getType()); - assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate"); + assert(CmpInst::isFPPredicate((CmpInst::Predicate)pred) && + "Invalid FCmp Predicate"); if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... @@ -2379,32 +2380,32 @@ void ConstantDataSequential::destroyConstantImpl() { Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*1), Ty); + return getImpl(StringRef(Data, Elts.size() * 1), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*2), Ty); + return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size() * 8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } /// getFP() constructors - Return a constant with array type with an element @@ -2416,27 +2417,26 @@ Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size() * 2), Ty); + return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size() * 4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef Elts) { Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size() * 8), Ty); 
+ return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataArray::getString(LLVMContext &Context, StringRef Str, bool AddNull) { if (!AddNull) { const uint8_t *Data = reinterpret_cast(Str.data()); - return get(Context, makeArrayRef(const_cast(Data), - Str.size())); + return get(Context, makeArrayRef(Data, Str.size())); } SmallVector ElementVals; @@ -2451,32 +2451,32 @@ Constant *ConstantDataArray::getString(LLVMContext &Context, Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*1), Ty); + return getImpl(StringRef(Data, Elts.size() * 1), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*2), Ty); + return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts){ Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size()*4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size() * 8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } /// getFP() constructors - Return a constant with vector type with an element @@ -2488,19 +2488,19 @@ Constant *ConstantDataVector::getFP(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size() * 2), Ty); + return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataVector::getFP(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size() * 4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataVector::getFP(LLVMContext &Context, ArrayRef Elts) { Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); - return getImpl(StringRef(const_cast(Data), Elts.size() * 8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { @@ -2555,13 +2555,13 @@ uint64_t 
ConstantDataSequential::getElementAsInteger(unsigned Elt) const { switch (getElementType()->getIntegerBitWidth()) { default: llvm_unreachable("Invalid bitwidth for CDS"); case 8: - return *const_cast(reinterpret_cast(EltPtr)); + return *reinterpret_cast(EltPtr); case 16: - return *const_cast(reinterpret_cast(EltPtr)); + return *reinterpret_cast(EltPtr); case 32: - return *const_cast(reinterpret_cast(EltPtr)); + return *reinterpret_cast(EltPtr); case 64: - return *const_cast(reinterpret_cast(EltPtr)); + return *reinterpret_cast(EltPtr); } } @@ -2589,16 +2589,13 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { float ConstantDataSequential::getElementAsFloat(unsigned Elt) const { assert(getElementType()->isFloatTy() && "Accessor can only be used when element is a 'float'"); - const float *EltPtr = reinterpret_cast(getElementPointer(Elt)); - return *const_cast(EltPtr); + return *reinterpret_cast(getElementPointer(Elt)); } double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { assert(getElementType()->isDoubleTy() && "Accessor can only be used when element is a 'float'"); - const double *EltPtr = - reinterpret_cast(getElementPointer(Elt)); - return *const_cast(EltPtr); + return *reinterpret_cast(getElementPointer(Elt)); } Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 4ff0261a7f08..2165ae5a9470 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -50,6 +50,7 @@ void llvm::initializeCore(PassRegistry &Registry) { initializePrintModulePassWrapperPass(Registry); initializePrintFunctionPassWrapperPass(Registry); initializePrintBasicBlockPassPass(Registry); + initializeSafepointIRVerifierPass(Registry); initializeVerifierLegacyPassPass(Registry); } @@ -2755,11 +2756,14 @@ static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) { llvm_unreachable("Invalid AtomicOrdering value!"); } +// TODO: Should this and other atomic instructions support building with +// "syncscope"? LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, LLVMBool isSingleThread, const char *Name) { return wrap( unwrap(B)->CreateFence(mapFromLLVMOrdering(Ordering), - isSingleThread ? SingleThread : CrossThread, + isSingleThread ? SyncScope::SingleThread + : SyncScope::System, Name)); } @@ -3041,7 +3045,8 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break; } return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), - mapFromLLVMOrdering(ordering), singleThread ? SingleThread : CrossThread)); + mapFromLLVMOrdering(ordering), singleThread ? SyncScope::SingleThread + : SyncScope::System)); } LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, @@ -3053,7 +3058,7 @@ LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, return wrap(unwrap(B)->CreateAtomicCmpXchg(unwrap(Ptr), unwrap(Cmp), unwrap(New), mapFromLLVMOrdering(SuccessOrdering), mapFromLLVMOrdering(FailureOrdering), - singleThread ? SingleThread : CrossThread)); + singleThread ? 
SyncScope::SingleThread : SyncScope::System)); } @@ -3061,17 +3066,18 @@ LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst) { Value *P = unwrap(AtomicInst); if (AtomicRMWInst *I = dyn_cast(P)) - return I->getSynchScope() == SingleThread; - return cast(P)->getSynchScope() == SingleThread; + return I->getSyncScopeID() == SyncScope::SingleThread; + return cast(P)->getSyncScopeID() == + SyncScope::SingleThread; } void LLVMSetAtomicSingleThread(LLVMValueRef AtomicInst, LLVMBool NewValue) { Value *P = unwrap(AtomicInst); - SynchronizationScope Sync = NewValue ? SingleThread : CrossThread; + SyncScope::ID SSID = NewValue ? SyncScope::SingleThread : SyncScope::System; if (AtomicRMWInst *I = dyn_cast(P)) - return I->setSynchScope(Sync); - return cast(P)->setSynchScope(Sync); + return I->setSyncScopeID(SSID); + return cast(P)->setSyncScopeID(SSID); } LLVMAtomicOrdering LLVMGetCmpXchgSuccessOrdering(LLVMValueRef CmpXchgInst) { diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 3dd653d2d047..365cb019aec4 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -362,13 +362,13 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, (LI->getAlignment() == cast(I2)->getAlignment() || IgnoreAlignment) && LI->getOrdering() == cast(I2)->getOrdering() && - LI->getSynchScope() == cast(I2)->getSynchScope(); + LI->getSyncScopeID() == cast(I2)->getSyncScopeID(); if (const StoreInst *SI = dyn_cast(I1)) return SI->isVolatile() == cast(I2)->isVolatile() && (SI->getAlignment() == cast(I2)->getAlignment() || IgnoreAlignment) && SI->getOrdering() == cast(I2)->getOrdering() && - SI->getSynchScope() == cast(I2)->getSynchScope(); + SI->getSyncScopeID() == cast(I2)->getSyncScopeID(); if (const CmpInst *CI = dyn_cast(I1)) return CI->getPredicate() == cast(I2)->getPredicate(); if (const CallInst *CI = dyn_cast(I1)) @@ -386,7 +386,7 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, return EVI->getIndices() == cast(I2)->getIndices(); if (const FenceInst *FI = dyn_cast(I1)) return FI->getOrdering() == cast(I2)->getOrdering() && - FI->getSynchScope() == cast(I2)->getSynchScope(); + FI->getSyncScopeID() == cast(I2)->getSyncScopeID(); if (const AtomicCmpXchgInst *CXI = dyn_cast(I1)) return CXI->isVolatile() == cast(I2)->isVolatile() && CXI->isWeak() == cast(I2)->isWeak() && @@ -394,12 +394,13 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, cast(I2)->getSuccessOrdering() && CXI->getFailureOrdering() == cast(I2)->getFailureOrdering() && - CXI->getSynchScope() == cast(I2)->getSynchScope(); + CXI->getSyncScopeID() == + cast(I2)->getSyncScopeID(); if (const AtomicRMWInst *RMWI = dyn_cast(I1)) return RMWI->getOperation() == cast(I2)->getOperation() && RMWI->isVolatile() == cast(I2)->isVolatile() && RMWI->getOrdering() == cast(I2)->getOrdering() && - RMWI->getSynchScope() == cast(I2)->getSynchScope(); + RMWI->getSyncScopeID() == cast(I2)->getSyncScopeID(); return true; } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index a79b00be4ffe..2c49564e328b 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -1304,34 +1304,34 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, Instruction *InsertBef) : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertBef) {} + SyncScope::System, InsertBef) {} LoadInst::LoadInst(Value *Ptr, const Twine 
&Name, bool isVolatile, unsigned Align, BasicBlock *InsertAE) : LoadInst(Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertAE) {} + SyncScope::System, InsertAE) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, Instruction *InsertBef) + SyncScope::ID SSID, Instruction *InsertBef) : UnaryInstruction(Ty, Load, Ptr, InsertBef) { assert(Ty == cast(Ptr->getType())->getElementType()); setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); setName(Name); } LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAE) : UnaryInstruction(cast(Ptr->getType())->getElementType(), Load, Ptr, InsertAE) { setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); setName(Name); } @@ -1419,16 +1419,16 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, Instruction *InsertBefore) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertBefore) {} + SyncScope::System, InsertBefore) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, BasicBlock *InsertAtEnd) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertAtEnd) {} + SyncScope::System, InsertAtEnd) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(val->getContext()), Store, OperandTraits::op_begin(this), @@ -1438,13 +1438,13 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); } StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(val->getContext()), Store, OperandTraits::op_begin(this), @@ -1454,7 +1454,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); } @@ -1474,13 +1474,13 @@ void StoreInst::setAlignment(unsigned Align) { void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { Op<0>() = Ptr; Op<1>() = Cmp; Op<2>() = NewVal; setSuccessOrdering(SuccessOrdering); setFailureOrdering(FailureOrdering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); assert(getOperand(0) && getOperand(1) && getOperand(2) && "All operands must be non-null!"); @@ -1507,25 +1507,25 @@ void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction( StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), AtomicCmpXchg, 
OperandTraits::op_begin(this), OperandTraits::operands(this), InsertBefore) { - Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); + Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SSID); } AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction( StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), AtomicCmpXchg, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertAtEnd) { - Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); + Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SSID); } //===----------------------------------------------------------------------===// @@ -1534,12 +1534,12 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { Op<0>() = Ptr; Op<1>() = Val; setOperation(Operation); setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); assert(getOperand(0) && getOperand(1) && "All operands must be non-null!"); @@ -1554,24 +1554,24 @@ void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Val->getType(), AtomicRMW, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertBefore) { - Init(Operation, Ptr, Val, Ordering, SynchScope); + Init(Operation, Ptr, Val, Ordering, SSID); } AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Val->getType(), AtomicRMW, OperandTraits::op_begin(this), OperandTraits::operands(this), InsertAtEnd) { - Init(Operation, Ptr, Val, Ordering, SynchScope); + Init(Operation, Ptr, Val, Ordering, SSID); } //===----------------------------------------------------------------------===// @@ -1579,19 +1579,19 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, //===----------------------------------------------------------------------===// FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } //===----------------------------------------------------------------------===// @@ -3064,16 +3064,14 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { if (VectorType *VT = dyn_cast(SrcTy)) if (VT->getNumElements() != cast(DstTy)->getNumElements()) return false; - return SrcTy->getScalarType()->isPointerTy() && - DstTy->getScalarType()->isIntegerTy(); + return SrcTy->isPtrOrPtrVectorTy() && DstTy->isIntOrIntVectorTy(); case Instruction::IntToPtr: if (isa(SrcTy) != isa(DstTy)) return false; if (VectorType *VT = 
dyn_cast(SrcTy)) if (VT->getNumElements() != cast(DstTy)->getNumElements()) return false; - return SrcTy->getScalarType()->isIntegerTy() && - DstTy->getScalarType()->isPointerTy(); + return SrcTy->isIntOrIntVectorTy() && DstTy->isPtrOrPtrVectorTy(); case Instruction::BitCast: { PointerType *SrcPtrTy = dyn_cast(SrcTy->getScalarType()); PointerType *DstPtrTy = dyn_cast(DstTy->getScalarType()); @@ -3797,12 +3795,12 @@ AllocaInst *AllocaInst::cloneImpl() const { LoadInst *LoadInst::cloneImpl() const { return new LoadInst(getOperand(0), Twine(), isVolatile(), - getAlignment(), getOrdering(), getSynchScope()); + getAlignment(), getOrdering(), getSyncScopeID()); } StoreInst *StoreInst::cloneImpl() const { return new StoreInst(getOperand(0), getOperand(1), isVolatile(), - getAlignment(), getOrdering(), getSynchScope()); + getAlignment(), getOrdering(), getSyncScopeID()); } @@ -3810,7 +3808,7 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { AtomicCmpXchgInst *Result = new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2), getSuccessOrdering(), getFailureOrdering(), - getSynchScope()); + getSyncScopeID()); Result->setVolatile(isVolatile()); Result->setWeak(isWeak()); return Result; @@ -3818,14 +3816,14 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { AtomicRMWInst *AtomicRMWInst::cloneImpl() const { AtomicRMWInst *Result = - new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1), - getOrdering(), getSynchScope()); + new AtomicRMWInst(getOperation(), getOperand(0), getOperand(1), + getOrdering(), getSyncScopeID()); Result->setVolatile(isVolatile()); return Result; } FenceInst *FenceInst::cloneImpl() const { - return new FenceInst(getContext(), getOrdering(), getSynchScope()); + return new FenceInst(getContext(), getOrdering(), getSyncScopeID()); } TruncInst *TruncInst::cloneImpl() const { diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 2e13f362344d..c58459d6d5f5 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -81,6 +81,18 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { assert(GCTransitionEntry->second == LLVMContext::OB_gc_transition && "gc-transition operand bundle id drifted!"); (void)GCTransitionEntry; + + SyncScope::ID SingleThreadSSID = + pImpl->getOrInsertSyncScopeID("singlethread"); + assert(SingleThreadSSID == SyncScope::SingleThread && + "singlethread synchronization scope ID drifted!"); + (void)SingleThreadSSID; + + SyncScope::ID SystemSSID = + pImpl->getOrInsertSyncScopeID(""); + assert(SystemSSID == SyncScope::System && + "system synchronization scope ID drifted!"); + (void)SystemSSID; } LLVMContext::~LLVMContext() { delete pImpl; } @@ -255,6 +267,14 @@ uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const { return pImpl->getOperandBundleTagID(Tag); } +SyncScope::ID LLVMContext::getOrInsertSyncScopeID(StringRef SSN) { + return pImpl->getOrInsertSyncScopeID(SSN); +} + +void LLVMContext::getSyncScopeNames(SmallVectorImpl &SSNs) const { + pImpl->getSyncScopeNames(SSNs); +} + void LLVMContext::setGC(const Function &Fn, std::string GCName) { auto It = pImpl->GCNames.find(&Fn); diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index c19e1be44fdc..57dd08b36fe7 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -205,6 +205,20 @@ uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const { return I->second; } +SyncScope::ID LLVMContextImpl::getOrInsertSyncScopeID(StringRef SSN) { + auto NewSSID = SSC.size(); + assert(NewSSID < 
std::numeric_limits::max() && + "Hit the maximum number of synchronization scopes allowed!"); + return SSC.insert(std::make_pair(SSN, SyncScope::ID(NewSSID))).first->second; +} + +void LLVMContextImpl::getSyncScopeNames( + SmallVectorImpl &SSNs) const { + SSNs.resize(SSC.size()); + for (const auto &SSE : SSC) + SSNs[SSE.second] = SSE.first(); +} + /// Singleton instance of the OptBisect class. /// /// This singleton is accessed via the LLVMContext::getOptBisect() function. It diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 395beb57fe37..e413a4f34432 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -1297,6 +1297,20 @@ class LLVMContextImpl { void getOperandBundleTags(SmallVectorImpl &Tags) const; uint32_t getOperandBundleTagID(StringRef Tag) const; + /// A set of interned synchronization scopes. The StringMap maps + /// synchronization scope names to their respective synchronization scope IDs. + StringMap SSC; + + /// getOrInsertSyncScopeID - Maps synchronization scope name to + /// synchronization scope ID. Every synchronization scope registered with + /// LLVMContext has unique ID except pre-defined ones. + SyncScope::ID getOrInsertSyncScopeID(StringRef SSN); + + /// getSyncScopeNames - Populates client supplied SmallVector with + /// synchronization scope names registered with LLVMContext. Synchronization + /// scope names are ordered by increasing synchronization scope IDs. + void getSyncScopeNames(SmallVectorImpl &SSNs) const; + /// Maintain the GC name for each function. /// /// This saves allocating an additional word in Function for programs which diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index f8853ed169c5..fdc7de6eaa34 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -88,7 +88,7 @@ Module::~Module() { delete static_cast *>(NamedMDSymTab); } -RandomNumberGenerator *Module::createRNG(const Pass* P) const { +std::unique_ptr Module::createRNG(const Pass* P) const { SmallString<32> Salt(P->getPassName()); // This RNG is guaranteed to produce the same random stream only @@ -103,7 +103,7 @@ RandomNumberGenerator *Module::createRNG(const Pass* P) const { // store salt metadata from the Module constructor. Salt += sys::path::filename(getModuleIdentifier()); - return new RandomNumberGenerator(Salt); + return std::unique_ptr{new RandomNumberGenerator(Salt)}; } /// getNamedValue - Return the first global value in the module with diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp new file mode 100644 index 000000000000..8b328c221da3 --- /dev/null +++ b/lib/IR/SafepointIRVerifier.cpp @@ -0,0 +1,437 @@ +//===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Run a sanity check on the IR to ensure that Safepoints - if they've been +// inserted - were inserted correctly. In particular, look for use of +// non-relocated values after a safepoint. It's primary use is to check the +// correctness of safepoint insertion immediately after insertion, but it can +// also be used to verify that later transforms have not found a way to break +// safepoint semenatics. +// +// In its current form, this verify checks a property which is sufficient, but +// not neccessary for correctness. 
There are some cases where an unrelocated +// pointer can be used after the safepoint. Consider this example: +// +// a = ... +// b = ... +// (a',b') = safepoint(a,b) +// c = cmp eq a b +// br c, ..., .... +// +// Because it is valid to reorder 'c' above the safepoint, this is legal. In +// practice, this is a somewhat uncommon transform, but CodeGenPrep does create +// idioms like this. The verifier knows about these cases and avoids reporting +// false positives. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/SafepointIRVerifier.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "safepoint-ir-verifier" + +using namespace llvm; + +/// This option is used for writing test cases. Instead of crashing the program +/// when verification fails, report a message to the console (for FileCheck +/// usage) and continue execution as if nothing happened. +static cl::opt PrintOnly("safepoint-ir-verifier-print-only", + cl::init(false)); + +static void Verify(const Function &F, const DominatorTree &DT); + +struct SafepointIRVerifier : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DominatorTree DT; + SafepointIRVerifier() : FunctionPass(ID) { + initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + DT.recalculate(F); + Verify(F, DT); + return false; // no modifications + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + StringRef getPassName() const override { return "safepoint verifier"; } +}; + +void llvm::verifySafepointIR(Function &F) { + SafepointIRVerifier pass; + pass.runOnFunction(F); +} + +char SafepointIRVerifier::ID = 0; + +FunctionPass *llvm::createSafepointIRVerifierPass() { + return new SafepointIRVerifier(); +} + +INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir", + "Safepoint IR Verifier", false, true) +INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir", + "Safepoint IR Verifier", false, true) + +static bool isGCPointerType(Type *T) { + if (auto *PT = dyn_cast(T)) + // For the sake of this example GC, we arbitrarily pick addrspace(1) as our + // GC managed heap. We know that a pointer into this heap needs to be + // updated and that no other pointer does. + return (1 == PT->getAddressSpace()); + return false; +} + +static bool containsGCPtrType(Type *Ty) { + if (isGCPointerType(Ty)) + return true; + if (VectorType *VT = dyn_cast(Ty)) + return isGCPointerType(VT->getScalarType()); + if (ArrayType *AT = dyn_cast(Ty)) + return containsGCPtrType(AT->getElementType()); + if (StructType *ST = dyn_cast(Ty)) + return std::any_of(ST->subtypes().begin(), ST->subtypes().end(), + containsGCPtrType); + return false; +} + +// Debugging aid -- prints a [Begin, End) range of values. 
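// Usage sketch (an assumption about the build, not part of the upstream code):
// the pass is registered below under the name "verify-safepoint-ir" and is
// reachable from opt because initializeSafepointIRVerifierPass() is called from
// initializeCore() elsewhere in this patch, so it can be driven roughly as
//
//   opt -verify-safepoint-ir -safepoint-ir-verifier-print-only -S < input.ll
//
// where -safepoint-ir-verifier-print-only makes the verifier report problems
// and continue instead of aborting, the mode the FileCheck-based tests use.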
+template +static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) { + OS << "[ "; + while (Begin != End) { + OS << **Begin << " "; + ++Begin; + } + OS << "]"; +} + +/// The verifier algorithm is phrased in terms of availability. The set of +/// values "available" at a given point in the control flow graph is the set of +/// correctly relocated value at that point, and is a subset of the set of +/// definitions dominating that point. + +/// State we compute and track per basic block. +struct BasicBlockState { + // Set of values available coming in, before the phi nodes + DenseSet AvailableIn; + + // Set of values available going out + DenseSet AvailableOut; + + // AvailableOut minus AvailableIn. + // All elements are Instructions + DenseSet Contribution; + + // True if this block contains a safepoint and thus AvailableIn does not + // contribute to AvailableOut. + bool Cleared = false; +}; + + +/// Gather all the definitions dominating the start of BB into Result. This is +/// simply the Defs introduced by every dominating basic block and the function +/// arguments. +static void GatherDominatingDefs(const BasicBlock *BB, + DenseSet &Result, + const DominatorTree &DT, + DenseMap &BlockMap) { + DomTreeNode *DTN = DT[const_cast(BB)]; + + while (DTN->getIDom()) { + DTN = DTN->getIDom(); + const auto &Defs = BlockMap[DTN->getBlock()]->Contribution; + Result.insert(Defs.begin(), Defs.end()); + // If this block is 'Cleared', then nothing LiveIn to this block can be + // available after this block completes. Note: This turns out to be + // really important for reducing memory consuption of the initial available + // sets and thus peak memory usage by this verifier. + if (BlockMap[DTN->getBlock()]->Cleared) + return; + } + + for (const Argument &A : BB->getParent()->args()) + if (containsGCPtrType(A.getType())) + Result.insert(&A); +} + +/// Model the effect of an instruction on the set of available values. +static void TransferInstruction(const Instruction &I, bool &Cleared, + DenseSet &Available) { + if (isStatepoint(I)) { + Cleared = true; + Available.clear(); + } else if (containsGCPtrType(I.getType())) + Available.insert(&I); +} + +/// Compute the AvailableOut set for BB, based on the +/// BasicBlockState BBS, which is the BasicBlockState for BB. FirstPass is set +/// when the verifier runs for the first time computing the AvailableOut set +/// for BB. +static void TransferBlock(const BasicBlock *BB, + BasicBlockState &BBS, bool FirstPass) { + + const DenseSet &AvailableIn = BBS.AvailableIn; + DenseSet &AvailableOut = BBS.AvailableOut; + + if (BBS.Cleared) { + // AvailableOut does not change no matter how the input changes, just + // leave it be. We need to force this calculation the first time so that + // we have a AvailableOut at all. + if (FirstPass) { + AvailableOut = BBS.Contribution; + } + } else { + // Otherwise, we need to reduce the AvailableOut set by things which are no + // longer in our AvailableIn + DenseSet Temp = BBS.Contribution; + set_union(Temp, AvailableIn); + AvailableOut = std::move(Temp); + } + + DEBUG(dbgs() << "Transfered block " << BB->getName() << " from "; + PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end()); + dbgs() << " to "; + PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end()); + dbgs() << "\n";); +} + +/// A given derived pointer can have multiple base pointers through phi/selects. 
+/// This type indicates when the base pointer is exclusively constant +/// (ExclusivelySomeConstant), and if that constant is proven to be exclusively +/// null, we record that as ExclusivelyNull. In all other cases, the BaseType is +/// NonConstant. +enum BaseType { + NonConstant = 1, // Base pointers is not exclusively constant. + ExclusivelyNull, + ExclusivelySomeConstant // Base pointers for a given derived pointer is from a + // set of constants, but they are not exclusively + // null. +}; + +/// Return the baseType for Val which states whether Val is exclusively +/// derived from constant/null, or not exclusively derived from constant. +/// Val is exclusively derived off a constant base when all operands of phi and +/// selects are derived off a constant base. +static enum BaseType getBaseType(const Value *Val) { + + SmallVector Worklist; + DenseSet Visited; + bool isExclusivelyDerivedFromNull = true; + Worklist.push_back(Val); + // Strip through all the bitcasts and geps to get base pointer. Also check for + // the exclusive value when there can be multiple base pointers (through phis + // or selects). + while(!Worklist.empty()) { + const Value *V = Worklist.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + if (const auto *CI = dyn_cast(V)) { + Worklist.push_back(CI->stripPointerCasts()); + continue; + } + if (const auto *GEP = dyn_cast(V)) { + Worklist.push_back(GEP->getPointerOperand()); + continue; + } + // Push all the incoming values of phi node into the worklist for + // processing. + if (const auto *PN = dyn_cast(V)) { + for (Value *InV: PN->incoming_values()) + Worklist.push_back(InV); + continue; + } + if (const auto *SI = dyn_cast(V)) { + // Push in the true and false values + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + if (isa(V)) { + // We found at least one base pointer which is non-null, so this derived + // pointer is not exclusively derived from null. + if (V != Constant::getNullValue(V->getType())) + isExclusivelyDerivedFromNull = false; + // Continue processing the remaining values to make sure it's exclusively + // constant. + continue; + } + // At this point, we know that the base pointer is not exclusively + // constant. + return BaseType::NonConstant; + } + // Now, we know that the base pointer is exclusively constant, but we need to + // differentiate between exclusive null constant and non-null constant. + return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull + : BaseType::ExclusivelySomeConstant; +} + +static void Verify(const Function &F, const DominatorTree &DT) { + SpecificBumpPtrAllocator BSAllocator; + DenseMap BlockMap; + + DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"); + if (PrintOnly) + dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"; + + + for (const BasicBlock &BB : F) { + BasicBlockState *BBS = new(BSAllocator.Allocate()) BasicBlockState; + for (const auto &I : BB) + TransferInstruction(I, BBS->Cleared, BBS->Contribution); + BlockMap[&BB] = BBS; + } + + for (auto &BBI : BlockMap) { + GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap); + TransferBlock(BBI.first, *BBI.second, true); + } + + SetVector Worklist; + for (auto &BBI : BlockMap) + Worklist.insert(BBI.first); + + // This loop iterates the AvailableIn and AvailableOut sets to a fixed point. + // The AvailableIn and AvailableOut sets decrease as we iterate. 
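// For orientation, the fixed point computed below can be read as the pair of
// equations implemented by this loop together with TransferBlock() above:
//
//   AvailableIn[BB]  = intersection over predecessors P of AvailableOut[P]
//   AvailableOut[BB] = Contribution[BB]                        if BB is Cleared
//                    = Contribution[BB] union AvailableIn[BB]  otherwise
//
// Both sets start from an over-approximation (the dominating definitions) and
// only ever shrink, so the worklist iteration below is guaranteed to terminate.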
+ while (!Worklist.empty()) { + const BasicBlock *BB = Worklist.pop_back_val(); + BasicBlockState *BBS = BlockMap[BB]; + + size_t OldInCount = BBS->AvailableIn.size(); + for (const BasicBlock *PBB : predecessors(BB)) + set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut); + + if (OldInCount == BBS->AvailableIn.size()) + continue; + + assert(OldInCount > BBS->AvailableIn.size() && "invariant!"); + + size_t OldOutCount = BBS->AvailableOut.size(); + TransferBlock(BB, *BBS, false); + if (OldOutCount != BBS->AvailableOut.size()) { + assert(OldOutCount > BBS->AvailableOut.size() && "invariant!"); + Worklist.insert(succ_begin(BB), succ_end(BB)); + } + } + + // We now have all the information we need to decide if the use of a heap + // reference is legal or not, given our safepoint semantics. + + bool AnyInvalidUses = false; + + auto ReportInvalidUse = [&AnyInvalidUses](const Value &V, + const Instruction &I) { + errs() << "Illegal use of unrelocated value found!\n"; + errs() << "Def: " << V << "\n"; + errs() << "Use: " << I << "\n"; + if (!PrintOnly) + abort(); + AnyInvalidUses = true; + }; + + auto isNotExclusivelyConstantDerived = [](const Value *V) { + return getBaseType(V) == BaseType::NonConstant; + }; + + for (const BasicBlock &BB : F) { + // We destructively modify AvailableIn as we traverse the block instruction + // by instruction. + DenseSet &AvailableSet = BlockMap[&BB]->AvailableIn; + for (const Instruction &I : BB) { + if (const PHINode *PN = dyn_cast(&I)) { + if (containsGCPtrType(PN->getType())) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + const BasicBlock *InBB = PN->getIncomingBlock(i); + const Value *InValue = PN->getIncomingValue(i); + + if (isNotExclusivelyConstantDerived(InValue) && + !BlockMap[InBB]->AvailableOut.count(InValue)) + ReportInvalidUse(*InValue, *PN); + } + } else if (isa(I) && + containsGCPtrType(I.getOperand(0)->getType())) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + enum BaseType baseTyLHS = getBaseType(LHS), + baseTyRHS = getBaseType(RHS); + + // Returns true if LHS and RHS are unrelocated pointers and they are + // valid unrelocated uses. + auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, &RHS] () { + // A cmp instruction has valid unrelocated pointer operands only if + // both operands are unrelocated pointers. + // In the comparison between two pointers, if one is an unrelocated + // use, the other *should be* an unrelocated use, for this + // instruction to contain valid unrelocated uses. This unrelocated + // use can be a null constant as well, or another unrelocated + // pointer. + if (AvailableSet.count(LHS) || AvailableSet.count(RHS)) + return false; + // Constant pointers (that are not exclusively null) may have + // meaning in different VMs, so we cannot reorder the compare + // against constant pointers before the safepoint. In other words, + // comparison of an unrelocated use against a non-null constant + // maybe invalid. + if ((baseTyLHS == BaseType::ExclusivelySomeConstant && + baseTyRHS == BaseType::NonConstant) || + (baseTyLHS == BaseType::NonConstant && + baseTyRHS == BaseType::ExclusivelySomeConstant)) + return false; + // All other cases are valid cases enumerated below: + // 1. Comparison between an exlusively derived null pointer and a + // constant base pointer. + // 2. Comparison between an exlusively derived null pointer and a + // non-constant unrelocated base pointer. + // 3. Comparison between 2 unrelocated pointers. 
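// A concrete illustration with hypothetical IR (addrspace(1) being the GC heap,
// as in isGCPointerType() above):
//
//   %a = ...                                        ; gc pointer
//   call ... @llvm.experimental.gc.statepoint(...)  ; safepoint, %a not relocated
//   %c = icmp eq i8 addrspace(1)* %a, null          ; accepted: case 2 above
//   %v = load i8, i8 addrspace(1)* %a               ; reported as an illegal use
//
// The icmp is accepted because neither operand is an available (relocated)
// value and the constant side is exclusively null; the load is flagged by the
// operand check further below because %a is a non-constant base that is no
// longer in the AvailableSet after the statepoint.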
+ return true; + }; + if (!hasValidUnrelocatedUse()) { + // Print out all non-constant derived pointers that are unrelocated + // uses, which are invalid. + if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS)) + ReportInvalidUse(*LHS, I); + if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS)) + ReportInvalidUse(*RHS, I); + } + } else { + for (const Value *V : I.operands()) + if (containsGCPtrType(V->getType()) && + isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) + ReportInvalidUse(*V, I); + } + + bool Cleared = false; + TransferInstruction(I, Cleared, AvailableSet); + (void)Cleared; + } + } + + if (PrintOnly && !AnyInvalidUses) { + dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName() + << "\n"; + } +} diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index 44fe5e48c720..20e9c2b5fff2 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -538,7 +538,7 @@ bool CompositeType::indexValid(const Value *V) const { if (auto *STy = dyn_cast(this)) { // Structure indexes require (vectors of) 32-bit integer constants. In the // vector case all of the indices must be equal. - if (!V->getType()->getScalarType()->isIntegerTy(32)) + if (!V->getType()->isIntOrIntVectorTy(32)) return false; const Constant *C = dyn_cast(V); if (C && V->getType()->isVectorTy()) diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 819f63520c74..454a56a76923 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -2504,15 +2504,13 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert(SrcTy->getScalarType()->isPointerTy(), - "PtrToInt source must be pointer", &I); + Assert(SrcTy->isPtrOrPtrVectorTy(), "PtrToInt source must be pointer", &I); if (auto *PTy = dyn_cast(SrcTy->getScalarType())) Assert(!DL.isNonIntegralPointerType(PTy), "ptrtoint not supported for non-integral pointers"); - Assert(DestTy->getScalarType()->isIntegerTy(), - "PtrToInt result must be integral", &I); + Assert(DestTy->isIntOrIntVectorTy(), "PtrToInt result must be integral", &I); Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch", &I); @@ -2531,10 +2529,9 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert(SrcTy->getScalarType()->isIntegerTy(), + Assert(SrcTy->isIntOrIntVectorTy(), "IntToPtr source must be an integral", &I); - Assert(DestTy->getScalarType()->isPointerTy(), - "IntToPtr result must be a pointer", &I); + Assert(DestTy->isPtrOrPtrVectorTy(), "IntToPtr result must be a pointer", &I); if (auto *PTy = dyn_cast(DestTy->getScalarType())) Assert(!DL.isNonIntegralPointerType(PTy), @@ -2952,11 +2949,10 @@ void Verifier::visitICmpInst(ICmpInst &IC) { Assert(Op0Ty == Op1Ty, "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(), + Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPtrOrPtrVectorTy(), "Invalid operand types for ICmp instruction", &IC); // Check that the predicate is valid. 
- Assert(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && - IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE, + Assert(IC.isIntPredicate(), "Invalid predicate in ICmp instruction!", &IC); visitInstruction(IC); @@ -2972,8 +2968,7 @@ void Verifier::visitFCmpInst(FCmpInst &FC) { Assert(Op0Ty->isFPOrFPVectorTy(), "Invalid operand types for FCmp instruction", &FC); // Check that the predicate is valid. - Assert(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE && - FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE, + Assert(FC.isFPPredicate(), "Invalid predicate in FCmp instruction!", &FC); visitInstruction(FC); @@ -3011,7 +3006,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP); - Assert(GEP.getType()->getScalarType()->isPointerTy() && + Assert(GEP.getType()->isPtrOrPtrVectorTy() && GEP.getResultElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); @@ -3027,7 +3022,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { unsigned IndexWidth = IndexTy->getVectorNumElements(); Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP); } - Assert(IndexTy->getScalarType()->isIntegerTy(), + Assert(IndexTy->isIntOrIntVectorTy(), "All GEP indices should be of integer type"); } } @@ -3113,7 +3108,7 @@ void Verifier::visitLoadInst(LoadInst &LI) { ElTy, &LI); checkAtomicMemAccessSize(ElTy, &LI); } else { - Assert(LI.getSynchScope() == CrossThread, + Assert(LI.getSyncScopeID() == SyncScope::System, "Non-atomic load cannot have SynchronizationScope specified", &LI); } @@ -3142,7 +3137,7 @@ void Verifier::visitStoreInst(StoreInst &SI) { ElTy, &SI); checkAtomicMemAccessSize(ElTy, &SI); } else { - Assert(SI.getSynchScope() == CrossThread, + Assert(SI.getSyncScopeID() == SyncScope::System, "Non-atomic store cannot have SynchronizationScope specified", &SI); } visitInstruction(SI); @@ -4049,6 +4044,73 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "incorrect alignment of the source argument", CS); break; } + case Intrinsic::memmove_element_unordered_atomic: { + auto *MI = cast(CS.getInstruction()); + + ConstantInt *ElementSizeCI = + dyn_cast(MI->getRawElementSizeInBytes()); + Assert(ElementSizeCI, + "element size of the element-wise unordered atomic memory " + "intrinsic must be a constant int", + CS); + const APInt &ElementSizeVal = ElementSizeCI->getValue(); + Assert(ElementSizeVal.isPowerOf2(), + "element size of the element-wise atomic memory intrinsic " + "must be a power of 2", + CS); + + if (auto *LengthCI = dyn_cast(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + + auto IsValidAlignment = [&](uint64_t Alignment) { + return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); + }; + uint64_t DstAlignment = CS.getParamAlignment(0), + SrcAlignment = CS.getParamAlignment(1); + Assert(IsValidAlignment(DstAlignment), + "incorrect alignment of the destination argument", CS); + Assert(IsValidAlignment(SrcAlignment), + "incorrect alignment of the source argument", CS); + break; + } + case Intrinsic::memset_element_unordered_atomic: { + auto *MI = cast(CS.getInstruction()); + + ConstantInt *ElementSizeCI = + dyn_cast(MI->getRawElementSizeInBytes()); + 
Assert(ElementSizeCI, + "element size of the element-wise unordered atomic memory " + "intrinsic must be a constant int", + CS); + const APInt &ElementSizeVal = ElementSizeCI->getValue(); + Assert(ElementSizeVal.isPowerOf2(), + "element size of the element-wise atomic memory intrinsic " + "must be a power of 2", + CS); + + if (auto *LengthCI = dyn_cast(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + + auto IsValidAlignment = [&](uint64_t Alignment) { + return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); + }; + uint64_t DstAlignment = CS.getParamAlignment(0); + Assert(IsValidAlignment(DstAlignment), + "incorrect alignment of the destination argument", CS); + break; + } case Intrinsic::gcroot: case Intrinsic::gcwrite: case Intrinsic::gcread: @@ -4253,7 +4315,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // relocated pointer. It can be casted to the correct type later if it's // desired. However, they must have the same address space and 'vectorness' GCRelocateInst &Relocate = cast(*CS.getInstruction()); - Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(), + Assert(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(), "gc.relocate: relocated value must be a gc pointer", CS); auto ResultType = CS.getType(); diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 68b8c9fcb939..19973946ac5a 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -665,6 +665,15 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef Syms, auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( Sym.getIRName(), GlobalValue::ExternalLinkage, "")); ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); + + // For linker redefined symbols (via --wrap or --defsym) we want to + // switch the linkage to `weak` to prevent IPOs from happening. + // Find the summary in the module for this very GV and record the new + // linkage so that we can switch it when we import the GV. + if (Res.LinkerRedefined) + if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( + GUID, BM.getModuleIdentifier())) + S->setLinkage(GlobalValue::WeakAnyLinkage); } } } @@ -1021,7 +1030,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, // Collect for each module the list of function it defines (GUID -> // Summary). - StringMap> + StringMap ModuleToDefinedGVSummaries(ThinLTO.ModuleMap.size()); ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule( ModuleToDefinedGVSummaries); diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index defad1904989..f486e525b5e7 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -1256,6 +1256,18 @@ Error IRLinker::linkModuleFlagsMetadata() { return Error::success(); } +/// Return InlineAsm adjusted with target-specific directives if required. +/// For ARM and Thumb, we have to add directives to select the appropriate ISA +/// to support mixing module-level inline assembly from ARM and Thumb modules. 
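// For instance, module-level asm coming from a Thumb module, such as
//
//   orr r0, r0, r1
//
// is prepended with ".text\n.balign 2\n.thumb\n" by the function below, so the
// integrated assembler keeps assembling it as Thumb even after it is merged
// into an ARM destination module.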
+static std::string adjustInlineAsm(const std::string &InlineAsm, + const Triple &Triple) { + if (Triple.getArch() == Triple::thumb || Triple.getArch() == Triple::thumbeb) + return ".text\n.balign 2\n.thumb\n" + InlineAsm; + if (Triple.getArch() == Triple::arm || Triple.getArch() == Triple::armeb) + return ".text\n.balign 4\n.arm\n" + InlineAsm; + return InlineAsm; +} + Error IRLinker::run() { // Ensure metadata materialized before value mapping. if (SrcM->getMaterializer()) @@ -1293,11 +1305,13 @@ Error IRLinker::run() { // Append the module inline asm string. if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) { + std::string SrcModuleInlineAsm = adjustInlineAsm(SrcM->getModuleInlineAsm(), + SrcTriple); if (DstM.getModuleInlineAsm().empty()) - DstM.setModuleInlineAsm(SrcM->getModuleInlineAsm()); + DstM.setModuleInlineAsm(SrcModuleInlineAsm); else DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" + - SrcM->getModuleInlineAsm()); + SrcModuleInlineAsm); } // Loop over all of the linked values to compute type mappings. diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 30f357826805..c8dd63011943 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -26,6 +27,7 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" @@ -204,8 +206,7 @@ class ELFObjectWriter : public MCObjectWriter { void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; // Map from a signature symbol to the group section index using RevGroupMapTy = DenseMap; @@ -626,7 +627,10 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + uint64_t &FixedValue) { + MCAsmBackend &Backend = Asm.getBackend(); + bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel; const MCSectionELF &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 0318d916aa49..eaf6f19326eb 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -653,16 +653,14 @@ MCAssembler::handleFixup(const MCAsmLayout &Layout, MCFragment &F, // Evaluate the fixup. MCValue Target; uint64_t FixedValue; - bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & - MCFixupKindInfo::FKF_IsPCRel; - if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) { + bool IsResolved = evaluateFixup(Layout, Fixup, &F, Target, FixedValue); + if (!IsResolved) { // The fixup was unresolved, we need a relocation. Inform the object // writer of the relocation, and give it an opportunity to adjust the // fixup value if need be. 
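// Note that IsPCRel is no longer threaded through this call: object writers
// that need it now recompute it from the backend's fixup description, roughly
// (as the ELF and Wasm writers in this patch do)
//
//   bool IsPCRel = Asm.getBackend().getFixupKindInfo(Fixup.getKind()).Flags &
//                  MCFixupKindInfo::FKF_IsPCRel;
//
// and handleFixup() instead reports whether the fixup was resolved.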
- getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel, - FixedValue); + getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, FixedValue); } - return std::make_tuple(Target, FixedValue, IsPCRel); + return std::make_tuple(Target, FixedValue, IsResolved); } void MCAssembler::layout(MCAsmLayout &Layout) { @@ -738,12 +736,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) { llvm_unreachable("Unknown fragment with fixups!"); for (const MCFixup &Fixup : Fixups) { uint64_t FixedValue; - bool IsPCRel; + bool IsResolved; MCValue Target; - std::tie(Target, FixedValue, IsPCRel) = + std::tie(Target, FixedValue, IsResolved) = handleFixup(Layout, Frag, Fixup); getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue, - IsPCRel); + IsResolved); } } } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index c4e7cdbe095e..62bf0a58fdfa 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -449,7 +449,7 @@ void MachObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + uint64_t &FixedValue) { TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup, Target, FixedValue); } diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 82352cb50c70..0d31f65c49d9 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -36,8 +36,7 @@ using namespace llvm; -#undef DEBUG_TYPE -#define DEBUG_TYPE "reloc-info" +#define DEBUG_TYPE "mc" namespace { @@ -153,7 +152,7 @@ struct WasmRelocationEntry { } void print(raw_ostream &Out) const { - Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend + Out << "Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend << ", Type=" << Type << ", FixupSection=" << FixupSection; } @@ -199,6 +198,7 @@ class WasmObjectWriter : public MCObjectWriter { DenseMap FunctionTypeIndices; + SmallVector FunctionTypes; // TargetObjectWriter wrappers. 
bool is64Bit() const { return TargetObjectWriter->is64Bit(); } @@ -224,6 +224,7 @@ class WasmObjectWriter : public MCObjectWriter { SymbolIndices.clear(); IndirectSymbolIndices.clear(); FunctionTypeIndices.clear(); + FunctionTypes.clear(); MCObjectWriter::reset(); } @@ -231,8 +232,7 @@ class WasmObjectWriter : public MCObjectWriter { void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; @@ -276,6 +276,8 @@ class WasmObjectWriter : public MCObjectWriter { void writeRelocations(ArrayRef Relocations, uint64_t HeaderSize); uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry); + uint32_t getFunctionType(const MCSymbolWasm& Symbol); + uint32_t registerFunctionType(const MCSymbolWasm& Symbol); }; } // end anonymous namespace @@ -350,7 +352,10 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + uint64_t &FixedValue) { + MCAsmBackend &Backend = Asm.getBackend(); + bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel; const auto &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -401,15 +406,11 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCSymbolRefExpr *RefA = Target.getSymA(); const auto *SymA = RefA ? cast(&RefA->getSymbol()) : nullptr; - bool ViaWeakRef = false; if (SymA && SymA->isVariable()) { const MCExpr *Expr = SymA->getVariableValue(); - if (const auto *Inner = dyn_cast(Expr)) { - if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) { - SymA = cast(&Inner->getSymbol()); - ViaWeakRef = true; - } - } + const auto *Inner = cast(Expr); + if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) + llvm_unreachable("weakref used in reloc not yet implemented"); } // Put any constant offset in an addend. Offsets can be negative, and @@ -417,12 +418,8 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, // be negative and don't wrap. 
FixedValue = 0; - if (SymA) { - if (ViaWeakRef) - llvm_unreachable("weakref used in reloc not yet implemented"); - else - SymA->setUsedInReloc(); - } + if (SymA) + SymA->setUsedInReloc(); assert(!IsPCRel); assert(SymA); @@ -493,7 +490,7 @@ uint32_t WasmObjectWriter::getRelocationIndexValue( case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: if (!IndirectSymbolIndices.count(RelEntry.Symbol)) - report_fatal_error("symbol not found table index space:" + + report_fatal_error("symbol not found table index space: " + RelEntry.Symbol->getName()); return IndirectSymbolIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: @@ -502,12 +499,12 @@ uint32_t WasmObjectWriter::getRelocationIndexValue( case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: if (!SymbolIndices.count(RelEntry.Symbol)) - report_fatal_error("symbol not found function/global index space:" + + report_fatal_error("symbol not found function/global index space: " + RelEntry.Symbol->getName()); return SymbolIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: if (!TypeIndices.count(RelEntry.Symbol)) - report_fatal_error("symbol not found in type index space:" + + report_fatal_error("symbol not found in type index space: " + RelEntry.Symbol->getName()); return TypeIndices[RelEntry.Symbol]; default: @@ -913,6 +910,38 @@ void WasmObjectWriter::writeLinkingMetaDataSection( endSection(Section); } +uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm& Symbol) { + assert(Symbol.isFunction()); + assert(TypeIndices.count(&Symbol)); + return TypeIndices[&Symbol]; +} + +uint32_t WasmObjectWriter::registerFunctionType(const MCSymbolWasm& Symbol) { + assert(Symbol.isFunction()); + + WasmFunctionType F; + if (Symbol.isVariable()) { + const MCExpr *Expr = Symbol.getVariableValue(); + auto *Inner = cast(Expr); + const auto *ResolvedSym = cast(&Inner->getSymbol()); + F.Returns = ResolvedSym->getReturns(); + F.Params = ResolvedSym->getParams(); + } else { + F.Returns = Symbol.getReturns(); + F.Params = Symbol.getParams(); + } + + auto Pair = + FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); + if (Pair.second) + FunctionTypes.push_back(F); + TypeIndices[&Symbol] = Pair.first->second; + + DEBUG(dbgs() << "registerFunctionType: " << Symbol << " new:" << Pair.second << "\n"); + DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n"); + return Pair.first->second; +} + void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { DEBUG(dbgs() << "WasmObjectWriter::writeObject\n"); @@ -920,7 +949,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32; // Collect information from the available symbols. - SmallVector FunctionTypes; SmallVector Functions; SmallVector TableElems; SmallVector Globals; @@ -960,37 +988,27 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // Populate the Imports set. for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast(S); - int32_t Type; - if (WS.isFunction()) { - // Prepare the function's type, if we haven't seen it yet. 
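As a side note on registerFunctionType() above: it uses the usual "insert or
reuse" indexing idiom. A minimal, self-contained sketch of that idiom in plain
standard C++ (not LLVM's DenseMap/SmallVector types; the names and signatures
here are made up for illustration):

#include <cassert>
#include <cstddef>
#include <map>
#include <string>
#include <vector>

// Maps a signature to its index; signatures get indices in insertion order and
// duplicates reuse the index handed out the first time.
static std::map<std::string, std::size_t> TypeIndicesSketch;
static std::vector<std::string> TypesSketch;

std::size_t registerTypeSketch(const std::string &Signature) {
  auto Pair = TypeIndicesSketch.insert({Signature, TypesSketch.size()});
  if (Pair.second)                    // first time this signature is seen
    TypesSketch.push_back(Signature); // it receives the next free index
  return Pair.first->second;          // existing or freshly assigned index
}

int main() {
  assert(registerTypeSketch("(i32) -> i32") == 0);
  assert(registerTypeSketch("() -> void") == 1);
  assert(registerTypeSketch("(i32) -> i32") == 0); // deduplicated, index reused
  assert(TypesSketch.size() == 2);
  return 0;
}

In the writer itself the same shape appears keyed by WasmFunctionType for the
FunctionTypes list, with the resulting index also recorded per symbol in
TypeIndices.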
- WasmFunctionType F; - F.Returns = WS.getReturns(); - F.Params = WS.getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); + if (WS.isTemporary()) + continue; - Type = Pair.first->second; - } else { - Type = int32_t(PtrType); - } + if (WS.isFunction()) + registerFunctionType(WS); // If the symbol is not defined in this translation unit, import it. - if (!WS.isTemporary() && !WS.isDefined(/*SetUsed=*/false)) { + if (!WS.isDefined(/*SetUsed=*/false) || WS.isVariable()) { WasmImport Import; Import.ModuleName = WS.getModuleName(); Import.FieldName = WS.getName(); if (WS.isFunction()) { Import.Kind = wasm::WASM_EXTERNAL_FUNCTION; - Import.Type = Type; + Import.Type = getFunctionType(WS); SymbolIndices[&WS] = NumFuncImports; ++NumFuncImports; } else { Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; - Import.Type = Type; + Import.Type = int32_t(PtrType); SymbolIndices[&WS] = NumGlobalImports; ++NumGlobalImports; } @@ -1082,10 +1100,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (S.isTemporary() && S.getName().empty()) continue; - // Variable references (weak references) are handled in a second pass - if (S.isVariable()) - continue; - const auto &WS = static_cast(S); DEBUG(dbgs() << "MCSymbol: '" << S << "'" << " isDefined=" << S.isDefined() << " isExternal=" @@ -1097,20 +1111,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (WS.isWeak()) WeakSymbols.push_back(WS.getName()); + if (WS.isVariable()) + continue; + unsigned Index; if (WS.isFunction()) { - // Prepare the function's type, if we haven't seen it yet. - WasmFunctionType F; - F.Returns = WS.getReturns(); - F.Params = WS.getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); - - int32_t Type = Pair.first->second; - if (WS.isDefined(/*SetUsed=*/false)) { if (WS.getOffset() != 0) report_fatal_error( @@ -1125,21 +1131,21 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // Prepare the function. WasmFunction Func; - Func.Type = Type; + Func.Type = getFunctionType(WS); Func.Sym = &WS; SymbolIndices[&WS] = Index; Functions.push_back(Func); } else { - // Should be no such thing as weak undefined symbol - assert(!WS.isVariable()); - // An import; the index was assigned above. Index = SymbolIndices.find(&WS)->second; } + DEBUG(dbgs() << " -> function index: " << Index << "\n"); + // If needed, prepare the function to be called indirectly. - if (IsAddressTaken.count(&WS)) { + if (IsAddressTaken.count(&WS) != 0) { IndirectSymbolIndices[&WS] = TableElems.size(); + DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n"); TableElems.push_back(Index); } } else { @@ -1185,7 +1191,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Align->getMaxBytesToEmit()); DataBytes.resize(Size, Value); } else if (auto *Fill = dyn_cast(&Frag)) { - DataBytes.insert(DataBytes.end(), Size, Fill->getValue()); + DataBytes.insert(DataBytes.end(), Fill->getSize(), Fill->getValue()); } else { const auto &DataFrag = cast(Frag); const SmallVectorImpl &Contents = DataFrag.getContents(); @@ -1205,11 +1211,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Global.InitialValue = DataSection.getSectionOffset(); Global.ImportIndex = 0; SymbolIndices[&WS] = Index; + DEBUG(dbgs() << " -> global index: " << Index << "\n"); Globals.push_back(Global); } // If the symbol is visible outside this translation unit, export it. 
- if (WS.isExternal() && WS.isDefined(/*SetUsed=*/false)) { + if ((WS.isExternal() && WS.isDefined(/*SetUsed=*/false))) { WasmExport Export; Export.FieldName = WS.getName(); Export.Index = Index; @@ -1217,26 +1224,28 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; else Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; + DEBUG(dbgs() << " -> export " << Exports.size() << "\n"); Exports.push_back(Export); } } - // Handle weak aliases + // Handle weak aliases. We need to process these in a separate pass because + // we need to have processed the target of the alias before the alias itself + // and the symbols are not necessarily ordered in this way. for (const MCSymbol &S : Asm.symbols()) { if (!S.isVariable()) continue; - assert(S.isExternal()); assert(S.isDefined(/*SetUsed=*/false)); const auto &WS = static_cast(S); - - // Find the target symbol of this weak alias + // Find the target symbol of this weak alias and export that index const MCExpr *Expr = WS.getVariableValue(); - auto *Inner = dyn_cast(Expr); + auto *Inner = cast(Expr); const auto *ResolvedSym = cast(&Inner->getSymbol()); + DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym << "'\n"); + assert(SymbolIndices.count(ResolvedSym) > 0); uint32_t Index = SymbolIndices.find(ResolvedSym)->second; - DEBUG(dbgs() << "Weak alias: '" << WS << "' -> '" << ResolvedSym << "' = " << Index << "\n"); - SymbolIndices[&WS] = Index; + DEBUG(dbgs() << " -> index:" << Index << "\n"); WasmExport Export; Export.FieldName = WS.getName(); @@ -1245,7 +1254,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; else Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; - WeakSymbols.push_back(Export.FieldName); + DEBUG(dbgs() << " -> export " << Exports.size() << "\n"); Exports.push_back(Export); } @@ -1254,15 +1263,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) continue; - WasmFunctionType F; - F.Returns = Fixup.Symbol->getReturns(); - F.Params = Fixup.Symbol->getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); - - TypeIndices[Fixup.Symbol] = Pair.first->second; + registerFunctionType(*Fixup.Symbol); } // Write out the Wasm header. 
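Note: with the bool &IsPCRel out-parameter dropped from recordRelocation() above, object writers recover PC-relativity from the fixup kind flags instead. A minimal standalone sketch of that pattern (not part of the patch itself):

  // Derive PC-relativity from the target's fixup kind info, mirroring the
  // code added to WasmObjectWriter::recordRelocation above.
  #include "llvm/MC/MCAsmBackend.h"
  #include "llvm/MC/MCAssembler.h"
  #include "llvm/MC/MCFixup.h"
  #include "llvm/MC/MCFixupKindInfo.h"

  static bool isPCRelFixup(llvm::MCAssembler &Asm, const llvm::MCFixup &Fixup) {
    const llvm::MCFixupKindInfo &Info =
        Asm.getBackend().getFixupKindInfo(Fixup.getKind());
    return (Info.Flags & llvm::MCFixupKindInfo::FKF_IsPCRel) != 0;
  }
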
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index fc5234950391..956ae70b38d1 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -197,8 +197,7 @@ class WinCOFFObjectWriter : public MCObjectWriter { void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; void createFileSymbols(MCAssembler &Asm); void assignSectionNumbers(); @@ -708,9 +707,11 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( InSet, IsPCRel); } -void WinCOFFObjectWriter::recordRelocation( - MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { +void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { assert(Target.getSymA() && "Relocation must reference a symbol!"); const MCSymbol &A = Target.getSymA()->getSymbol(); diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index fff497ba5564..7f80bf0b83a0 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -567,20 +567,16 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) Ex.Name = readString(Ptr); Ex.Kind = readUint8(Ptr); Ex.Index = readVaruint32(Ptr); + WasmSymbol::SymbolType ExportType; + bool MakeSymbol = false; switch (Ex.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: - SymbolMap.try_emplace(Ex.Name, Symbols.size()); - Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT, - Sections.size(), i); - DEBUG(dbgs() << "Adding export: " << Symbols.back() - << " sym index:" << Symbols.size() << "\n"); + ExportType = WasmSymbol::SymbolType::FUNCTION_EXPORT; + MakeSymbol = true; break; case wasm::WASM_EXTERNAL_GLOBAL: - SymbolMap.try_emplace(Ex.Name, Symbols.size()); - Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT, - Sections.size(), i); - DEBUG(dbgs() << "Adding export: " << Symbols.back() - << " sym index:" << Symbols.size() << "\n"); + ExportType = WasmSymbol::SymbolType::GLOBAL_EXPORT; + MakeSymbol = true; break; case wasm::WASM_EXTERNAL_MEMORY: case wasm::WASM_EXTERNAL_TABLE: @@ -589,6 +585,20 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) return make_error( "Unexpected export kind", object_error::parse_failed); } + if (MakeSymbol) { + auto Pair = SymbolMap.try_emplace(Ex.Name, Symbols.size()); + if (Pair.second) { + Symbols.emplace_back(Ex.Name, ExportType, + Sections.size(), i); + DEBUG(dbgs() << "Adding export: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + } else { + uint32_t SymIndex = Pair.first->second; + Symbols[SymIndex] = WasmSymbol(Ex.Name, ExportType, Sections.size(), i); + DEBUG(dbgs() << "Replacing existing symbol: " << Symbols[SymIndex] + << " sym index:" << SymIndex << "\n"); + } + } Exports.push_back(Ex); } if (Ptr != End) @@ -665,15 +675,17 @@ Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) { } Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) { + const uint8_t *Start = Ptr; uint32_t Count = readVaruint32(Ptr); DataSegments.reserve(Count); while (Count--) { - wasm::WasmDataSegment Segment; - Segment.Index = readVaruint32(Ptr); - if (Error Err = 
readInitExpr(Segment.Offset, Ptr)) + WasmSegment Segment; + Segment.Data.MemoryIndex = readVaruint32(Ptr); + if (Error Err = readInitExpr(Segment.Data.Offset, Ptr)) return Err; uint32_t Size = readVaruint32(Ptr); - Segment.Content = ArrayRef(Ptr, Size); + Segment.Data.Content = ArrayRef(Ptr, Size); + Segment.SectionOffset = Ptr - Start; Ptr += Size; DataSegments.push_back(Segment); } diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp index 1371eacdf8f2..246eee5ddb31 100644 --- a/lib/Object/WindowsResource.cpp +++ b/lib/Object/WindowsResource.cpp @@ -609,8 +609,8 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { for (auto const &Child : StringChildren) { auto *Entry = reinterpret_cast(BufferStart + CurrentOffset); - Entry->Identifier.NameOffset = - StringTableOffsets[Child.second->getStringIndex()]; + Entry->Identifier.setNameOffset( + StringTableOffsets[Child.second->getStringIndex()]); if (Child.second->checkIsDataNode()) { Entry->Offset.DataEntryOffset = NextLevelOffset; NextLevelOffset += sizeof(coff_resource_data_entry); diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 2040efdc9d11..6a68cd265ad8 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -345,7 +345,8 @@ void MappingTraits::mapping(IO &IO, void MappingTraits::mapping( IO &IO, WasmYAML::DataSegment &Segment) { - IO.mapRequired("Index", Segment.Index); + IO.mapOptional("SectionOffset", Segment.SectionOffset); + IO.mapRequired("MemoryIndex", Segment.MemoryIndex); IO.mapRequired("Offset", Segment.Offset); IO.mapRequired("Content", Segment.Content); } diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index acb9e8d015bc..bcd365236e46 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -225,11 +225,15 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const { return {}; } -std::vector OptTable::findByPrefix(StringRef Cur) const { +std::vector +OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const { std::vector Ret; for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) { - if (!In.Prefixes) + if (!In.Prefixes || (!In.HelpText && !In.GroupID)) continue; + if (In.Flags & DisableFlags) + continue; + for (int I = 0; In.Prefixes[I]; I++) { std::string S = std::string(In.Prefixes[I]) + std::string(In.Name); if (StringRef(S).startswith(Cur)) diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 0380bd991d71..9e0cf27aa17b 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -281,33 +281,52 @@ AnalysisKey NoOpLoopAnalysis::Key; } // End anonymous namespace. 
+void PassBuilder::invokePeepholeEPCallbacks( + FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + for (auto &C : PeepholeEPCallbacks) + C(FPM, Level); +} + void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) { #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ MAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : ModuleAnalysisRegistrationCallbacks) + C(MAM); } void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) { #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ CGAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : CGSCCAnalysisRegistrationCallbacks) + C(CGAM); } void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) { #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ FAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : FunctionAnalysisRegistrationCallbacks) + C(FAM); } void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) { #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ LAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : LoopAnalysisRegistrationCallbacks) + C(LAM); } FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, + bool PrepareForThinLTO) { assert(Level != O0 && "Must request optimizations!"); FunctionPassManager FPM(DebugLogging); @@ -340,6 +359,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if (!isOptimizingForSize(Level)) FPM.addPass(LibCallsShrinkWrapPass()); + invokePeepholeEPCallbacks(FPM, Level); + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); @@ -363,11 +384,19 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); LPM2.addPass(LoopIdiomRecognizePass()); + + for (auto &C : LateLoopOptimizationsEPCallbacks) + C(LPM2, Level); + LPM2.addPass(LoopDeletionPass()); - // FIXME: The old pass manager has a hack to disable loop unrolling during - // ThinLTO when using sample PGO. Need to either fix it or port some - // workaround. - LPM2.addPass(LoopUnrollPass::createFull(Level)); + // Do not enable unrolling in PrepareForThinLTO phase during sample PGO + // because it changes IR to makes profile annotation in back compile + // inaccurate. + if (!PrepareForThinLTO || !PGOOpt || PGOOpt->SampleProfileFile.empty()) + LPM2.addPass(LoopUnrollPass::createFull(Level)); + + for (auto &C : LoopOptimizerEndEPCallbacks) + C(LPM2, Level); // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. @@ -403,6 +432,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Run instcombine after redundancy and dead bit elimination to exploit // opportunities opened up by them. FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. @@ -411,19 +441,24 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); + for (auto &C : ScalarOptimizerLateEPCallbacks) + C(FPM, Level); + // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. 
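Note: the PeepholeEPCallbacks, *AnalysisRegistrationCallbacks and *PipelineParsingCallbacks vectors iterated above are filled in by clients. A hypothetical client sketch, assuming the matching registerPeepholeEPCallback / registerPipelineParsingCallback declarations that accompany this change in PassBuilder.h (method names inferred from the vectors; they are not shown in this hunk):

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/Scalar/DCE.h"

  using namespace llvm;

  static void configureCallbacks(PassBuilder &PB) {
    // Run an extra cleanup pass at every peephole extension point.
    PB.registerPeepholeEPCallback(
        [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
          FPM.addPass(DCEPass());
        });

    // Make "my-dce" parseable in textual pipelines, e.g. -passes=my-dce.
    PB.registerPipelineParsingCallback(
        [](StringRef Name, FunctionPassManager &FPM,
           ArrayRef<PassBuilder::PipelineElement>) {
          if (Name != "my-dce")
            return false;
          FPM.addPass(DCEPass());
          return true;
        });
  }
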
FPM.addPass(ADCEPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); return FPM; } -static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, - PassBuilder::OptimizationLevel Level, - bool RunProfileGen, std::string ProfileGenFile, - std::string ProfileUseFile) { +void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, + PassBuilder::OptimizationLevel Level, + bool RunProfileGen, + std::string ProfileGenFile, + std::string ProfileUseFile) { // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification @@ -448,9 +483,8 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. FPM.addPass(InstCombinePass()); // Combine silly sequences. + invokePeepholeEPCallbacks(FPM, Level); - // FIXME: Here the old pass manager inserts peephole extensions. - // Add them when they're supported. CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); @@ -490,7 +524,8 @@ getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) { ModulePassManager PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, + bool PrepareForThinLTO) { ModulePassManager MPM(DebugLogging); // Do basic inference of function attributes from known properties of system @@ -530,6 +565,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // optimizations. FunctionPassManager GlobalCleanupPM(DebugLogging); GlobalCleanupPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(GlobalCleanupPM, Level); + GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); @@ -544,8 +581,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); // Indirect call promotion that promotes intra-module targes only. - MPM.addPass(PGOIndirectCallPromotion( - false, PGOOpt && !PGOOpt->SampleProfileFile.empty())); + // Do not enable it in PrepareForThinLTO phase during sample PGO because + // it changes IR to makes profile annotation in back compile inaccurate. + if (!PrepareForThinLTO || PGOOpt->SampleProfileFile.empty()) + MPM.addPass(PGOIndirectCallPromotion( + false, PGOOpt && !PGOOpt->SampleProfileFile.empty())); } // Require the GlobalsAA analysis for the module so we can query it within @@ -570,7 +610,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. - MainCGPipeline.addPass(InlinerPass(getInlineParamsFromOptLevel(Level))); + // For PrepareForThinLTO pass, we disable hot-caller heuristic for sample PGO + // because it makes profile annotation in the backend inaccurate. 
+ InlineParams IP = getInlineParamsFromOptLevel(Level); + if (PrepareForThinLTO && PGOOpt && !PGOOpt->SampleProfileFile.empty()) + IP.HotCallSiteThreshold = 0; + MainCGPipeline.addPass(InlinerPass(IP)); // Now deduce any function attributes based in the current code. MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); @@ -583,7 +628,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Lastly, add the core function simplification pipeline nested inside the // CGSCC walk. MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( - buildFunctionSimplificationPipeline(Level, DebugLogging))); + buildFunctionSimplificationPipeline(Level, DebugLogging, + PrepareForThinLTO))); + + for (auto &C : CGSCCOptimizerLateEPCallbacks) + C(MainCGPipeline, Level); // We wrap the CGSCC pipeline in a devirtualization repeater. This will try // to detect when we devirtualize indirect calls and iterate the SCC passes @@ -643,6 +692,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // rather than on each loop in an inside-out manner, and so they are actually // function passes. + for (auto &C : VectorizerStartEPCallbacks) + C(OptimizePM, Level); + // First rotate loops that may have been un-rotated by prior passes. OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); @@ -726,7 +778,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, MPM.addPass(ForceFunctionAttrsPass()); // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/false)); // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); @@ -747,7 +800,8 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, // If we are planning to perform ThinLTO later, we don't bloat the code with // unrolling/vectorization/... now. Just simplify the module as much as we // can. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/true)); // Run partial inlining pass to partially inline functions that have // large bodies. @@ -785,7 +839,8 @@ PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level, !PGOOpt->ProfileUseFile.empty())); // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/false)); // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); @@ -868,8 +923,11 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // simplification opportunities, and both can propagate functions through // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. - // FIXME: add peephole extensions here as the legacy PM does. - MPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass())); + FunctionPassManager PeepholeFPM(DebugLogging); + PeepholeFPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(PeepholeFPM, Level); + + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM))); // Note: historically, the PruneEH pass was run first to deduce nounwind and // generally clean up exception handling overhead. 
It isn't clear this is @@ -887,10 +945,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(GlobalDCEPass()); FunctionPassManager FPM(DebugLogging); - // The IPO Passes may leave cruft around. Clean up after them. - // FIXME: add peephole extensions here as the legacy PM does. FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); + FPM.addPass(JumpThreadingPass()); // Break up allocas @@ -937,8 +995,11 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MainFPM.add(AlignmentFromAssumptionsPass()); #endif - // FIXME: add peephole extensions to the PM here. + // FIXME: Conditionally run LoadCombine here, after it's ported + // (in case we still have this pass, given its questionable usefulness). + MainFPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM))); @@ -1021,7 +1082,27 @@ static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) { Name.startswith("lto"); } -static bool isModulePassName(StringRef Name) { +/// Tests whether registered callbacks will accept a given pass name. +/// +/// When parsing a pipeline text, the type of the outermost pipeline may be +/// omitted, in which case the type is automatically determined from the first +/// pass name in the text. This may be a name that is handled through one of the +/// callbacks. We check this through the oridinary parsing callbacks by setting +/// up a dummy PassManager in order to not force the client to also handle this +/// type of query. +template +static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) { + if (!Callbacks.empty()) { + PassManagerT DummyPM; + for (auto &CB : Callbacks) + if (CB(Name, DummyPM, {})) + return true; + } + return false; +} + +template +static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) { // Manually handle aliases for pre-configured pipeline fragments. if (startsWithDefaultPipelineAliasPrefix(Name)) return DefaultAliasRegex.match(Name); @@ -1046,10 +1127,11 @@ static bool isModulePassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName(Name, Callbacks); } -static bool isCGSCCPassName(StringRef Name) { +template +static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "cgscc") return true; @@ -1070,10 +1152,11 @@ static bool isCGSCCPassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName(Name, Callbacks); } -static bool isFunctionPassName(StringRef Name) { +template +static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "function") return true; @@ -1092,10 +1175,11 @@ static bool isFunctionPassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName(Name, Callbacks); } -static bool isLoopPassName(StringRef Name) { +template +static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. 
if (Name == "loop") return true; @@ -1112,7 +1196,7 @@ static bool isLoopPassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName(Name, Callbacks); } Optional> @@ -1213,6 +1297,11 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM))); return true; } + + for (auto &C : ModulePipelineParsingCallbacks) + if (C(Name, MPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } @@ -1225,12 +1314,12 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, assert(Matches.size() == 3 && "Must capture two matched strings!"); OptimizationLevel L = StringSwitch(Matches[2]) - .Case("O0", O0) - .Case("O1", O1) - .Case("O2", O2) - .Case("O3", O3) - .Case("Os", Os) - .Case("Oz", Oz); + .Case("O0", O0) + .Case("O1", O1) + .Case("O2", O2) + .Case("O3", O3) + .Case("Os", Os) + .Case("Oz", Oz); if (L == O0) // At O0 we do nothing at all! return true; @@ -1270,6 +1359,9 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, } #include "PassRegistry.def" + for (auto &C : ModulePipelineParsingCallbacks) + if (C(Name, MPM, InnerPipeline)) + return true; return false; } @@ -1317,11 +1409,16 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, *MaxRepetitions, DebugLogging)); return true; } + + for (auto &C : CGSCCPipelineParsingCallbacks) + if (C(Name, CGPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. #define CGSCC_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(CREATE_PASS); \ @@ -1342,6 +1439,9 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, } #include "PassRegistry.def" + for (auto &C : CGSCCPipelineParsingCallbacks) + if (C(Name, CGPM, InnerPipeline)) + return true; return false; } @@ -1379,11 +1479,16 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM, FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM))); return true; } + + for (auto &C : FunctionPipelineParsingCallbacks) + if (C(Name, FPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ FPM.addPass(CREATE_PASS); \ @@ -1403,6 +1508,9 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM, } #include "PassRegistry.def" + for (auto &C : FunctionPipelineParsingCallbacks) + if (C(Name, FPM, InnerPipeline)) + return true; return false; } @@ -1430,11 +1538,16 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM))); return true; } + + for (auto &C : LoopPipelineParsingCallbacks) + if (C(Name, LPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. 
#define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ LPM.addPass(CREATE_PASS); \ @@ -1455,6 +1568,9 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, } #include "PassRegistry.def" + for (auto &C : LoopPipelineParsingCallbacks) + if (C(Name, LPM, InnerPipeline)) + return true; return false; } @@ -1473,6 +1589,9 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) { } #include "PassRegistry.def" + for (auto &C : AAParsingCallbacks) + if (C(Name, AA)) + return true; return false; } @@ -1539,7 +1658,7 @@ bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM, return true; } -// Primary pass pipeline description parsing routine. +// Primary pass pipeline description parsing routine for a \c ModulePassManager // FIXME: Should this routine accept a TargetMachine or require the caller to // pre-populate the analysis managers with target-specific stuff? bool PassBuilder::parsePassPipeline(ModulePassManager &MPM, @@ -1553,21 +1672,70 @@ bool PassBuilder::parsePassPipeline(ModulePassManager &MPM, // automatically. StringRef FirstName = Pipeline->front().Name; - if (!isModulePassName(FirstName)) { - if (isCGSCCPassName(FirstName)) + if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) { + if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) { Pipeline = {{"cgscc", std::move(*Pipeline)}}; - else if (isFunctionPassName(FirstName)) + } else if (isFunctionPassName(FirstName, + FunctionPipelineParsingCallbacks)) { Pipeline = {{"function", std::move(*Pipeline)}}; - else if (isLoopPassName(FirstName)) + } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) { Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}}; - else + } else { + for (auto &C : TopLevelPipelineParsingCallbacks) + if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging)) + return true; + // Unknown pass name! 
return false; + } } return parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging); } +// Primary pass pipeline description parsing routine for a \c CGSCCPassManager +bool PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + StringRef FirstName = Pipeline->front().Name; + if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) + return false; + + return parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging); +} + +// Primary pass pipeline description parsing routine for a \c +// FunctionPassManager +bool PassBuilder::parsePassPipeline(FunctionPassManager &FPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + StringRef FirstName = Pipeline->front().Name; + if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks)) + return false; + + return parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass, + DebugLogging); +} + +// Primary pass pipeline description parsing routine for a \c LoopPassManager +bool PassBuilder::parsePassPipeline(LoopPassManager &CGPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + return parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging); +} + bool PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) { // If the pipeline just consists of the word 'default' just replace the AA // manager with our default one. diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index a1d18724fcd5..48c1643cb13c 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -460,9 +460,9 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { return Error::success(); } -void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, - InstrProfValueSiteRecord &Input, - uint64_t Weight) { +void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input, + uint64_t Weight, + function_ref Warn) { this->sortByTargetValues(); Input.sortByTargetValues(); auto I = ValueData.begin(); @@ -475,7 +475,7 @@ void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, bool Overflowed; I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); ++I; continue; } @@ -483,25 +483,25 @@ void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, } } -void InstrProfValueSiteRecord::scale(SoftInstrProfErrors &SIPE, - uint64_t Weight) { +void InstrProfValueSiteRecord::scale(uint64_t Weight, + function_ref Warn) { for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { bool Overflowed; I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } } // Merge Value Profile data from Src record to this record for ValueKind. // Scale merged value counts by \p Weight. 
-void InstrProfRecord::mergeValueProfData(uint32_t ValueKind, - InstrProfRecord &Src, - uint64_t Weight) { +void InstrProfRecord::mergeValueProfData( + uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight, + function_ref Warn) { uint32_t ThisNumValueSites = getNumValueSites(ValueKind); uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); if (ThisNumValueSites != OtherNumValueSites) { - SIPE.addError(instrprof_error::value_site_count_mismatch); + Warn(instrprof_error::value_site_count_mismatch); return; } if (!ThisNumValueSites) @@ -511,14 +511,15 @@ void InstrProfRecord::mergeValueProfData(uint32_t ValueKind, MutableArrayRef OtherSiteRecords = Src.getValueSitesForKind(ValueKind); for (uint32_t I = 0; I < ThisNumValueSites; I++) - ThisSiteRecords[I].merge(SIPE, OtherSiteRecords[I], Weight); + ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight, Warn); } -void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) { +void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight, + function_ref Warn) { // If the number of counters doesn't match we either have bad data // or a hash collision. if (Counts.size() != Other.Counts.size()) { - SIPE.addError(instrprof_error::count_mismatch); + Warn(instrprof_error::count_mismatch); return; } @@ -527,27 +528,30 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) { Counts[I] = SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - mergeValueProfData(Kind, Other, Weight); + mergeValueProfData(Kind, Other, Weight, Warn); } -void InstrProfRecord::scaleValueProfData(uint32_t ValueKind, uint64_t Weight) { +void InstrProfRecord::scaleValueProfData( + uint32_t ValueKind, uint64_t Weight, + function_ref Warn) { for (auto &R : getValueSitesForKind(ValueKind)) - R.scale(SIPE, Weight); + R.scale(Weight, Warn); } -void InstrProfRecord::scale(uint64_t Weight) { +void InstrProfRecord::scale(uint64_t Weight, + function_ref Warn) { for (auto &Count : this->Counts) { bool Overflowed; Count = SaturatingMultiply(Count, Weight, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - scaleValueProfData(Kind, Weight); + scaleValueProfData(Kind, Weight, Warn); } // Map indirect call target name hash to name string. diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 1ed1fb8b6f0b..1b39a0695aac 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -221,7 +221,7 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { #undef VP_READ_ADVANCE } -Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { +Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { // Skip empty lines and comments. 
while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) ++Line; @@ -377,13 +377,13 @@ Error RawInstrProfReader::readHeader( } template -Error RawInstrProfReader::readName(InstrProfRecord &Record) { +Error RawInstrProfReader::readName(NamedInstrProfRecord &Record) { Record.Name = getName(Data->NameRef); return success(); } template -Error RawInstrProfReader::readFuncHash(InstrProfRecord &Record) { +Error RawInstrProfReader::readFuncHash(NamedInstrProfRecord &Record) { Record.Hash = swap(Data->FuncHash); return success(); } @@ -445,7 +445,7 @@ Error RawInstrProfReader::readValueProfilingData( } template -Error RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { +Error RawInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { if (atEnd()) // At this point, ValueDataStart field points to the next header. if (Error E = readNextHeader(getNextHeaderPos())) @@ -550,7 +550,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, template Error InstrProfReaderIndex::getRecords( - StringRef FuncName, ArrayRef &Data) { + StringRef FuncName, ArrayRef &Data) { auto Iter = HashTable->find(FuncName); if (Iter == HashTable->end()) return make_error(instrprof_error::unknown_function); @@ -564,7 +564,7 @@ Error InstrProfReaderIndex::getRecords( template Error InstrProfReaderIndex::getRecords( - ArrayRef &Data) { + ArrayRef &Data) { if (atEnd()) return make_error(instrprof_error::eof); @@ -644,7 +644,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); // FIXME: This only computes an empty summary. Need to call addRecord for - // all InstrProfRecords to get the correct summary. + // all NamedInstrProfRecords to get the correct summary. 
this->Summary = Builder.getSummary(); return Cur; } @@ -707,7 +707,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { Expected IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, uint64_t FuncHash) { - ArrayRef Data; + ArrayRef Data; Error Err = Index->getRecords(FuncName, Data); if (Err) return std::move(Err); @@ -732,10 +732,10 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, return success(); } -Error IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { +Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { static unsigned RecordIndex = 0; - ArrayRef Data; + ArrayRef Data; Error E = Index->getRecords(Data); if (E) diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 9efea78ed2a8..ce3f8806e12e 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -176,38 +176,46 @@ void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; } -Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) { - auto &ProfileDataMap = FunctionData[I.Name]; +void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight, + function_ref Warn) { + auto Name = I.Name; + auto Hash = I.Hash; + addRecord(Name, Hash, std::move(I), Weight, Warn); +} + +void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, + InstrProfRecord &&I, uint64_t Weight, + function_ref Warn) { + auto &ProfileDataMap = FunctionData[Name]; bool NewFunc; ProfilingData::iterator Where; std::tie(Where, NewFunc) = - ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord())); + ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord())); InstrProfRecord &Dest = Where->second; + auto MapWarn = [&](instrprof_error E) { + Warn(make_error(E)); + }; + if (NewFunc) { // We've never seen a function with this name and hash, add it. Dest = std::move(I); - // Fix up the name to avoid dangling reference. - Dest.Name = FunctionData.find(Dest.Name)->getKey(); if (Weight > 1) - Dest.scale(Weight); + Dest.scale(Weight, MapWarn); } else { // We're updating a function we've seen before. 
- Dest.merge(I, Weight); + Dest.merge(I, Weight, MapWarn); } Dest.sortValueData(); - - return Dest.takeError(); } -Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) { +void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, + function_ref Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) - if (Error E = addRecord(std::move(Func.second), 1)) - return E; - return Error::success(); + addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -323,11 +331,12 @@ static const char *ValueProfKindStr[] = { #include "llvm/ProfileData/InstrProfData.inc" }; -void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, +void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, + const InstrProfRecord &Func, InstrProfSymtab &Symtab, raw_fd_ostream &OS) { - OS << Func.Name << "\n"; - OS << "# Func Hash:\n" << Func.Hash << "\n"; + OS << Name << "\n"; + OS << "# Func Hash:\n" << Hash << "\n"; OS << "# Num Counters:\n" << Func.Counts.size() << "\n"; OS << "# Counter Values:\n"; for (uint64_t Count : Func.Counts) @@ -375,6 +384,6 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) for (const auto &Func : I.getValue()) - writeRecordInText(Func.second, Symtab, OS); + writeRecordInText(I.getKey(), Func.first, Func.second, Symtab, OS); return Error::success(); } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 0345a5e3d2a1..50173f5256bf 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -1236,7 +1236,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc, << ": Not enough positional command line arguments specified!\n" << "Must specify at least " << NumPositionalRequired << " positional argument" << (NumPositionalRequired > 1 ? "s" : "") - << ": See: " << argv[0] << " - help\n"; + << ": See: " << argv[0] << " -help\n"; ErrorParsing = true; } else if (!HasUnlimitedPositionals && diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 9398789cea87..d8422115eae8 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm-c/Support.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Config/config.h" #include "llvm/Support/ManagedStatic.h" @@ -73,19 +74,37 @@ class DynamicLibrary::HandleSet { return true; } - void *Lookup(const char *Symbol) { - // Process handle gets first try. + void *LibLookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) { + if (Order & SO_LoadOrder) { + for (void *Handle : Handles) { + if (void *Ptr = DLSym(Handle, Symbol)) + return Ptr; + } + } else { + for (void *Handle : llvm::reverse(Handles)) { + if (void *Ptr = DLSym(Handle, Symbol)) + return Ptr; + } + } + return nullptr; + } + + void *Lookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) { + assert(!((Order & SO_LoadedFirst) && (Order & SO_LoadedLast)) && + "Invalid Ordering"); + + if (!Process || (Order & SO_LoadedFirst)) { + if (void *Ptr = LibLookup(Symbol, Order)) + return Ptr; + } if (Process) { + // Use OS facilities to search the current binary and all loaded libs. 
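Note: with SoftInstrProfErrors replaced by explicit Warn callbacks above, callers of InstrProfWriter::addRecord now supply the error handler themselves. A small illustrative sketch (the record name, hash and counts are made up; assumes the NamedInstrProfRecord(Name, Hash, Counts) constructor introduced by this refactoring):

  #include "llvm/ProfileData/InstrProf.h"
  #include "llvm/ProfileData/InstrProfWriter.h"
  #include "llvm/Support/Error.h"

  using namespace llvm;

  static void addOneRecord(InstrProfWriter &Writer) {
    NamedInstrProfRecord Rec("main", /*Hash=*/0x1234, /*Counts=*/{1, 2, 3});
    Writer.addRecord(std::move(Rec), /*Weight=*/1, [](Error E) {
      // Soft errors (e.g. counter overflow) arrive here instead of being
      // accumulated on the record.
      consumeError(std::move(E));
    });
  }
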
if (void *Ptr = DLSym(Process, Symbol)) return Ptr; -#ifndef NDEBUG - for (void *Handle : Handles) - assert(!DLSym(Handle, Symbol) && "Symbol exists in non process handle"); -#endif - } else { - // Iterate in reverse, so newer libraries/symbols override older. - for (auto &&I = Handles.rbegin(), E = Handles.rend(); I != E; ++I) { - if (void *Ptr = DLSym(*I, Symbol)) + + // Search any libs that might have been skipped because of RTLD_LOCAL. + if (Order & SO_LoadedLast) { + if (void *Ptr = LibLookup(Symbol, Order)) return Ptr; } } @@ -113,6 +132,8 @@ static llvm::ManagedStatic> SymbolsMutex; #endif char DynamicLibrary::Invalid; +DynamicLibrary::SearchOrdering DynamicLibrary::SearchOrder = + DynamicLibrary::SO_Linker; namespace llvm { void *SearchForAddressOfSpecialSymbol(const char *SymbolName) { @@ -170,7 +191,7 @@ void *DynamicLibrary::SearchForAddressOfSymbol(const char *SymbolName) { // Now search the libraries. if (OpenedHandles.isConstructed()) { - if (void *Ptr = OpenedHandles->Lookup(SymbolName)) + if (void *Ptr = OpenedHandles->Lookup(SymbolName, SearchOrder)) return Ptr; } } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index a7d3a18003ee..fe69151665c6 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -20,15 +20,14 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Threading.h" #include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" #include #include +#include +#include #if defined(HAVE_UNISTD_H) # include @@ -43,18 +42,25 @@ using namespace llvm; static fatal_error_handler_t ErrorHandler = nullptr; static void *ErrorHandlerUserData = nullptr; -static ManagedStatic ErrorHandlerMutex; +static fatal_error_handler_t BadAllocErrorHandler = nullptr; +static void *BadAllocErrorHandlerUserData = nullptr; + +// Mutexes to synchronize installing error handlers and calling error handlers. +// Do not use ManagedStatic, or that may allocate memory while attempting to +// report an OOM. +static std::mutex ErrorHandlerMutex; +static std::mutex BadAllocErrorHandlerMutex; void llvm::install_fatal_error_handler(fatal_error_handler_t handler, void *user_data) { - llvm::MutexGuard Lock(*ErrorHandlerMutex); + std::lock_guard Lock(ErrorHandlerMutex); assert(!ErrorHandler && "Error handler already registered!\n"); ErrorHandler = handler; ErrorHandlerUserData = user_data; } void llvm::remove_fatal_error_handler() { - llvm::MutexGuard Lock(*ErrorHandlerMutex); + std::lock_guard Lock(ErrorHandlerMutex); ErrorHandler = nullptr; ErrorHandlerUserData = nullptr; } @@ -77,7 +83,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { { // Only acquire the mutex while reading the handler, so as not to invoke a // user-supplied callback under a lock. 
- llvm::MutexGuard Lock(*ErrorHandlerMutex); + std::lock_guard Lock(ErrorHandlerMutex); handler = ErrorHandler; handlerData = ErrorHandlerUserData; } @@ -104,6 +110,48 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { exit(1); } +void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, + void *user_data) { + std::lock_guard Lock(BadAllocErrorHandlerMutex); + assert(!ErrorHandler && "Bad alloc error handler already registered!\n"); + BadAllocErrorHandler = handler; + BadAllocErrorHandlerUserData = user_data; +} + +void llvm::remove_bad_alloc_error_handler() { + std::lock_guard Lock(BadAllocErrorHandlerMutex); + BadAllocErrorHandler = nullptr; + BadAllocErrorHandlerUserData = nullptr; +} + +void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) { + fatal_error_handler_t Handler = nullptr; + void *HandlerData = nullptr; + { + // Only acquire the mutex while reading the handler, so as not to invoke a + // user-supplied callback under a lock. + std::lock_guard Lock(BadAllocErrorHandlerMutex); + Handler = BadAllocErrorHandler; + HandlerData = BadAllocErrorHandlerUserData; + } + + if (Handler) { + Handler(HandlerData, Reason, GenCrashDiag); + llvm_unreachable("bad alloc handler should not return"); + } + +#ifdef LLVM_ENABLE_EXCEPTIONS + // If exceptions are enabled, make OOM in malloc look like OOM in new. + throw std::bad_alloc(); +#else + // Don't call the normal error handler. It may allocate memory. Directly write + // an OOM to stderr and abort. + char OOMMessage[] = "LLVM ERROR: out of memory\n"; + (void)::write(2, OOMMessage, strlen(OOMMessage)); + abort(); +#endif +} + void llvm::llvm_unreachable_internal(const char *msg, const char *file, unsigned line) { // This code intentionally doesn't call the ErrorHandler callback, because diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 232efe648b03..9f22f89b3c9e 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -281,11 +281,17 @@ enum ProcessorVendors { }; enum ProcessorTypes { - INTEL_ATOM = 1, + INTEL_BONNELL = 1, INTEL_CORE2, INTEL_COREI7, AMDFAM10H, AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + // Entries below this are not in libgcc/compiler-rt. INTEL_i386, INTEL_i486, INTEL_PENTIUM, @@ -295,16 +301,13 @@ enum ProcessorTypes { INTEL_PENTIUM_IV, INTEL_PENTIUM_M, INTEL_CORE_DUO, - INTEL_XEONPHI, INTEL_X86_64, INTEL_NOCONA, INTEL_PRESCOTT, AMD_i486, AMDPENTIUM, AMDATHLON, - AMDFAM14H, - AMDFAM16H, - AMDFAM17H, + INTEL_GOLDMONT, CPU_TYPE_MAX }; @@ -317,34 +320,26 @@ enum ProcessorSubtypes { AMDFAM10H_ISTANBUL, AMDFAM15H_BDVER1, AMDFAM15H_BDVER2, - INTEL_PENTIUM_MMX, - INTEL_CORE2_65, - INTEL_CORE2_45, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, INTEL_COREI7_IVYBRIDGE, INTEL_COREI7_HASWELL, INTEL_COREI7_BROADWELL, INTEL_COREI7_SKYLAKE, INTEL_COREI7_SKYLAKE_AVX512, - INTEL_ATOM_BONNELL, - INTEL_ATOM_SILVERMONT, - INTEL_ATOM_GOLDMONT, - INTEL_KNIGHTS_LANDING, + // Entries below this are not in libgcc/compiler-rt. 
+ INTEL_PENTIUM_MMX, + INTEL_CORE2_65, + INTEL_CORE2_45, AMDPENTIUM_K6, AMDPENTIUM_K62, AMDPENTIUM_K63, AMDPENTIUM_GEODE, - AMDATHLON_TBIRD, - AMDATHLON_MP, + AMDATHLON_CLASSIC, AMDATHLON_XP, + AMDATHLON_K8, AMDATHLON_K8SSE3, - AMDATHLON_OPTERON, - AMDATHLON_FX, - AMDATHLON_64, - AMD_BTVER1, - AMD_BTVER2, - AMDFAM15H_BDVER3, - AMDFAM15H_BDVER4, - AMDFAM17H_ZNVER1, CPU_SUBTYPE_MAX }; @@ -360,9 +355,28 @@ enum ProcessorFeatures { FEATURE_SSE4_2, FEATURE_AVX, FEATURE_AVX2, - FEATURE_AVX512, - FEATURE_AVX512SAVE, - FEATURE_MOVBE, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ, + // Only one bit free left in the first 32 features. + FEATURE_MOVBE = 32, FEATURE_ADX, FEATURE_EM64T }; @@ -406,7 +420,6 @@ static bool isCpuIdSupported() { /// the specified arguments. If we can't run cpuid on the host, return true. static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) #if defined(__GNUC__) || defined(__clang__) #if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. @@ -416,14 +429,16 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); + return false; #elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); + return false; #else - assert(0 && "This method is defined only for x86."); + return true; #endif #elif defined(_MSC_VER) // The MSVC intrinsic is portable across x86 and x64. @@ -433,7 +448,6 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; -#endif return false; #else return true; @@ -446,16 +460,16 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) #if defined(__x86_64__) || defined(_M_X64) #if defined(__GNUC__) || defined(__clang__) - // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? 
__asm__("movq\t%%rbx, %%rsi\n\t" "cpuid\n\t" "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); + return false; #elif defined(_MSC_VER) int registers[4]; __cpuidex(registers, value, subleaf); @@ -463,6 +477,9 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; + return false; +#else + return true; #endif #elif defined(__i386__) || defined(_M_IX86) #if defined(__GNUC__) || defined(__clang__) @@ -471,6 +488,7 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); + return false; #elif defined(_MSC_VER) __asm { mov eax,value @@ -485,16 +503,16 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, mov esi,rEDX mov dword ptr [esi],edx } -#endif -#else - assert(0 && "This method is defined only for x86."); -#endif return false; #else return true; #endif +#else + return true; +#endif } +// Read control register 0 (XCR0). Used to detect features such as AVX. static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) // Check xgetbv; this uses a .byte sequence instead of the instruction @@ -526,9 +544,10 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } static void -getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, - unsigned int Brand_id, unsigned int Features, - unsigned *Type, unsigned *Subtype) { +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Brand_id, unsigned Features, + unsigned Features2, unsigned *Type, + unsigned *Subtype) { if (Brand_id != 0) return; switch (Family) { @@ -681,12 +700,7 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Skylake Xeon: case 0x55: *Type = INTEL_COREI7; - // Check that we really have AVX512 - if (Features & (1 << FEATURE_AVX512)) { - *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" - } else { - *Subtype = INTEL_COREI7_SKYLAKE; // "skylake" - } + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" break; case 0x1c: // Most 45 nm Intel Atom processors @@ -694,8 +708,7 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_BONNELL; + *Type = INTEL_BONNELL; break; // "bonnell" // Atom Silvermont codes from the Intel software optimization guide. @@ -705,27 +718,23 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, case 0x5a: case 0x5d: case 0x4c: // really airmont - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_SILVERMONT; + *Type = INTEL_SILVERMONT; break; // "silvermont" // Goldmont: case 0x5c: case 0x5f: - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_GOLDMONT; + *Type = INTEL_GOLDMONT; break; // "goldmont" case 0x57: - *Type = INTEL_XEONPHI; // knl - *Subtype = INTEL_KNIGHTS_LANDING; + *Type = INTEL_KNL; // knl break; default: // Unknown family 6 CPU, try to guess. 
- if (Features & (1 << FEATURE_AVX512)) { - *Type = INTEL_XEONPHI; // knl - *Subtype = INTEL_KNIGHTS_LANDING; + if (Features & (1 << FEATURE_AVX512F)) { + *Type = INTEL_KNL; // knl break; } - if (Features & (1 << FEATURE_ADX)) { + if (Features2 & (1 << (FEATURE_ADX - 32))) { *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_BROADWELL; break; @@ -741,9 +750,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, break; } if (Features & (1 << FEATURE_SSE4_2)) { - if (Features & (1 << FEATURE_MOVBE)) { - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_SILVERMONT; + if (Features2 & (1 << (FEATURE_MOVBE - 32))) { + *Type = INTEL_SILVERMONT; } else { *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_NEHALEM; @@ -756,16 +764,15 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, break; } if (Features & (1 << FEATURE_SSSE3)) { - if (Features & (1 << FEATURE_MOVBE)) { - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_BONNELL; // "bonnell" + if (Features2 & (1 << (FEATURE_MOVBE - 32))) { + *Type = INTEL_BONNELL; // "bonnell" } else { *Type = INTEL_CORE2; // "core2" *Subtype = INTEL_CORE2_65; } break; } - if (Features & (1 << FEATURE_EM64T)) { + if (Features2 & (1 << (FEATURE_EM64T - 32))) { *Type = INTEL_X86_64; break; // x86-64 } @@ -796,8 +803,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron // processor, and Mobile Intel Celeron processor. All processors // are model 02h and manufactured using the 0.13 micron process. - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64 + : INTEL_PENTIUM_IV); break; case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D @@ -811,13 +818,13 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Extreme Edition, Intel Xeon processor, Intel Xeon processor // MP, Intel Celeron D processor. All processors are model 06h // and manufactured using the 65 nm process. - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_NOCONA + : INTEL_PRESCOTT); break; default: - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64 + : INTEL_PENTIUM_IV); break; } break; @@ -827,10 +834,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, } } -static void getAMDProcessorTypeAndSubtype(unsigned int Family, - unsigned int Model, - unsigned int Features, - unsigned *Type, +static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Features, unsigned *Type, unsigned *Subtype) { // FIXME: this poorly matches the generated SubtargetFeatureKV table. 
There // appears to be no way to generate the wide variety of AMD-specific targets @@ -860,38 +865,20 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, break; case 6: *Type = AMDATHLON; - switch (Model) { - case 4: - *Subtype = AMDATHLON_TBIRD; - break; // "athlon-tbird" - case 6: - case 7: - case 8: - *Subtype = AMDATHLON_MP; - break; // "athlon-mp" - case 10: + if (Features & (1 << FEATURE_SSE)) { *Subtype = AMDATHLON_XP; break; // "athlon-xp" } - break; + *Subtype = AMDATHLON_CLASSIC; + break; // "athlon" case 15: *Type = AMDATHLON; if (Features & (1 << FEATURE_SSE3)) { *Subtype = AMDATHLON_K8SSE3; break; // "k8-sse3" } - switch (Model) { - case 1: - *Subtype = AMDATHLON_OPTERON; - break; // "opteron" - case 5: - *Subtype = AMDATHLON_FX; - break; // "athlon-fx"; also opteron - default: - *Subtype = AMDATHLON_64; - break; // "athlon64" - } - break; + *Subtype = AMDATHLON_K8; + break; // "k8" case 16: *Type = AMDFAM10H; // "amdfam10" switch (Model) { @@ -907,19 +894,13 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, } break; case 20: - *Type = AMDFAM14H; - *Subtype = AMD_BTVER1; + *Type = AMD_BTVER1; break; // "btver1"; case 21: *Type = AMDFAM15H; - if (!(Features & - (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback. - *Subtype = AMD_BTVER1; - break; // "btver1" - } - if (Model >= 0x50 && Model <= 0x6f) { + if (Model >= 0x60 && Model <= 0x7f) { *Subtype = AMDFAM15H_BDVER4; - break; // "bdver4"; 50h-6Fh: Excavator + break; // "bdver4"; 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { *Subtype = AMDFAM15H_BDVER3; @@ -935,39 +916,52 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, } break; case 22: - *Type = AMDFAM16H; - if (!(Features & - (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback. 
- *Subtype = AMD_BTVER1; - break; // "btver1"; - } - *Subtype = AMD_BTVER2; + *Type = AMD_BTVER2; break; // "btver2" case 23: *Type = AMDFAM17H; - if (Features & (1 << FEATURE_ADX)) { - *Subtype = AMDFAM17H_ZNVER1; - break; // "znver1" - } - *Subtype = AMD_BTVER1; + *Subtype = AMDFAM17H_ZNVER1; break; default: break; // "generic" } } -static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, - unsigned MaxLeaf) { +static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, + unsigned *FeaturesOut, + unsigned *Features2Out) { unsigned Features = 0; - unsigned int EAX, EBX; - Features |= (((EDX >> 23) & 1) << FEATURE_MMX); - Features |= (((EDX >> 25) & 1) << FEATURE_SSE); - Features |= (((EDX >> 26) & 1) << FEATURE_SSE2); - Features |= (((ECX >> 0) & 1) << FEATURE_SSE3); - Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3); - Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1); - Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2); - Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE); + unsigned Features2 = 0; + unsigned EAX, EBX; + + if ((EDX >> 15) & 1) + Features |= 1 << FEATURE_CMOV; + if ((EDX >> 23) & 1) + Features |= 1 << FEATURE_MMX; + if ((EDX >> 25) & 1) + Features |= 1 << FEATURE_SSE; + if ((EDX >> 26) & 1) + Features |= 1 << FEATURE_SSE2; + + if ((ECX >> 0) & 1) + Features |= 1 << FEATURE_SSE3; + if ((ECX >> 1) & 1) + Features |= 1 << FEATURE_PCLMUL; + if ((ECX >> 9) & 1) + Features |= 1 << FEATURE_SSSE3; + if ((ECX >> 12) & 1) + Features |= 1 << FEATURE_FMA; + if ((ECX >> 19) & 1) + Features |= 1 << FEATURE_SSE4_1; + if ((ECX >> 20) & 1) + Features |= 1 << FEATURE_SSE4_2; + if ((ECX >> 23) & 1) + Features |= 1 << FEATURE_POPCNT; + if ((ECX >> 25) & 1) + Features |= 1 << FEATURE_AES; + + if ((ECX >> 22) & 1) + Features2 |= 1 << (FEATURE_MOVBE - 32); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context @@ -976,20 +970,65 @@ static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + + if (HasAVX) + Features |= 1 << FEATURE_AVX; + bool HasLeaf7 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); - bool HasADX = HasLeaf7 && ((EBX >> 19) & 1); - bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20); - bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1); - Features |= (HasAVX << FEATURE_AVX); - Features |= (HasAVX2 << FEATURE_AVX2); - Features |= (HasAVX512 << FEATURE_AVX512); - Features |= (HasAVX512Save << FEATURE_AVX512SAVE); - Features |= (HasADX << FEATURE_ADX); - getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T); - return Features; + if (HasLeaf7 && ((EBX >> 3) & 1)) + Features |= 1 << FEATURE_BMI; + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + Features |= 1 << FEATURE_AVX2; + if (HasLeaf7 && ((EBX >> 9) & 1)) + Features |= 1 << FEATURE_BMI2; + if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512F; + if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512DQ; + if (HasLeaf7 && ((EBX >> 19) & 1)) + Features2 |= 1 << (FEATURE_ADX - 32); + if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512IFMA; + if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512PF; + if (HasLeaf7 && ((EBX >> 27) & 1) 
&& HasAVX512Save) + Features |= 1 << FEATURE_AVX512ER; + if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512CD; + if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512BW; + if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VL; + + if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VBMI; + if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VPOPCNTDQ; + + if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX5124VNNIW; + if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX5124FMAPS; + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1 && ((ECX >> 6) & 1)) + Features |= 1 << FEATURE_SSE4_A; + if (HasExtLeaf1 && ((ECX >> 11) & 1)) + Features |= 1 << FEATURE_XOP; + if (HasExtLeaf1 && ((ECX >> 16) & 1)) + Features |= 1 << FEATURE_FMA4; + + if (HasExtLeaf1 && ((EDX >> 29) & 1)) + Features2 |= 1 << (FEATURE_EM64T - 32); + + *FeaturesOut = Features; + *Features2Out = Features2; } StringRef sys::getHostCPUName() { @@ -1004,23 +1043,22 @@ StringRef sys::getHostCPUName() { if(!isCpuIdSupported()) return "generic"; #endif - if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX)) - return "generic"; - if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) + if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) return "generic"; + getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); unsigned Brand_id = EBX & 0xff; unsigned Family = 0, Model = 0; - unsigned Features = 0; + unsigned Features = 0, Features2 = 0; detectX86FamilyModel(EAX, &Family, &Model); - Features = getAvailableFeatures(ECX, EDX, MaxLeaf); + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); unsigned Type; unsigned Subtype; if (Vendor == SIG_INTEL) { - getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, &Type, - &Subtype); + getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, + Features2, &Type, &Subtype); switch (Type) { case INTEL_i386: return "i386"; @@ -1049,7 +1087,7 @@ StringRef sys::getHostCPUName() { case INTEL_CORE2_45: return "penryn"; default: - return "core2"; + llvm_unreachable("Unexpected subtype!"); } case INTEL_COREI7: switch (Subtype) { @@ -1070,21 +1108,16 @@ StringRef sys::getHostCPUName() { case INTEL_COREI7_SKYLAKE_AVX512: return "skylake-avx512"; default: - return "corei7"; + llvm_unreachable("Unexpected subtype!"); } - case INTEL_ATOM: - switch (Subtype) { - case INTEL_ATOM_BONNELL: - return "bonnell"; - case INTEL_ATOM_GOLDMONT: - return "goldmont"; - case INTEL_ATOM_SILVERMONT: - return "silvermont"; - default: - return "atom"; - } - case INTEL_XEONPHI: - return "knl"; /*update for more variants added*/ + case INTEL_BONNELL: + return "bonnell"; + case INTEL_SILVERMONT: + return "silvermont"; + case INTEL_GOLDMONT: + return "goldmont"; + case INTEL_KNL: + return "knl"; case INTEL_X86_64: return "x86-64"; case INTEL_NOCONA: @@ -1092,7 +1125,7 @@ StringRef sys::getHostCPUName() { case INTEL_PRESCOTT: return "prescott"; default: - return "generic"; + break; } } else if (Vendor == SIG_AMD) { getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); @@ -1114,31 +1147,24 @@ StringRef sys::getHostCPUName() { } case AMDATHLON: switch (Subtype) { - case 
AMDATHLON_TBIRD: - return "athlon-tbird"; - case AMDATHLON_MP: - return "athlon-mp"; + case AMDATHLON_CLASSIC: + return "athlon"; case AMDATHLON_XP: return "athlon-xp"; + case AMDATHLON_K8: + return "k8"; case AMDATHLON_K8SSE3: return "k8-sse3"; - case AMDATHLON_OPTERON: - return "opteron"; - case AMDATHLON_FX: - return "athlon-fx"; - case AMDATHLON_64: - return "athlon64"; default: - return "athlon"; + llvm_unreachable("Unexpected subtype!"); } case AMDFAM10H: - if(Subtype == AMDFAM10H_BARCELONA) - return "barcelona"; return "amdfam10"; - case AMDFAM14H: + case AMD_BTVER1: return "btver1"; case AMDFAM15H: switch (Subtype) { + default: // There are gaps in the subtype detection. case AMDFAM15H_BDVER1: return "bdver1"; case AMDFAM15H_BDVER2: @@ -1147,31 +1173,13 @@ StringRef sys::getHostCPUName() { return "bdver3"; case AMDFAM15H_BDVER4: return "bdver4"; - case AMD_BTVER1: - return "btver1"; - default: - return "amdfam15"; - } - case AMDFAM16H: - switch (Subtype) { - case AMD_BTVER1: - return "btver1"; - case AMD_BTVER2: - return "btver2"; - default: - return "amdfam16"; } + case AMD_BTVER2: + return "btver2"; case AMDFAM17H: - switch (Subtype) { - case AMD_BTVER1: - return "btver1"; - case AMDFAM17H_ZNVER1: - return "znver1"; - default: - return "amdfam17"; - } + return "znver1"; default: - return "generic"; + break; } } return "generic"; @@ -1494,7 +1502,8 @@ bool sys::getHostCPUFeatures(StringMap &Features) { return false; } #endif std::string sys::getProcessTriple() { - Triple PT(Triple::normalize(LLVM_HOST_TRIPLE)); + std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); + Triple PT(Triple::normalize(TargetTripleString)); if (sizeof(void *) == 8 && PT.isArch32Bit()) PT = PT.get64BitArchVariant(); diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index bdd02105f6f0..b1d5e7c0d991 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Mutex.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Config/config.h" //===----------------------------------------------------------------------===// @@ -47,6 +48,10 @@ MutexImpl::MutexImpl( bool recursive) // Declare the pthread_mutex data structures pthread_mutex_t* mutex = static_cast(malloc(sizeof(pthread_mutex_t))); + + if (mutex == nullptr) + report_bad_alloc_error("Mutex allocation failed"); + pthread_mutexattr_t attr; // Initialize the mutex attributes diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc index aad77f19c35a..f05103ccd1eb 100644 --- a/lib/Support/Unix/DynamicLibrary.inc +++ b/lib/Support/Unix/DynamicLibrary.inc @@ -20,6 +20,9 @@ DynamicLibrary::HandleSet::~HandleSet() { ::dlclose(Handle); if (Process) ::dlclose(Process); + + // llvm_shutdown called, Return to default + DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker; } void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index 0ba6a25aa198..5580e63893c6 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -34,18 +34,31 @@ static std::string getOSVersion() { return info.release; } -std::string sys::getDefaultTargetTriple() { - std::string TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE); - - // On darwin, we want to update the version to match that of the - // target. 
+static std::string updateTripleOSVersion(std::string TargetTripleString) { + // On darwin, we want to update the version to match that of the target. std::string::size_type DarwinDashIdx = TargetTripleString.find("-darwin"); if (DarwinDashIdx != std::string::npos) { TargetTripleString.resize(DarwinDashIdx + strlen("-darwin")); TargetTripleString += getOSVersion(); + return TargetTripleString; } + std::string::size_type MacOSDashIdx = TargetTripleString.find("-macos"); + if (MacOSDashIdx != std::string::npos) { + TargetTripleString.resize(MacOSDashIdx); + // Reset the OS to darwin as the OS version from `uname` doesn't use the + // macOS version scheme. + TargetTripleString += "-darwin"; + TargetTripleString += getOSVersion(); + } + return TargetTripleString; +} - // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. +std::string sys::getDefaultTargetTriple() { + std::string TargetTripleString = + updateTripleOSVersion(LLVM_DEFAULT_TARGET_TRIPLE); + + // Override the default target with an environment variable named by + // LLVM_TARGET_TRIPLE_ENV. #if defined(LLVM_TARGET_TRIPLE_ENV) if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) TargetTripleString = EnvTriple; diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 1704fa479942..c866d5b5a84e 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -40,9 +40,6 @@ #include #endif #ifdef HAVE_POSIX_SPAWN -#ifdef __sun__ -#define _RESTRICT_KYWD -#endif #include #if defined(__APPLE__) diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index caf1a0a658de..083ea902eeb2 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -28,6 +28,8 @@ DynamicLibrary::HandleSet::~HandleSet() { // 'Process' should not be released on Windows. assert((!Process || Process==this) && "Bad Handle"); + // llvm_shutdown called, Return to default + DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker; } void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc index 7e196cf0ce18..90a6fb316703 100644 --- a/lib/Support/Windows/Host.inc +++ b/lib/Support/Windows/Host.inc @@ -17,6 +17,10 @@ using namespace llvm; +static std::string updateTripleOSVersion(std::string Triple) { + return Triple; +} + std::string sys::getDefaultTargetTriple() { const char *Triple = LLVM_DEFAULT_TARGET_TRIPLE; diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 4a7e0b2b803e..db1fbe069f4d 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -509,7 +509,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C, assert(ChainBegin != ChainEnd && "Chain should contain instructions"); do { --I; - Units.accumulateBackward(*I); + Units.accumulate(*I); } while (I != ChainBegin); // Make sure we allocate in-order, to get the cheapest registers first. 
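A self-contained sketch of the rewrite updateTripleOSVersion performs above: keep everything up to and including "-darwin" and append the uname-reported version, or rewrite a "-macos" triple to "-darwin" plus that version. The version string is passed in here as a stand-in for what getOSVersion() returns.

#include <cassert>
#include <string>

static std::string updateTripleOSVersionSketch(std::string Triple,
                                               const std::string &OSVersion) {
  // Keep everything up to and including "-darwin", then append the version.
  std::string::size_type Idx = Triple.find("-darwin");
  if (Idx != std::string::npos) {
    Triple.resize(Idx + std::string("-darwin").size());
    return Triple + OSVersion;
  }
  // "-macos" triples are reset to "-darwin" because uname reports Darwin
  // version numbers, not macOS ones.
  Idx = Triple.find("-macos");
  if (Idx != std::string::npos) {
    Triple.resize(Idx);
    return Triple + "-darwin" + OSVersion;
  }
  return Triple;
}

int main() {
  assert(updateTripleOSVersionSketch("x86_64-apple-darwin15.6.0", "16.7.0") ==
         "x86_64-apple-darwin16.7.0");
  assert(updateTripleOSVersionSketch("x86_64-apple-macos10.12", "16.7.0") ==
         "x86_64-apple-darwin16.7.0");
  return 0;
}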
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index 6f8dd3e3ac0c..b3b738584b40 100644 --- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -113,7 +113,7 @@ struct LDTLSCleanup : public MachineFunctionPass { return Copy; } - // Create a virtal register in *TLSBaseAddrReg, and populate it by + // Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. MachineInstr *setRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) { MachineFunction *MF = I.getParent()->getParent(); diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp index 0a948812ff33..51700f905979 100644 --- a/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -167,6 +167,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, case AArch64::SUBWrs: case AArch64::SUBWrx: IsFlagSetting = false; + LLVM_FALLTHROUGH; case AArch64::ADDSWri: case AArch64::ADDSWrr: case AArch64::ADDSWrs: @@ -226,6 +227,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, case AArch64::SUBXrs: case AArch64::SUBXrx: IsFlagSetting = false; + LLVM_FALLTHROUGH; case AArch64::ADDSXri: case AArch64::ADDSXrr: case AArch64::ADDSXrs: diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 7bf2097c17ce..3682b62d2b84 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -2114,7 +2114,7 @@ bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type."); - case MVT::i1: VTIsi1 = true; + case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; case MVT::i8: Opc = OpcTable[Idx][0]; break; case MVT::i16: Opc = OpcTable[Idx][1]; break; case MVT::i32: Opc = OpcTable[Idx][2]; break; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index aaf32a499bc3..60fde5caa339 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8364,9 +8364,9 @@ static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, /// EXTR instruction extracts a contiguous chunk of bits from two existing /// registers viewed as a high/low pair. This function looks for the pattern: -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an -/// EXTR. Can't quite be done in TableGen because the two immediates aren't -/// independent. +/// (or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N)) and replaces it +/// with an EXTR. Can't quite be done in TableGen because the two immediates +/// aren't independent. static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; @@ -9531,7 +9531,7 @@ static SDValue performPostLD1Combine(SDNode *N, return SDValue(); } -/// Simplify \Addr given that the top byte of it is ignored by HW during +/// Simplify ``Addr`` given that the top byte of it is ignored by HW during /// address translation. 
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 314e89bbca86..dba3e4bdf82f 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1282,6 +1282,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { case AArch64CC::HI: // Z clear and C set case AArch64CC::LS: // Z set or C clear UsedFlags.Z = true; + LLVM_FALLTHROUGH; case AArch64CC::HS: // C set case AArch64CC::LO: // C clear UsedFlags.C = true; @@ -1300,6 +1301,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { case AArch64CC::GT: // Z clear, N and V the same case AArch64CC::LE: // Z set, N and V differ UsedFlags.Z = true; + LLVM_FALLTHROUGH; case AArch64CC::GE: // N and V the same case AArch64CC::LT: // N and V differ UsedFlags.N = true; @@ -3669,12 +3671,17 @@ enum class FMAInstKind { Default, Indexed, Accumulator }; /// F|MUL I=A,B,0 /// F|ADD R,I,C /// ==> F|MADD R,A,B,C +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information /// \param Root is the F|ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction /// \param IdxMulOpd is index of operand in Root that is the result of /// the F|MUL. In the example above IdxMulOpd is 1. /// \param MaddOpc the opcode fo the f|madd instruction +/// \param RC Register class of operands +/// \param kind of fma instruction (addressing mode) to be generated static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, @@ -3733,6 +3740,9 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, /// ADD R,I,Imm /// ==> ORR V, ZR, Imm /// ==> MADD R,A,B,V +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information /// \param Root is the ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction @@ -3741,6 +3751,7 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, /// \param MaddOpc the opcode fo the madd instruction /// \param VR is a virtual register that holds the value of an ADD operand /// (V in the example above). +/// \param RC Register class of operands static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl &InsInstrs, @@ -4216,26 +4227,36 @@ void AArch64InstrInfo::genAlternativeCodeSequence( /// \brief Replace csincr-branch sequence by simple conditional branch /// /// Examples: -/// 1. +/// 1. \code /// csinc w9, wzr, wzr, /// tbnz w9, #0, 0x44 +/// \endcode /// to +/// \code /// b. +/// \endcode /// -/// 2. +/// 2. \code /// csinc w9, wzr, wzr, /// tbz w9, #0, 0x44 +/// \endcode /// to +/// \code /// b. +/// \endcode /// /// Replace compare and branch sequence by TBZ/TBNZ instruction when the /// compare's constant operand is power of 2. 
/// /// Examples: +/// \code /// and w8, w8, #0x400 /// cbnz w8, L1 +/// \endcode /// to +/// \code /// tbnz w8, #10, L1 +/// \endcode /// /// \param MI Conditional Branch /// \return True when the simple conditional branch is generated @@ -4409,6 +4430,13 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { return makeArrayRef(TargetFlags); } +ArrayRef> +AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { + static const std::pair TargetFlags[] = + {{MOSuppressPair, "aarch64-suppress-pair"}}; + return makeArrayRef(TargetFlags); +} + unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize, size_t Occurrences, bool CanBeTailCall) const { diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 58e9ce583d44..0809ede4df2a 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -263,8 +263,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { /// \param Pattern - combiner pattern bool isThroughputPattern(MachineCombinerPattern Pattern) const override; /// Return true when there is potentially a faster code sequence - /// for an instruction chain ending in . All potential patterns are - /// listed in the array. + /// for an instruction chain ending in ``Root``. All potential patterns are + /// listed in the ``Patterns`` array. bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl &Patterns) const override; @@ -289,6 +289,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { getSerializableDirectMachineOperandTargetFlags() const override; ArrayRef> getSerializableBitmaskMachineOperandTargetFlags() const override; + ArrayRef> + getSerializableMachineMemOperandTargetFlags() const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 6cb723d187af..0be14673eb20 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -313,9 +313,6 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; //===----------------------------------------------------------------------===// // AArch64 Instruction Predicate Definitions. -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; - // We could compute these on a per-module basis but doing so requires accessing // the Function object through the Subtarget and objections were raised // to that (see post-commit review comments for r301750). 
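The LLVM_FALLTHROUGH annotations added in the AArch64 hunks above mark fallthrough as intentional so -Wimplicit-fallthrough stays quiet; the macro typically expands to the C++17 [[fallthrough]] attribute when the compiler supports it. A compact sketch of the getUsedNZCV pattern, using the standard attribute directly:

#include <cstdio>

enum class CondCode { HI, LS, HS, LO };

struct UsedNZCV { bool Z = false; bool C = false; };

static UsedNZCV getUsedFlags(CondCode CC) {
  UsedNZCV Used;
  switch (CC) {
  case CondCode::HI: // Z clear and C set
  case CondCode::LS: // Z set or C clear
    Used.Z = true;
    [[fallthrough]]; // intentional: these conditions also read C
  case CondCode::HS: // C set
  case CondCode::LO: // C clear
    Used.C = true;
    break;
  }
  return Used;
}

int main() {
  UsedNZCV U = getUsedFlags(CondCode::HI);
  std::printf("HI reads Z=%d C=%d\n", U.Z, U.C);
  return 0;
}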
@@ -714,10 +711,10 @@ def : InstAlias<"negs $dst, $src$shift", defm UDIV : Div<0, "udiv", udiv>; defm SDIV : Div<1, "sdiv", sdiv>; -def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr $Rn, $Rm)>; -def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr $Rn, $Rm)>; -def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr $Rn, $Rm)>; -def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr $Rn, $Rm)>; +def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; // Variable shift defm ASRV : Shift<0b10, "asr", sra>; diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 07ce0e863c5e..7e275e4d2f46 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -33,6 +33,8 @@ #define DEBUG_TYPE "aarch64-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -212,6 +214,7 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, return GenericOpc; } } + break; case AArch64::FPRRegBankID: switch (OpSize) { case 32: @@ -243,7 +246,8 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, return GenericOpc; } } - }; + break; + } return GenericOpc; } @@ -267,6 +271,7 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, case 64: return isStore ? AArch64::STRXui : AArch64::LDRXui; } + break; case AArch64::FPRRegBankID: switch (OpSize) { case 8: @@ -278,7 +283,8 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, case 64: return isStore ? AArch64::STRDui : AArch64::LDRDui; } - }; + break; + } return GenericOpc; } @@ -1319,6 +1325,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { case TargetOpcode::G_VASTART: return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) : selectVaStartAAPCS(I, MF, MRI); + case TargetOpcode::G_IMPLICIT_DEF: + I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); + return true; } return false; diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 4b568f3fba2b..4a0a7c36baf8 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -291,11 +291,10 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, unsigned DstPtr; if (Align > PtrSize) { // Realign the list to the actual required alignment. 
- unsigned AlignMinus1 = MRI.createGenericVirtualRegister(IntPtrTy); - MIRBuilder.buildConstant(AlignMinus1, Align - 1); + auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1); unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(ListTmp, List, AlignMinus1); + MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg()); DstPtr = MRI.createGenericVirtualRegister(PtrTy); MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align)); diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index f3c8e7e9bdc2..4e65c0ab6011 100644 --- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -163,6 +163,7 @@ AArch64RedundantCopyElimination::knownRegValInBlock( case AArch64::ADDSWri: case AArch64::ADDSXri: IsCMN = true; + LLVM_FALLTHROUGH; // CMP is an alias for SUBS with a dead destination register. case AArch64::SUBSWri: case AArch64::SUBSXri: { diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index a9a9d5ce8429..a3238cf3b60f 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -81,6 +81,7 @@ void AArch64Subtarget::initializeProperties() { break; case CortexA57: MaxInterleaveFactor = 4; + PrefFunctionAlignment = 4; break; case ExynosM1: MaxInterleaveFactor = 4; @@ -130,7 +131,9 @@ void AArch64Subtarget::initializeProperties() { break; case CortexA35: break; case CortexA53: break; - case CortexA72: break; + case CortexA72: + PrefFunctionAlignment = 4; + break; case CortexA73: break; case Others: break; } diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 7933e58c49ee..db53946cbc77 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -218,6 +218,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } bool hasFuseAES() const { return HasFuseAES; } bool hasFuseLiterals() const { return HasFuseLiterals; } + + /// \brief Return true if the CPU supports any kind of instruction fusion. 
+ bool hasFusion() const { + return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || + hasFuseAES() || hasFuseLiterals(); + } + bool useRSqrt() const { return UseRSqrt; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } unsigned getVectorInsertExtractBaseCost() const { diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 1252f9403812..6237b8f3e7b9 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -277,17 +277,19 @@ class AArch64PassConfig : public TargetPassConfig { ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { + const AArch64Subtarget &ST = C->MF->getSubtarget(); ScheduleDAGMILive *DAG = createGenericSchedLive(C); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); - DAG->addMutation(createAArch64MacroFusionDAGMutation()); + if (ST.hasFusion()) + DAG->addMutation(createAArch64MacroFusionDAGMutation()); return DAG; } ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); - if (ST.hasFuseAES() || ST.hasFuseLiterals()) { + if (ST.hasFusion()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). ScheduleDAGMI *DAG = createGenericSchedPostRA(C); diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 0d860a7eef79..7870dce5c9c0 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -756,7 +756,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; - // Deliberate fallthrough + LLVM_FALLTHROUGH; case AArch64::ANDWrs: case AArch64::ANDSWrs: case AArch64::BICWrs: @@ -780,7 +780,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; - // Deliberate fallthrough + LLVM_FALLTHROUGH; case AArch64::ANDXrs: case AArch64::ANDSXrs: case AArch64::BICXrs: diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 475f91016840..a7a7daf4b4a5 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -73,7 +73,7 @@ class AArch64AsmBackend : public MCAsmBackend { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; bool mayNeedRelaxation(const MCInst &Inst) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -264,7 +264,7 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); if (!Value) return; // Doesn't change encoding. 
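The breaks added to selectBinaryOp and selectLoadStoreUIOp above close off nested switches that previously fell out of one register-bank case into the next. A toy illustration of that control-flow hazard (the bank and size names here are made up, not the AArch64 ones):

#include <cstdio>

enum Bank { GPR, FPR };

// Without the outer "break" after the inner switch, a size the GPR switch
// does not return from would fall into the FPR case and pick an FPR opcode.
static const char *selectOpcode(Bank B, unsigned Size) {
  switch (B) {
  case GPR:
    switch (Size) {
    case 32: return "GPR32-op";
    case 64: return "GPR64-op";
    }
    break; // keeps an unhandled GPR size from reaching the FPR case
  case FPR:
    switch (Size) {
    case 16: return "FPR16-op";
    case 32: return "FPR32-op";
    case 64: return "FPR64-op";
    }
    break;
  }
  return "unknown";
}

int main() {
  std::printf("%s\n", selectOpcode(GPR, 16)); // "unknown", not "FPR16-op"
  return 0;
}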
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index fc808ee0cdd6..c25bd8c8f6cc 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -103,4 +103,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) { AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() { CommentString = ";"; + PrivateGlobalPrefix = ".L"; + PrivateLabelPrefix = ".L"; } diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index 6f002860044c..ed5370826647 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -108,10 +108,11 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) { DFS(Start, Checklist); for (auto &BB : Checklist) { BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ? - BasicBlock::iterator(Load) : BB->end(); - if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr), - true, StartIt, BB, Load).isClobber()) - return true; + BasicBlock::iterator(Load) : BB->end(); + auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true, + StartIt, BB, Load); + if (Q.isClobber() || Q.isUnknown()) + return true; } return false; } diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index b312dbc8d14d..31ee9206ae27 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -380,7 +380,9 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) { FastMathFlags FMF = FPOp->getFastMathFlags(); bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() || FMF.allowReciprocal(); - if (ST->hasFP32Denormals() && !UnsafeDiv) + + // With UnsafeDiv node will be optimized to just rcp and mul. 
+ if (ST->hasFP32Denormals() || UnsafeDiv) return false; IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 96f819fd0e68..2553cf4da0fe 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2651,8 +2651,11 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); return DAG.getZExtOrTrunc(Shl, SL, VT); } - case ISD::OR: if (!isOrEquivalentToAdd(DAG, LHS)) break; - case ISD::ADD: { // Fall through from above + case ISD::OR: + if (!isOrEquivalentToAdd(DAG, LHS)) + break; + LLVM_FALLTHROUGH; + case ISD::ADD: { // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) if (ConstantSDNode *C2 = dyn_cast(LHS->getOperand(1))) { SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0), diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp index 846e7dff5f8c..7e0e9802c0e6 100644 --- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -10,6 +10,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -34,9 +35,14 @@ class AMDGPULowerIntrinsics : public ModulePass { AMDGPULowerIntrinsics() : ModulePass(ID) {} bool runOnModule(Module &M) override; + bool expandMemIntrinsicUses(Function &F); StringRef getPassName() const override { return "AMDGPU Lower Intrinsics"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } }; } @@ -55,7 +61,7 @@ static bool shouldExpandOperationWithSize(Value *Size) { return !CI || (CI->getZExtValue() > MaxStaticSize); } -static bool expandMemIntrinsicUses(Function &F) { +bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) { Intrinsic::ID ID = F.getIntrinsicID(); bool Changed = false; @@ -67,7 +73,10 @@ static bool expandMemIntrinsicUses(Function &F) { case Intrinsic::memcpy: { auto *Memcpy = cast(Inst); if (shouldExpandOperationWithSize(Memcpy->getLength())) { - expandMemCpyAsLoop(Memcpy); + Function *ParentFunc = Memcpy->getParent()->getParent(); + const TargetTransformInfo &TTI = + getAnalysis().getTTI(*ParentFunc); + expandMemCpyAsLoop(Memcpy, TTI); Changed = true; Memcpy->eraseFromParent(); } diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp new file mode 100644 index 000000000000..7263ba73d155 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp @@ -0,0 +1,64 @@ +//===--- AMDGPUMacroFusion.cpp - AMDGPU Macro Fusion ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the AMDGPU implementation of the DAG scheduling +/// mutation to pair instructions back to back. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUMacroFusion.h" +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" + +#include "llvm/CodeGen/MacroFusion.h" + +using namespace llvm; + +namespace { + +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const SIInstrInfo &TII = static_cast(TII_); + + switch (SecondMI.getOpcode()) { + case AMDGPU::V_ADDC_U32_e64: + case AMDGPU::V_SUBB_U32_e64: + case AMDGPU::V_CNDMASK_B32_e64: { + // Try to cluster defs of condition registers to their uses. This improves + // the chance VCC will be available which will allow shrinking to VOP2 + // encodings. + if (!FirstMI) + return true; + + const MachineOperand *Src2 = TII.getNamedOperand(SecondMI, + AMDGPU::OpName::src2); + return FirstMI->definesRegister(Src2->getReg()); + } + default: + return false; + } + + return false; +} + +} // end namespace + + +namespace llvm { + +std::unique_ptr createAMDGPUMacroFusionDAGMutation () { + return createMacroFusionDAGMutation(shouldScheduleAdjacent); +} + +} // end namespace llvm diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/lib/Target/AMDGPU/AMDGPUMacroFusion.h new file mode 100644 index 000000000000..844958580a65 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.h @@ -0,0 +1,19 @@ +//===- AMDGPUMacroFusion.h - AMDGPU Macro Fusion ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +/// Note that you have to add: +/// DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); +/// to AMDGPUPassConfig::createMachineScheduler() to have an effect. 
+std::unique_ptr createAMDGPUMacroFusionDAGMutation(); + +} // llvm diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index be47b900c6f0..1bc5a52053ec 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -13,6 +13,14 @@ //===----------------------------------------------------------------------===// #include "AMDGPUSubtarget.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "AMDGPUCallLowering.h" +#include "AMDGPUInstructionSelector.h" +#include "AMDGPULegalizerInfo.h" +#include "AMDGPURegisterBankInfo.h" +#endif #include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" @@ -72,6 +80,31 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, return *this; } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct SIGISelActualAccessor : public GISelAccessor { + std::unique_ptr CallLoweringInfo; + std::unique_ptr InstSelector; + std::unique_ptr Legalizer; + std::unique_ptr RegBankInfo; + const AMDGPUCallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : AMDGPUGenSubtargetInfo(TT, GPU, FS), @@ -265,18 +298,21 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::r600_read_tidig_x: IdQuery = true; + LLVM_FALLTHROUGH; case Intrinsic::r600_read_local_size_x: Dim = 0; break; case Intrinsic::amdgcn_workitem_id_y: case Intrinsic::r600_read_tidig_y: IdQuery = true; + LLVM_FALLTHROUGH; case Intrinsic::r600_read_local_size_y: Dim = 1; break; case Intrinsic::amdgcn_workitem_id_z: case Intrinsic::r600_read_tidig_z: IdQuery = true; + LLVM_FALLTHROUGH; case Intrinsic::r600_read_local_size_z: Dim = 2; break; @@ -317,11 +353,23 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, TLInfo(TM, *this) {} SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const TargetMachine &TM) : - AMDGPUSubtarget(TT, GPU, FS, TM), - InstrInfo(*this), - FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), - TLInfo(TM, *this) {} + const TargetMachine &TM) + : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), + TLInfo(TM, *this) { +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); + GISel->CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); + GISel->Legalizer.reset(new AMDGPULegalizerInfo()); + + GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + GISel->InstSelector.reset(new AMDGPUInstructionSelector( + *this, *static_cast(GISel->RegBankInfo.get()))); +#endif + setGISelAccessor(*GISel); +} void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 425fd35d47de..dc868f010d85 
100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -19,9 +19,7 @@ #include "AMDGPUCallLowering.h" #include "AMDGPUInstructionSelector.h" #include "AMDGPULegalizerInfo.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "AMDGPURegisterBankInfo.h" -#endif +#include "AMDGPUMacroFusion.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPUTargetTransformInfo.h" #include "GCNIterativeScheduler.h" @@ -85,7 +83,7 @@ static cl::opt EnableLoadStoreVectorizer( static cl::opt ScalarizeGlobal( "amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), - cl::init(false), + cl::init(true), cl::Hidden); // Option to run internalize pass. @@ -176,6 +174,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { new GCNScheduleDAGMILive(C, make_unique(C)); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createAMDGPUMacroFusionDAGMutation()); return DAG; } @@ -389,31 +388,6 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl( // GCN Target Machine (SI+) //===----------------------------------------------------------------------===// -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct SIGISelActualAccessor : public GISelAccessor { - std::unique_ptr CallLoweringInfo; - std::unique_ptr InstSelector; - std::unique_ptr Legalizer; - std::unique_ptr RegBankInfo; - const AMDGPUCallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, @@ -435,21 +409,6 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. 
resetTargetOptions(F); I = llvm::make_unique(TargetTriple, GPU, FS, *this); - -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); - GISel->CallLoweringInfo.reset( - new AMDGPUCallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new AMDGPULegalizerInfo()); - - GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo())); - GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I, - *static_cast(GISel->RegBankInfo.get()))); -#endif - - I->setGISelAccessor(*GISel); } I->setScalarizeGlobalBehavior(ScalarizeGlobal); diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 7b8756050b75..e3c90f250600 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1058,17 +1058,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseOModOperand(OperandVector &Operands); - void cvtId(MCInst &Inst, const OperandVector &Operands); - void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands); - - void cvtVOP3Impl(MCInst &Inst, - const OperandVector &Operands, - OptionalImmIndexMap &OptionalIdx); + void cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); - void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); - void cvtMIMG(MCInst &Inst, const OperandVector &Operands); + void cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic = false); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); @@ -3870,13 +3866,19 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { // mimg //===----------------------------------------------------------------------===// -void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) { +void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic) { unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } + if (IsAtomic) { + // Add src, same as dst + ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1); + } + OptionalImmIndexMap OptionalIdx; for (unsigned E = Operands.size(); I != E; ++I) { @@ -3904,39 +3906,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) { } void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - - // Add src, same as dst - ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1); - - OptionalImmIndexMap OptionalIdx; - - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - - // Add the register arguments - if (Op.isRegOrImm()) { - Op.addRegOrImmOperands(Inst, 1); - continue; - } else if (Op.isImmModifier()) { - OptionalIdx[Op.getImmTy()] = I; - } else { - llvm_unreachable("unexpected operand type"); - } - } - - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); - addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyUNorm); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + cvtMIMG(Inst, Operands, true); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const { @@ -4118,25 +4088,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) return MatchOperand_NoMatch; } -void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) { - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - for (unsigned E = Operands.size(); I != E; ++I) - ((AMDGPUOperand &)*Operands[I]).addRegOrImmOperands(Inst, 1); -} - -void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands) { - uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; - if (TSFlags & SIInstrFlags::VOP3) { - cvtVOP3(Inst, Operands); - } else { - cvtId(Inst, Operands); - } -} - static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { // 1. This operand is input modifiers return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS @@ -4148,91 +4099,78 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; } -void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands, - OptionalImmIndexMap &OptionalIdx) { +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx) { + unsigned Opc = Inst.getOpcode(); + unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { - Op.addRegOrImmWithFPInputModsOperands(Inst, 2); - } else if (Op.isImmModifier()) { - OptionalIdx[Op.getImmTy()] = I; - } else if (Op.isRegOrImm()) { - Op.addRegOrImmOperands(Inst, 1); - } else { - llvm_unreachable("unhandled operand type"); + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { + // This instruction has src modifiers + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else if (Op.isRegOrImm()) { + Op.addRegOrImmOperands(Inst, 1); + } else { + llvm_unreachable("unhandled operand type"); + } + } + } else { + // No src modifiers + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isMod()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + Op.addRegOrImmOperands(Inst, 1); + } } } -} -void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { - OptionalImmIndexMap OptionalIdx; + if (AMDGPU::getNamedOperandIdx(Opc, 
AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + } - cvtVOP3Impl(Inst, Operands, OptionalIdx); - - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + } // special case v_mac_{f16, f32}: // it has src2 register operand that is tied to dst operand // we don't allow modifiers for this operand in assembler so src2_modifiers // should be 0 - if (Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_si || - Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || - Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi) { + if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi || + Opc == AMDGPU::V_MAC_F16_e64_vi) { auto it = Inst.begin(); - std::advance( - it, - AMDGPU::getNamedOperandIdx(Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ? - AMDGPU::V_MAC_F16_e64 : - AMDGPU::V_MAC_F32_e64, - AMDGPU::OpName::src2_modifiers)); + std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 ++it; Inst.insert(it, Inst.getOperand(0)); // src2 = dst } } -void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) { +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; - - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (Op.isMod()) { - OptionalIdx[Op.getImmTy()] = I; - } else { - Op.addRegOrImmOperands(Inst, 1); - } - } - - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + cvtVOP3(Inst, Operands, OptionalIdx); } void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptIdx; - cvtVOP3Impl(Inst, Operands, OptIdx); + cvtVOP3(Inst, Operands, OptIdx); // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 // instruction, and then figure out where to actually put the modifiers int Opc = Inst.getOpcode(); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { - addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI); - } - addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1); @@ -4284,7 +4222,7 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); - Inst.getOperand(ModIdx).setImm(ModVal); + Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); } } diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 917d9cfa6905..971208c5db84 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUIntrinsicInfo.cpp AMDGPUISelDAGToDAG.cpp AMDGPULowerIntrinsics.cpp + AMDGPUMacroFusion.cpp AMDGPUMCInstLower.cpp AMDGPUMachineCFGStructurizer.cpp AMDGPUMachineFunction.cpp diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 8ead48067336..2e7641cda375 100644 --- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -17,7 +17,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace llvm { std::vector makeMinRegSchedule(ArrayRef TopRoots, diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp index d378df674be9..0657f67b217d 100644 --- a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp +++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp @@ -15,7 +15,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace { class GCNMinRegScheduler { diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp index 390a8286c76a..1d02c7fdffbf 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -16,7 +16,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 8ec46665daf5..155b400ba022 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/MathExtras.h" -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" using namespace llvm; diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h index 3ed3cd5b3b1c..060d2ca72d93 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -66,7 +66,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive { const SIMachineFunctionInfo &MFI; - // Occupancy target at the begining of function scheduling cycle. + // Occupancy target at the beginning of function scheduling cycle. unsigned StartingOccupancy; // Minimal real occupancy recorder for the function. 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 2b408ff10caa..a50e3eb8d9ce 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -32,7 +32,7 @@ class AMDGPUAsmBackend : public MCAsmBackend { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { @@ -100,7 +100,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { Value = adjustFixupValue(Fixup, Value, &Asm.getContext()); if (!Value) return; // Doesn't change encoding. diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index a515eecc222a..06e2c11b0193 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -26,6 +26,7 @@ class MIMG_Helper op, string asm, diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 6993e8a62a9c..00cbd24b84fb 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -555,7 +555,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass { CFStack.pushBranch(AMDGPU::CF_PUSH_EG); } else CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE); - + LLVM_FALLTHROUGH; case AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 215791f4f92d..69a63b6941ef 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1618,7 +1618,8 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } -bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { +bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { // Local and Private addresses do not handle vectors. 
Limit to i32 if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) { return (MemVT.getSizeInBits() <= 32); diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index d6a0876a6ee7..2a774693f02b 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -44,7 +44,8 @@ class R600TargetLowering final : public AMDGPUTargetLowering { EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override; - bool canMergeStoresTo(unsigned AS, EVT MemVT) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index 47fda1c8fa82..a7e540f9d14d 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -22,7 +22,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness"); diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index f391f67a241f..3af242d9ea66 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -137,6 +137,7 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII, = TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType); } + return false; } default: return false; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index d39b345bdf03..2ba570b9ebbb 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -547,7 +547,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = 0; const ConstantInt *Vol = dyn_cast(CI.getOperand(4)); - Info.vol = !Vol || !Vol->isNullValue(); + Info.vol = !Vol || !Vol->isZero(); Info.readMem = true; Info.writeMem = true; return true; @@ -713,7 +713,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, } } -bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { +bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) { return (MemVT.getSizeInBits() <= 4 * 32); } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { @@ -2374,20 +2375,16 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_WO_CHAIN: { unsigned IID = cast(N->getOperand(0))->getZExtValue(); - switch (IID) { - case Intrinsic::amdgcn_cvt_pkrtz: { + if (IID == Intrinsic::amdgcn_cvt_pkrtz) { SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); SDLoc SL(N); SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32, Src0, Src1); - Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt)); return; } - default: - break; - } + break; } case ISD::SELECT: { SDLoc SL(N); @@ -3736,7 +3733,9 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); EVT VT = Op.getValueType(); - bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags Flags = Op->getFlags(); + bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || + Flags.hasUnsafeAlgebra() || 
Flags.hasAllowReciprocal(); if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) return SDValue(); @@ -3771,15 +3770,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, } } - const SDNodeFlags Flags = Op->getFlags(); - - if (Unsafe || Flags.hasAllowReciprocal()) { + if (Unsafe) { // Turn into multiply by the reciprocal. // x / y -> x * (1.0 / y) - SDNodeFlags NewFlags; - NewFlags.setUnsafeAlgebra(true); SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags); } return SDValue(); @@ -4622,15 +4617,99 @@ SDValue SITargetLowering::performClassCombine(SDNode *N, return SDValue(); } +static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { + if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions()) + return true; + + return DAG.isKnownNeverNaN(Op); +} + +static bool isCanonicalized(SDValue Op, const SISubtarget *ST, + unsigned MaxDepth=5) { + // If source is a result of another standard FP operation it is already in + // canonical form. + + switch (Op.getOpcode()) { + default: + break; + + // These will flush denorms if required. + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FSQRT: + case ISD::FCEIL: + case ISD::FFLOOR: + case ISD::FMA: + case ISD::FMAD: + + case ISD::FCANONICALIZE: + return true; + + case ISD::FP_ROUND: + return Op.getValueType().getScalarType() != MVT::f16 || + ST->hasFP16Denormals(); + + case ISD::FP_EXTEND: + return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 || + ST->hasFP16Denormals(); + + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + return ST->hasFP16Denormals(); + + // It can/will be lowered or combined as a bit operation. + // Need to check their input recursively to handle. + case ISD::FNEG: + case ISD::FABS: + return (MaxDepth > 0) && + isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1); + + case ISD::FSIN: + case ISD::FCOS: + case ISD::FSINCOS: + return Op.getValueType().getScalarType() != MVT::f16; + + // In pre-GFX9 targets V_MIN_F32 and others do not flush denorms. + // For such targets need to check their input recursively. + // TODO: on GFX9+ we could return true without checking provided no-nan + // mode, since canonicalization is also used to quiet sNaNs. + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + + return (MaxDepth > 0) && + isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1) && + isCanonicalized(Op.getOperand(1), ST, MaxDepth - 1); + + case ISD::ConstantFP: { + auto F = cast(Op)->getValueAPF(); + return !F.isDenormal() && !(F.isNaN() && F.isSignaling()); + } + } + return false; +} + // Constant fold canonicalize. SDValue SITargetLowering::performFCanonicalizeCombine( SDNode *N, DAGCombinerInfo &DCI) const { - ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0)); - if (!CFP) - return SDValue(); - SelectionDAG &DAG = DCI.DAG; + ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0)); + + if (!CFP) { + SDValue N0 = N->getOperand(0); + + bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction()); + + if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) && + isCanonicalized(N0, getSubtarget())) + return N0; + + return SDValue(); + } + const APFloat &C = CFP->getValueAPF(); // Flush denormals to 0 if not enabled. 
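The performFCanonicalizeCombine change above folds fcanonicalize(x) to x when x is provably already canonical, using the new depth-limited isCanonicalized walk. The shape of that walk, reduced to plain C++ with hypothetical types (nothing below is LLVM API), is roughly:

// Conceptual sketch only: a depth-limited "already canonical?" classification.
enum class FPOp { Add, Mul, Sqrt, Neg, Abs, ConstantFP, Other };

struct Node {
  FPOp Op;
  const Node *Src0 = nullptr;   // first operand, when present
  bool DenormalOrSNaN = false;  // only meaningful for ConstantFP
};

static bool isCanonicalizedSketch(const Node &N, unsigned MaxDepth = 5) {
  switch (N.Op) {
  case FPOp::Add: case FPOp::Mul: case FPOp::Sqrt:
    return true;                      // standard FP ops flush/quiet as required
  case FPOp::Neg: case FPOp::Abs:     // lowered to bit ops; inspect the source
    return MaxDepth > 0 && N.Src0 && isCanonicalizedSketch(*N.Src0, MaxDepth - 1);
  case FPOp::ConstantFP:
    return !N.DenormalOrSNaN;         // constants must not be denormal or sNaN
  default:
    return false;                     // unknown producer: keep the canonicalize
  }
}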
@@ -4723,13 +4802,6 @@ SDValue SITargetLowering::performIntMed3ImmCombine( return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3); } -static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { - if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions()) - return true; - - return DAG.isKnownNeverNaN(Op); -} - SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL, SDValue Op0, diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 24f88e632d38..83392a7ab1b2 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -153,7 +153,8 @@ class SITargetLowering final : public AMDGPUTargetLowering { bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; - bool canMergeStoresTo(unsigned AS, EVT MemVT) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index b6784ec14e9f..160f8837d49c 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2022,10 +2022,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, return nullptr; case AMDGPU::V_MAC_F16_e64: IsF16 = true; + LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e64: break; case AMDGPU::V_MAC_F16_e32: IsF16 = true; + LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e32: { int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); @@ -4320,6 +4322,24 @@ SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const return new GCNHazardRecognizer(MF); } +std::pair +SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF & MO_MASK, TF & ~MO_MASK); +} + +ArrayRef> +SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + static const std::pair TargetFlags[] = { + { MO_GOTPCREL, "amdgpu-gotprel" }, + { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, + { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, + { MO_REL32_LO, "amdgpu-rel32-lo" }, + { MO_REL32_HI, "amdgpu-rel32-hi" } + }; + + return makeArrayRef(TargetFlags); +} + bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI); diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 74b48c761808..d00c0d4a7f4e 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -100,6 +100,8 @@ class SIInstrInfo final : public AMDGPUInstrInfo { public: enum TargetOperandFlags { + MO_MASK = 0x7, + MO_NONE = 0, // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. 
MO_GOTPCREL = 1, @@ -781,9 +783,15 @@ class SIInstrInfo final : public AMDGPUInstrInfo { void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry, MachineBasicBlock *LoopEnd) const; + std::pair + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + ArrayRef> getSerializableTargetIndices() const override; + ArrayRef> + getSerializableDirectMachineOperandTargetFlags() const override; + ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 4a81fb3b463a..ffb01363e131 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -1502,6 +1502,8 @@ def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>; def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>; def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>; +def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>; + def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp index bb17dbbdfbd6..34886c48f461 100644 --- a/lib/Target/AMDGPU/SIMachineScheduler.cpp +++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp @@ -38,7 +38,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" // This scheduler implements a different scheduling algorithm than // GenericScheduler. diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 96a18544f02a..874fbadca7f3 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -110,10 +110,8 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, } const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); - const MachineOperand *Src1Mod = - TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); - - if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0))) + if (Src1 && (!isVGPR(Src1, TRI, MRI) || + TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))) return false; // We don't need to check src0, all input types are legal, so just make sure @@ -122,58 +120,64 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, return false; // Check output modifiers - if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod)) - return false; - - return !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp); + return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) && + !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp); } /// \brief This function checks \p MI for operands defined by a move immediate /// instruction and then folds the literal constant into the instruction if it -/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction -/// and will only fold literal constants if we are still in SSA. -static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, +/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions. 
+static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineRegisterInfo &MRI, bool TryToCommute = true) { - - if (!MRI.isSSA()) - return; - assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI)); int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); - // Only one literal constant is allowed per instruction, so if src0 is a - // literal constant then we can't do any folding. - if (TII->isLiteralConstant(MI, Src0Idx)) - return; - // Try to fold Src0 MachineOperand &Src0 = MI.getOperand(Src0Idx); - if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) { + if (Src0.isReg()) { unsigned Reg = Src0.getReg(); - MachineInstr *Def = MRI.getUniqueVRegDef(Reg); - if (Def && Def->isMoveImmediate()) { - MachineOperand &MovSrc = Def->getOperand(1); - bool ConstantFolded = false; + if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) { + MachineInstr *Def = MRI.getUniqueVRegDef(Reg); + if (Def && Def->isMoveImmediate()) { + MachineOperand &MovSrc = Def->getOperand(1); + bool ConstantFolded = false; - if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || - isUInt<32>(MovSrc.getImm()))) { - Src0.ChangeToImmediate(MovSrc.getImm()); - ConstantFolded = true; - } - if (ConstantFolded) { - if (MRI.use_empty(Reg)) + if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || + isUInt<32>(MovSrc.getImm()))) { + // It's possible to have only one component of a super-reg defined by + // a single mov, so we need to clear any subregister flag. + Src0.setSubReg(0); + Src0.ChangeToImmediate(MovSrc.getImm()); + ConstantFolded = true; + } else if (MovSrc.isFI()) { + Src0.setSubReg(0); + Src0.ChangeToFrameIndex(MovSrc.getIndex()); + ConstantFolded = true; + } + + if (ConstantFolded) { + assert(MRI.use_empty(Reg)); Def->eraseFromParent(); - ++NumLiteralConstantsFolded; - return; + ++NumLiteralConstantsFolded; + return true; + } } } } // We have failed to fold src0, so commute the instruction and try again. - if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(MI)) - foldImmediates(MI, TII, MRI, false); + if (TryToCommute && MI.isCommutable()) { + if (TII->commuteInstruction(MI)) { + if (foldImmediates(MI, TII, MRI, false)) + return true; + // Commute back. + TII->commuteInstruction(MI); + } + } + + return false; } // Copy MachineOperand with all flags except setting it as implicit. diff --git a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp index 9908fc003ce7..92fb762ebd73 100644 --- a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp +++ b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp @@ -16,7 +16,7 @@ using namespace llvm; -/// \brief The target which suports all AMD GPUs. This will eventually +/// \brief The target which supports all AMD GPUs. This will eventually /// be deprecated and there will be a R600 target and a GCN target. Target &llvm::getTheAMDGPUTarget() { static Target TheAMDGPUTarget; diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td index 96d343099132..f2de1f995726 100644 --- a/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/lib/Target/AMDGPU/VOP3PInstructions.td @@ -16,12 +16,21 @@ class VOP3PInst !if(P.HasModifiers, getVOP3PModPat.ret, getVOP3Pat.ret) >; -// Non-packed instructions that use the VOP3P encoding. i.e. where -// omod/abs are used. +// Non-packed instructions that use the VOP3P encoding. +// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed. 
class VOP3_VOP3PInst : - VOP3P_Pseudo.ret, getVOP3Pat.ret) ->; + VOP3P_Pseudo { + let InOperandList = + (ins + FP32InputMods:$src0_modifiers, VCSrc_f32:$src0, + FP32InputMods:$src1_modifiers, VCSrc_f32:$src1, + FP32InputMods:$src2_modifiers, VCSrc_f32:$src2, + clampmod:$clamp, + op_sel:$op_sel, + op_sel_hi:$op_sel_hi); + let AsmOperands = + " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp"; +} let isCommutable = 1 in { def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile, fma>; @@ -46,9 +55,12 @@ def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile, lshr_rev>; // XXX - Commutable? -def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile>; -def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile>; -def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile>; +// These are VOP3a-like opcodes which accept no omod. +// Size of src arguments (16/32) is controlled by op_sel. +// For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi. +def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile>; +def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile>; +def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile>; multiclass VOP3P_Real_vi op> { diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td index e386f21c2ba4..77b7952b22a8 100644 --- a/lib/Target/AMDGPU/VOPInstructions.td +++ b/lib/Target/AMDGPU/VOPInstructions.td @@ -51,12 +51,8 @@ class VOP3Common pattern = [], let AsmVariantName = AMDGPUAsmVariants.VOP3; let AsmMatchConverter = - !if(!eq(VOP3Only,1), - !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"), - !if(!eq(P.HasModifiers, 1), - "cvtVOP3_2_mod", - !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "") - ) - ); + !if(!and(P.IsPacked, isVOP3P), + "cvtVOP3P", + !if(!or(P.HasModifiers, P.HasOMod), + "cvtVOP3", + "")); VOPProfile Pfl = P; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 90f635c81254..582153daebde 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1103,6 +1103,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { case ARM::tPUSH: // Special case here: no src & dst reg, but two extra imp ops. StartOp = 2; NumOffset = 2; + LLVM_FALLTHROUGH; case ARM::STMDB_UPD: case ARM::t2STMDB_UPD: case ARM::VSTMDDB_UPD: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 1ec6b24b2ed6..3cf5950a1918 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1880,6 +1880,9 @@ isProfitableToIfCvt(MachineBasicBlock &TBB, // Diamond: TBB is the block that is branched to, FBB is the fallthrough TUnpredCycles = TCycles + TakenBranchCost; FUnpredCycles = FCycles + NotTakenBranchCost; + // The branch at the end of FBB will disappear when it's predicated, so + // discount it from PredCost. 
+ PredCost -= 1 * ScalingUpFactor; } // The total cost is the cost of each path scaled by their probabilites unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b4fb292c0116..e97a7ce5067f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -193,10 +193,11 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned R = 0; R < 16; ++R) markSuperRegs(Reserved, ARM::D16 + R); } - const TargetRegisterClass *RC = &ARM::GPRPairRegClass; - for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I) - for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI) - if (Reserved.test(*SI)) markSuperRegs(Reserved, *I); + const TargetRegisterClass &RC = ARM::GPRPairRegClass; + for (unsigned Reg : RC) + for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI) + if (Reserved.test(*SI)) + markSuperRegs(Reserved, Reg); assert(checkAllSuperRegsMarked(Reserved)); return Reserved; @@ -315,8 +316,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, Hints.push_back(PairedPhys); // Then prefer even or odd registers. - for (unsigned I = 0, E = Order.size(); I != E; ++I) { - unsigned Reg = Order[I]; + for (unsigned Reg : Order) { if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd) continue; // Don't provide hints that are paired to a reserved register. @@ -659,11 +659,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned Ba const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); unsigned i = 0; - - while (!MI->getOperand(i).isFI()) { - ++i; - assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); - } + for (; !MI->getOperand(i).isFI(); ++i) + assert(i+1 < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!"); // AddrMode4 and AddrMode6 cannot handle any offset. if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index e498f70b820d..051827a6a6a2 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -321,7 +321,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); - // The necesary extensions are handled on the other side of the ABI + // The necessary extensions are handled on the other side of the ABI // boundary. 
markPhysRegUsed(PhysReg); MIRBuilder.buildCopy(ValVReg, PhysReg); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e42514acd76f..6ba7593543a9 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3398,9 +3398,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { SDLoc dl(Op); - ConstantSDNode *ScopeN = cast(Op.getOperand(2)); - auto Scope = static_cast(ScopeN->getZExtValue()); - if (Scope == SynchronizationScope::SingleThread) + ConstantSDNode *SSIDNode = cast(Op.getOperand(2)); + auto SSID = static_cast(SSIDNode->getZExtValue()); + if (SSID == SyncScope::SingleThread) return Op; if (!Subtarget->hasDataBarrier()) { @@ -5356,15 +5356,15 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { // Integer comparisons. switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); - case ISD::SETNE: Invert = true; + case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; - case ISD::SETLT: Swap = true; + case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGT: Opc = ARMISD::VCGT; break; - case ISD::SETLE: Swap = true; + case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGE: Opc = ARMISD::VCGE; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGT: Opc = ARMISD::VCGTU; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } @@ -13779,7 +13779,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SV->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SV].push_back(SubVec); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 5044134f5b1e..f05b14255236 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -510,7 +510,8 @@ class InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; - bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const override { + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override { // Do not merge to larger than i32. return (MemVT.getSizeInBits() <= 32); } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 53db5acbe805..42eac12e457b 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4799,7 +4799,7 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm", // Pseudo instruction ldr Rt, =immediate def t2LDRConstPool : t2AsmPseudo<"ldr${p} $Rt, $immediate", - (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; + (ins GPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; // Version w/ the .w suffix. 
def : t2InstAlias<"ldr${p}.w $Rt, $immediate", (t2LDRConstPool GPRnopc:$Rt, diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 374176d1d737..29ef69ad0010 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -20,6 +20,8 @@ #define DEBUG_TYPE "arm-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -42,13 +44,32 @@ class ARMInstructionSelector : public InstructionSelector { private: bool selectImpl(MachineInstr &I) const; - bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; + struct CmpConstants; + struct InsertInfo; - bool selectSelect(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; + bool selectCmp(CmpConstants Helper, MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const; + + // Helper for inserting a comparison sequence that sets \p ResReg to either 1 + // if \p LHSReg and \p RHSReg are in the relationship defined by \p Cond, or + // \p PrevRes otherwise. In essence, it computes PrevRes OR (LHS Cond RHS). + bool insertComparison(CmpConstants Helper, InsertInfo I, unsigned ResReg, + ARMCC::CondCodes Cond, unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const; + + // Set \p DestReg to \p Constant. + void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const; + + bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; + + // Check if the types match and both operands have the expected size and + // register bank. + bool validOpRegPair(MachineRegisterInfo &MRI, unsigned LHS, unsigned RHS, + unsigned ExpectedSize, unsigned ExpectedRegBankID) const; + + // Check if the register has the expected size and register bank. + bool validReg(MachineRegisterInfo &MRI, unsigned Reg, unsigned ExpectedSize, + unsigned ExpectedRegBankID) const; const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; @@ -251,120 +272,233 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, return Opc; } -static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { +// When lowering comparisons, we sometimes need to perform two compares instead +// of just one. Get the condition codes for both comparisons. If only one is +// needed, the second member of the pair is ARMCC::AL. +static std::pair +getComparePreds(CmpInst::Predicate Pred) { + std::pair Preds = {ARMCC::AL, ARMCC::AL}; switch (Pred) { - // Needs two compares... case CmpInst::FCMP_ONE: + Preds = {ARMCC::GT, ARMCC::MI}; + break; case CmpInst::FCMP_UEQ: - default: - // AL is our "false" for now. The other two need more compares. 
- return ARMCC::AL; + Preds = {ARMCC::EQ, ARMCC::VS}; + break; case CmpInst::ICMP_EQ: case CmpInst::FCMP_OEQ: - return ARMCC::EQ; + Preds.first = ARMCC::EQ; + break; case CmpInst::ICMP_SGT: case CmpInst::FCMP_OGT: - return ARMCC::GT; + Preds.first = ARMCC::GT; + break; case CmpInst::ICMP_SGE: case CmpInst::FCMP_OGE: - return ARMCC::GE; + Preds.first = ARMCC::GE; + break; case CmpInst::ICMP_UGT: case CmpInst::FCMP_UGT: - return ARMCC::HI; + Preds.first = ARMCC::HI; + break; case CmpInst::FCMP_OLT: - return ARMCC::MI; + Preds.first = ARMCC::MI; + break; case CmpInst::ICMP_ULE: case CmpInst::FCMP_OLE: - return ARMCC::LS; + Preds.first = ARMCC::LS; + break; case CmpInst::FCMP_ORD: - return ARMCC::VC; + Preds.first = ARMCC::VC; + break; case CmpInst::FCMP_UNO: - return ARMCC::VS; + Preds.first = ARMCC::VS; + break; case CmpInst::FCMP_UGE: - return ARMCC::PL; + Preds.first = ARMCC::PL; + break; case CmpInst::ICMP_SLT: case CmpInst::FCMP_ULT: - return ARMCC::LT; + Preds.first = ARMCC::LT; + break; case CmpInst::ICMP_SLE: case CmpInst::FCMP_ULE: - return ARMCC::LE; + Preds.first = ARMCC::LE; + break; case CmpInst::FCMP_UNE: case CmpInst::ICMP_NE: - return ARMCC::NE; + Preds.first = ARMCC::NE; + break; case CmpInst::ICMP_UGE: - return ARMCC::HS; + Preds.first = ARMCC::HS; + break; case CmpInst::ICMP_ULT: - return ARMCC::LO; + Preds.first = ARMCC::LO; + break; + default: + break; } + assert(Preds.first != ARMCC::AL && "No comparisons needed?"); + return Preds; } -bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { - auto &MBB = *MIB->getParent(); - auto InsertBefore = std::next(MIB->getIterator()); - auto &DebugLoc = MIB->getDebugLoc(); +struct ARMInstructionSelector::CmpConstants { + CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank, + unsigned OpSize) + : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode), + OperandRegBankID(OpRegBank), OperandSize(OpSize) {} - // Move 0 into the result register. - auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi)) - .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass)) - .addImm(0) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); - if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI)) + // The opcode used for performing the comparison. + const unsigned ComparisonOpcode; + + // The opcode used for reading the flags set by the comparison. May be + // ARM::INSTRUCTION_LIST_END if we don't need to read the flags. + const unsigned ReadFlagsOpcode; + + // The assumed register bank ID for the operands. + const unsigned OperandRegBankID; + + // The assumed size in bits for the operands. 
+ const unsigned OperandSize; +}; + +struct ARMInstructionSelector::InsertInfo { + InsertInfo(MachineInstrBuilder &MIB) + : MBB(*MIB->getParent()), InsertBefore(std::next(MIB->getIterator())), + DbgLoc(MIB->getDebugLoc()) {} + + MachineBasicBlock &MBB; + const MachineBasicBlock::instr_iterator InsertBefore; + const DebugLoc &DbgLoc; +}; + +void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg, + unsigned Constant) const { + (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi)) + .addDef(DestReg) + .addImm(Constant) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); +} + +bool ARMInstructionSelector::validOpRegPair(MachineRegisterInfo &MRI, + unsigned LHSReg, unsigned RHSReg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + return MRI.getType(LHSReg) == MRI.getType(RHSReg) && + validReg(MRI, LHSReg, ExpectedSize, ExpectedRegBankID) && + validReg(MRI, RHSReg, ExpectedSize, ExpectedRegBankID); +} + +bool ARMInstructionSelector::validReg(MachineRegisterInfo &MRI, unsigned Reg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + if (MRI.getType(Reg).getSizeInBits() != ExpectedSize) { + DEBUG(dbgs() << "Unexpected size for register"); return false; + } - // Perform the comparison. - auto LHSReg = MIB->getOperand(2).getReg(); - auto RHSReg = MIB->getOperand(3).getReg(); - assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) && - MRI.getType(LHSReg).getSizeInBits() == 32 && - MRI.getType(RHSReg).getSizeInBits() == 32 && - "Unsupported types for comparison operation"); - auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr)) - .addUse(LHSReg) - .addUse(RHSReg) - .add(predOps(ARMCC::AL)); - if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + if (RBI.getRegBank(Reg, MRI, TRI)->getID() != ExpectedRegBankID) { + DEBUG(dbgs() << "Unexpected register bank for register"); return false; + } + + return true; +} + +bool ARMInstructionSelector::selectCmp(CmpConstants Helper, + MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const { + const InsertInfo I(MIB); - // Move 1 into the result register if the flags say so. auto ResReg = MIB->getOperand(0).getReg(); + if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID)) + return false; + auto Cond = static_cast(MIB->getOperand(1).getPredicate()); - auto ARMCond = getComparePred(Cond); - if (ARMCond == ARMCC::AL) + if (Cond == CmpInst::FCMP_TRUE || Cond == CmpInst::FCMP_FALSE) { + putConstant(I, ResReg, Cond == CmpInst::FCMP_TRUE ? 1 : 0); + MIB->eraseFromParent(); + return true; + } + + auto LHSReg = MIB->getOperand(2).getReg(); + auto RHSReg = MIB->getOperand(3).getReg(); + if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize, + Helper.OperandRegBankID)) return false; - auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi)) - .addDef(ResReg) - .addUse(Mov0I->getOperand(0).getReg()) - .addImm(1) - .add(predOps(ARMCond, ARM::CPSR)); - if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) - return false; + auto ARMConds = getComparePreds(Cond); + auto ZeroReg = MRI.createVirtualRegister(&ARM::GPRRegClass); + putConstant(I, ZeroReg, 0); + + if (ARMConds.second == ARMCC::AL) { + // Simple case, we only need one comparison and we're done. + if (!insertComparison(Helper, I, ResReg, ARMConds.first, LHSReg, RHSReg, + ZeroReg)) + return false; + } else { + // Not so simple, we need two successive comparisons. 
+ auto IntermediateRes = MRI.createVirtualRegister(&ARM::GPRRegClass); + if (!insertComparison(Helper, I, IntermediateRes, ARMConds.first, LHSReg, + RHSReg, ZeroReg)) + return false; + if (!insertComparison(Helper, I, ResReg, ARMConds.second, LHSReg, RHSReg, + IntermediateRes)) + return false; + } MIB->eraseFromParent(); return true; } +bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I, + unsigned ResReg, + ARMCC::CondCodes Cond, + unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const { + // Perform the comparison. + auto CmpI = + BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Helper.ComparisonOpcode)) + .addUse(LHSReg) + .addUse(RHSReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Read the comparison flags (if necessary). + if (Helper.ReadFlagsOpcode != ARM::INSTRUCTION_LIST_END) { + auto ReadI = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, + TII.get(Helper.ReadFlagsOpcode)) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*ReadI, TII, TRI, RBI)) + return false; + } + + // Select either 1 or the previous result based on the value of the flags. + auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi)) + .addDef(ResReg) + .addUse(PrevRes) + .addImm(1) + .add(predOps(Cond, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) + return false; + + return true; +} + bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { + MachineRegisterInfo &MRI) const { auto &MBB = *MIB->getParent(); auto InsertBefore = std::next(MIB->getIterator()); - auto &DebugLoc = MIB->getDebugLoc(); + auto &DbgLoc = MIB->getDebugLoc(); // Compare the condition to 0. 
auto CondReg = MIB->getOperand(1).getReg(); - assert(MRI.getType(CondReg).getSizeInBits() == 1 && - RBI.getRegBank(CondReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) && "Unsupported types for select operation"); - auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPri)) + auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri)) .addUse(CondReg) .addImm(0) .add(predOps(ARMCC::AL)); @@ -376,13 +510,10 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, auto ResReg = MIB->getOperand(0).getReg(); auto TrueReg = MIB->getOperand(2).getReg(); auto FalseReg = MIB->getOperand(3).getReg(); - assert(MRI.getType(ResReg) == MRI.getType(TrueReg) && - MRI.getType(TrueReg) == MRI.getType(FalseReg) && - MRI.getType(FalseReg).getSizeInBits() == 32 && - RBI.getRegBank(TrueReg, MRI, TRI)->getID() == ARM::GPRRegBankID && - RBI.getRegBank(FalseReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) && + validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) && "Unsupported types for select operation"); - auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCr)) + auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr)) .addDef(ResReg) .addUse(TrueReg) .addUse(FalseReg) @@ -494,10 +625,32 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } - case G_ICMP: - return selectICmp(MIB, TII, MRI, TRI, RBI); case G_SELECT: - return selectSelect(MIB, TII, MRI, TRI, RBI); + return selectSelect(MIB, MRI); + case G_ICMP: { + CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END, + ARM::GPRRegBankID, 32); + return selectCmp(Helper, MIB, MRI); + } + case G_FCMP: { + assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP"); + + unsigned OpReg = I.getOperand(2).getReg(); + unsigned Size = MRI.getType(OpReg).getSizeInBits(); + + if (Size == 64 && TII.getSubtarget().isFPOnlySP()) { + DEBUG(dbgs() << "Subtarget only supports single precision"); + return false; + } + if (Size != 32 && Size != 64) { + DEBUG(dbgs() << "Unsupported size for G_FCMP operand"); + return false; + } + + CmpConstants Helper(Size == 32 ? 
ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT, + ARM::FPRRegBankID, Size); + return selectCmp(Helper, MIB, MRI); + } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); @@ -510,11 +663,10 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { break; case G_CONSTANT: { unsigned Reg = I.getOperand(0).getReg(); - if (MRI.getType(Reg).getSizeInBits() != 32) + + if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID)) return false; - assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Expected constant to live in a GPR"); I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index f3e62d09cc30..f23e62595d2e 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -28,6 +28,10 @@ using namespace llvm; #error "You shouldn't build this" #endif +static bool AEABI(const ARMSubtarget &ST) { + return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); +} + ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { using namespace TargetOpcode; @@ -66,8 +70,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { for (unsigned Op : {G_SREM, G_UREM}) if (ST.hasDivideInARMMode()) setAction({Op, s32}, Lower); - else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() || - ST.isTargetMuslAEABI()) + else if (AEABI(ST)) setAction({Op, s32}, Custom); else setAction({Op, s32}, Libcall); @@ -86,6 +89,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_SELECT, 1, s1}, Legal); setAction({G_CONSTANT, s32}, Legal); + for (auto Ty : {s1, s8, s16}) + setAction({G_CONSTANT, Ty}, WidenScalar); setAction({G_ICMP, s1}, Legal); for (auto Ty : {s8, s16}) @@ -99,9 +104,22 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_LOAD, s64}, Legal); setAction({G_STORE, s64}, Legal); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Legal); + setAction({G_FCMP, 1, s64}, Legal); } else { for (auto Ty : {s32, s64}) setAction({G_FADD, Ty}, Libcall); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Custom); + setAction({G_FCMP, 1, s64}, Custom); + + if (AEABI(ST)) + setFCmpLibcallsAEABI(); + else + setFCmpLibcallsGNU(); } for (unsigned Op : {G_FREM, G_FPOW}) @@ -111,11 +129,120 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { computeTables(); } +void ARMLegalizerInfo::setFCmpLibcallsAEABI() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. 
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; +} + +void ARMLegalizerInfo::setFCmpLibcallsGNU() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. 
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}, + {RTLIB::UO_F32, CmpInst::ICMP_NE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}, + {RTLIB::UO_F64, CmpInst::ICMP_NE}}; +} + +ARMLegalizerInfo::FCmpLibcallsList +ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, + unsigned Size) const { + assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate"); + if (Size == 32) + return FCmp32Libcalls[Predicate]; + if (Size == 64) + return FCmp64Libcalls[Predicate]; + llvm_unreachable("Unsupported size for FCmp predicate"); +} + bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { using namespace TargetOpcode; + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { default: return false; @@ -137,9 +264,9 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, auto RetVal = MRI.createGenericVirtualRegister( getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout())); - auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy}, - {{MI.getOperand(1).getReg(), ArgTy}, - {MI.getOperand(2).getReg(), ArgTy}}); + auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy}, + {{MI.getOperand(1).getReg(), ArgTy}, + {MI.getOperand(2).getReg(), ArgTy}}); if (Status != LegalizerHelper::Legalized) 
return false; @@ -149,8 +276,76 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, MIRBuilder.buildUnmerge( {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult}, RetVal); + break; + } + case G_FCMP: { + assert(MRI.getType(MI.getOperand(2).getReg()) == + MRI.getType(MI.getOperand(3).getReg()) && + "Mismatched operands for G_FCMP"); + auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); - return LegalizerHelper::Legalized; + auto OriginalResult = MI.getOperand(0).getReg(); + auto Predicate = + static_cast(MI.getOperand(1).getPredicate()); + auto Libcalls = getFCmpLibcalls(Predicate, OpSize); + + if (Libcalls.empty()) { + assert((Predicate == CmpInst::FCMP_TRUE || + Predicate == CmpInst::FCMP_FALSE) && + "Predicate needs libcalls, but none specified"); + MIRBuilder.buildConstant(OriginalResult, + Predicate == CmpInst::FCMP_TRUE ? 1 : 0); + MI.eraseFromParent(); + return true; + } + + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size"); + auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); + auto *RetTy = Type::getInt32Ty(Ctx); + + SmallVector Results; + for (auto Libcall : Libcalls) { + auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32)); + auto Status = + createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy}, + {{MI.getOperand(2).getReg(), ArgTy}, + {MI.getOperand(3).getReg(), ArgTy}}); + + if (Status != LegalizerHelper::Legalized) + return false; + + auto ProcessedResult = + Libcalls.size() == 1 + ? OriginalResult + : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult)); + + // We have a result, but we need to transform it into a proper 1-bit 0 or + // 1, taking into account the different peculiarities of the values + // returned by the comparison functions. + CmpInst::Predicate ResultPred = Libcall.Predicate; + if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) { + // We have a nice 0 or 1, and we just need to truncate it back to 1 bit + // to keep the types consistent. + MIRBuilder.buildTrunc(ProcessedResult, LibcallResult); + } else { + // We need to compare against 0. + assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate"); + auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32)); + MIRBuilder.buildConstant(Zero, 0); + MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero); + } + Results.push_back(ProcessedResult); + } + + if (Results.size() != 1) { + assert(Results.size() == 2 && "Unexpected number of results"); + MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]); + } + break; } } + + MI.eraseFromParent(); + return true; } diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h index a9bdd367737e..78ab9412c04b 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.h +++ b/lib/Target/ARM/ARMLegalizerInfo.h @@ -14,7 +14,10 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H #define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H +#include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/IR/Instructions.h" namespace llvm { @@ -27,6 +30,36 @@ class ARMLegalizerInfo : public LegalizerInfo { bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const override; + +private: + void setFCmpLibcallsGNU(); + void setFCmpLibcallsAEABI(); + + struct FCmpLibcallInfo { + // Which libcall this is. 
+ RTLIB::Libcall LibcallID; + + // The predicate to be used when comparing the value returned by the + // function with a relevant constant (currently hard-coded to zero). This is + // necessary because often the libcall will return e.g. a value greater than + // 0 to represent 'true' and anything negative to represent 'false', or + // maybe 0 to represent 'true' and non-zero for 'false'. If no comparison is + // needed, this should be CmpInst::BAD_ICMP_PREDICATE. + CmpInst::Predicate Predicate; + }; + using FCmpLibcallsList = SmallVector; + + // Map from each FCmp predicate to the corresponding libcall infos. A FCmp + // instruction may be lowered to one or two libcalls, which is why we need a + // list. If two libcalls are needed, their results will be OR'ed. + using FCmpLibcallsMapTy = IndexedMap; + + FCmpLibcallsMapTy FCmp32Libcalls; + FCmpLibcallsMapTy FCmp64Libcalls; + + // Get the libcall(s) corresponding to \p Predicate for operands of \p Size + // bits. + FCmpLibcallsList getFCmpLibcalls(CmpInst::Predicate, unsigned Size) const; }; } // End llvm namespace. #endif diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index 11fb81a4f9fe..c0c09e8c15af 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -212,8 +212,6 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned NumOperands = MI.getNumOperands(); const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; @@ -236,26 +234,31 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; case G_LOAD: - case G_STORE: + case G_STORE: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); OperandsMapping = Ty.getSizeInBits() == 64 ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], &ARM::ValueMappings[ARM::GPR3OpsIdx]}) : &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; - case G_FADD: + } + case G_FADD: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) && "Unsupported size for G_FADD"); OperandsMapping = Ty.getSizeInBits() == 64 ? &ARM::ValueMappings[ARM::DPR3OpsIdx] : &ARM::ValueMappings[ARM::SPR3OpsIdx]; break; + } case G_CONSTANT: case G_FRAME_INDEX: OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; case G_SELECT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(1).getReg()); (void)Ty2; assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT"); @@ -277,9 +280,29 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { &ARM::ValueMappings[ARM::GPR3OpsIdx]}); break; } + case G_FCMP: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty1 = MRI.getType(MI.getOperand(2).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + (void)Ty2; + assert(Ty.getSizeInBits() == 1 && "Unsupported size for G_FCMP"); + assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() && + "Mismatched operand sizes for G_FCMP"); + + unsigned Size = Ty1.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP"); + + auto FPRValueMapping = Size == 32 ? 
&ARM::ValueMappings[ARM::SPR3OpsIdx] + : &ARM::ValueMappings[ARM::DPR3OpsIdx]; + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + FPRValueMapping, FPRValueMapping}); + break; + } case G_MERGE_VALUES: { // We only support G_MERGE_VALUES for creating a double precision floating // point value out of two GPRs. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || @@ -294,6 +317,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_UNMERGE_VALUES: { // We only support G_UNMERGE_VALUES for splitting a double precision // floating point value into two GPRs. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 || diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 8eb9dbf5f9de..51b0fedd2b54 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -15,6 +15,24 @@ using namespace llvm; #define DEBUG_TYPE "armtti" +bool ARMTTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // To inline a callee, all features not in the whitelist must match exactly. + bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) == + (CalleeBits & ~InlineFeatureWhitelist); + // For features in the whitelist, the callee's features must be a subset of + // the callers'. + bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) == + (CalleeBits & InlineFeatureWhitelist); + return MatchExact && MatchSubset; +} + int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 8a1a37863877..0695a4e63346 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,6 +33,39 @@ class ARMTTIImpl : public BasicTTIImplBase { const ARMSubtarget *ST; const ARMTargetLowering *TLI; + // Currently the following features are excluded from InlineFeatureWhitelist. + // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16 + // Depending on whether they are set or unset, different + // instructions/registers are available. For example, inlining a callee with + // -thumb-mode in a caller with +thumb-mode, may cause the assembler to + // fail if the callee uses ARM only instructions, e.g. in inline asm. 
+ const FeatureBitset InlineFeatureWhitelist = { + ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, + ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, + ARM::FeatureFullFP16, ARM::FeatureHWDivThumb, + ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, + ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, + ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, + ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS, + ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing, + ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32, + ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR, + ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits, + ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg, + ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx, + ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs, + ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign, + ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding, + ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR, + ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp, + ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor, + ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, + ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, + ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, + ARM::FeatureNoNegativeImmediates + }; + const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } @@ -41,6 +74,9 @@ class ARMTTIImpl : public BasicTTIImplBase { : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + bool enableInterleavedAccessVectorization() { return true; } /// Floating-point computation using ARMv8 AArch32 Advanced diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 891b5c60e1fd..1129826f21f6 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5249,6 +5249,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // Fall though for the Identifier case that is not a register or a // special name. 
+ LLVM_FALLTHROUGH; } case AsmToken::LParen: // parenthesized expressions like (_strcmp-4) case AsmToken::Integer: // things like 1f and 2b as a branch targets @@ -8992,6 +8993,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, return PlainMatchResult; } +std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS); + static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, @@ -9085,9 +9088,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(ErrorLoc, "invalid operand for instruction"); } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction", + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = ARMMnemonicSpellCheck( + ((ARMOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, ((ARMOperand &)*Operands[0]).getLocRange()); + } case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 22de728fe06e..a77df7a2598f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -361,9 +361,8 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, uint64_t Value, - bool IsPCRel, MCContext &Ctx, - bool IsLittleEndian, - bool IsResolved) const { + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const { unsigned Kind = Fixup.getKind(); // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT @@ -392,7 +391,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, case FK_SecRel_4: return Value; case ARM::fixup_arm_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_arm_movw_lo16: { @@ -404,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, return Value; } case ARM::fixup_t2_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_t2_movw_lo16: { @@ -885,11 +884,11 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); MCContext &Ctx = Asm.getContext(); - Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx, - IsLittleEndian, true); + Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx, + IsLittleEndian); if (!Value) return; // Doesn't change encoding. 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 84b54bbb9a49..02374966dafe 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -42,13 +42,13 @@ class ARMAsmBackend : public MCAsmBackend { const MCValue &Target) override; unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, uint64_t Value, bool IsPCRel, - MCContext &Ctx, bool IsLittleEndian, - bool IsResolved) const; + const MCValue &Target, uint64_t Value, + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; unsigned getRelaxedOpcode(unsigned Op) const; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 0b6574c37de1..5709b4e61798 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -236,7 +236,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R12: if (STI.splitFramePushPop(MF)) break; - // fallthough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp index f0c7b11895b4..c058c9e1f534 100644 --- a/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/lib/Target/AVR/AVRAsmPrinter.cpp @@ -149,7 +149,10 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, (void)MO; assert(MO.isReg() && "Unexpected inline asm memory operand"); - // TODO: We can look up the alternative name for the register if it's given. + // TODO: We should be able to look up the alternative name for + // the register if it's given. + // TableGen doesn't expose a way of getting retrieving names + // for registers. if (MI->getOperand(OpNum).getReg() == AVR::R31R30) { O << "Z"; } else { diff --git a/lib/Target/AVR/AVRDevices.td b/lib/Target/AVR/AVRDevices.td index 9224af613d14..62def4574437 100644 --- a/lib/Target/AVR/AVRDevices.td +++ b/lib/Target/AVR/AVRDevices.td @@ -6,7 +6,6 @@ // :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD. // In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`. // avr2 (with SRAM) adds the rest of the variants. -// :TODO: s/AVRTiny/Tiny // A feature set aggregates features, grouping them. 
We don't want to create a @@ -136,7 +135,7 @@ def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">; def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">; def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">; def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">; -def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">; +def ELFArchTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">; def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">; def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">; def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">; @@ -189,7 +188,7 @@ def FamilyAVR51 : Family<"avr51", def FamilyAVR6 : Family<"avr6", [FamilyAVR51]>; -def FamilyAVRTiny : Family<"avrtiny", +def FamilyTiny : Family<"avrtiny", [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding]>; @@ -240,7 +239,7 @@ def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>; def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>; def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>; def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>; -def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"avrtiny", FamilyTiny, ELFArchTiny>; // Specific MCUs def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>; @@ -480,12 +479,12 @@ def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>; def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>; def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>; def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>; -def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny102", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny104", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny4", FamilyTiny, ELFArchTiny>; +def : Device<"attiny5", FamilyTiny, ELFArchTiny>; +def : Device<"attiny9", FamilyTiny, ELFArchTiny>; +def : Device<"attiny10", FamilyTiny, ELFArchTiny>; +def : Device<"attiny20", FamilyTiny, ELFArchTiny>; +def : Device<"attiny40", FamilyTiny, ELFArchTiny>; +def : Device<"attiny102", FamilyTiny, ELFArchTiny>; +def : Device<"attiny104", FamilyTiny, ELFArchTiny>; diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp index afba66b2e69b..744aa723c416 100644 --- a/lib/Target/AVR/AVRInstrInfo.cpp +++ b/lib/Target/AVR/AVRInstrInfo.cpp @@ -402,7 +402,7 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, ArrayRef Cond, const DebugLoc &DL, int *BytesAdded) const { - assert(!BytesAdded && "code size not handled"); + if (BytesAdded) *BytesAdded = 0; // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); @@ -411,19 +411,24 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, if (Cond.empty()) { assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(TBB); + auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); return 1; } // Conditional branch. unsigned Count = 0; AVRCC::CondCodes CC = (AVRCC::CondCodes)Cond[0].getImm(); - BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB); + auto &CondMI = *BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB); + + if (BytesAdded) *BytesAdded += getInstSizeInBytes(CondMI); ++Count; if (FBB) { // Two-way Conditional branch. Insert the second branch. 
- BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB); + auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB); + if (BytesAdded) *BytesAdded += getInstSizeInBytes(MI); ++Count; } @@ -432,7 +437,7 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { - assert(!BytesRemoved && "code size not handled"); + if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; @@ -450,6 +455,7 @@ unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, } // Remove the branch. + if (BytesRemoved) *BytesRemoved += getInstSizeInBytes(*I); I->eraseFromParent(); I = MBB.end(); ++Count; @@ -494,5 +500,61 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { } } +MachineBasicBlock * +AVRInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected opcode!"); + case AVR::JMPk: + case AVR::CALLk: + case AVR::RCALLk: + case AVR::RJMPk: + case AVR::BREQk: + case AVR::BRNEk: + case AVR::BRSHk: + case AVR::BRLOk: + case AVR::BRMIk: + case AVR::BRPLk: + case AVR::BRGEk: + case AVR::BRLTk: + return MI.getOperand(0).getMBB(); + case AVR::BRBSsk: + case AVR::BRBCsk: + return MI.getOperand(1).getMBB(); + case AVR::SBRCRrB: + case AVR::SBRSRrB: + case AVR::SBICAb: + case AVR::SBISAb: + llvm_unreachable("unimplemented branch instructions"); + } +} + +bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp, + int64_t BrOffset) const { + + switch (BranchOp) { + default: + llvm_unreachable("unexpected opcode!"); + case AVR::JMPk: + case AVR::CALLk: + assert(BrOffset >= 0 && "offset must be absolute address"); + return isUIntN(16, BrOffset); + case AVR::RCALLk: + case AVR::RJMPk: + return isIntN(13, BrOffset); + case AVR::BRBSsk: + case AVR::BRBCsk: + case AVR::BREQk: + case AVR::BRNEk: + case AVR::BRSHk: + case AVR::BRLOk: + case AVR::BRMIk: + case AVR::BRPLk: + case AVR::BRGEk: + case AVR::BRLTk: + return isIntN(7, BrOffset); + } +} + } // end of namespace llvm diff --git a/lib/Target/AVR/AVRInstrInfo.h b/lib/Target/AVR/AVRInstrInfo.h index c5105dafe5eb..f42d34fb2848 100644 --- a/lib/Target/AVR/AVRInstrInfo.h +++ b/lib/Target/AVR/AVRInstrInfo.h @@ -103,6 +103,10 @@ class AVRInstrInfo : public AVRGenInstrInfo { bool reverseBranchCondition(SmallVectorImpl &Cond) const override; + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; + + bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const override; private: const AVRRegisterInfo RI; }; diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td index 5dd8b2c27b21..184e4d53f7c8 100644 --- a/lib/Target/AVR/AVRInstrInfo.td +++ b/lib/Target/AVR/AVRInstrInfo.td @@ -1411,17 +1411,11 @@ hasSideEffects = 0 in def LPMRdZ : FLPMX<0, 0, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpm\t$dst, $z", []>, Requires<[HasLPMX]>; - def LPMWRdZ : Pseudo<(outs DREGS:$dst), - (ins ZREGS:$z), - "lpmw\t$dst, $z", - []>, - Requires<[HasLPMX]>; - // Load program memory, while postincrementing the Z register. 
let mayLoad = 1, Defs = [R31R30] in @@ -1429,13 +1423,19 @@ hasSideEffects = 0 in def LPMRdZPi : FLPMX<0, 1, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpm\t$dst, $z+", []>, Requires<[HasLPMX]>; + def LPMWRdZ : Pseudo<(outs DREGS:$dst), + (ins ZREG:$z), + "lpmw\t$dst, $z", + []>, + Requires<[HasLPMX]>; + def LPMWRdZPi : Pseudo<(outs DREGS:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpmw\t$dst, $z+", []>, Requires<[HasLPMX]>; @@ -1458,7 +1458,7 @@ hasSideEffects = 0 in def ELPMRdZ : FLPMX<1, 0, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "elpm\t$dst, $z", []>, Requires<[HasELPMX]>; @@ -1467,7 +1467,7 @@ hasSideEffects = 0 in def ELPMRdZPi : FLPMX<1, 1, (outs GPR8:$dst), - (ins ZREGS: $z), + (ins ZREG: $z), "elpm\t$dst, $z+", []>, Requires<[HasELPMX]>; @@ -1487,7 +1487,7 @@ let Uses = [R1, R0] in let Defs = [R31R30] in def SPMZPi : F16<0b1001010111111000, (outs), - (ins ZREGS:$z), + (ins ZREG:$z), "spm $z+", []>, Requires<[HasSPMX]>; @@ -1564,28 +1564,28 @@ hasSideEffects = 0 in // Read-Write-Modify (RMW) instructions. def XCHZRd : FZRd<0b100, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "xch\t$z, $rd", []>, Requires<[SupportsRMW]>; def LASZRd : FZRd<0b101, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "las\t$z, $rd", []>, Requires<[SupportsRMW]>; def LACZRd : FZRd<0b110, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "lac\t$z, $rd", []>, Requires<[SupportsRMW]>; def LATZRd : FZRd<0b111, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "lat\t$z, $rd", []>, Requires<[SupportsRMW]>; diff --git a/lib/Target/AVR/AVRMCInstLower.cpp b/lib/Target/AVR/AVRMCInstLower.cpp index 475dda420e89..dfefd09bc4b8 100644 --- a/lib/Target/AVR/AVRMCInstLower.cpp +++ b/lib/Target/AVR/AVRMCInstLower.cpp @@ -37,10 +37,22 @@ MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO, Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); } + bool IsFunction = MO.isGlobal() && isa(MO.getGlobal()); + if (TF & AVRII::MO_LO) { - Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx); + if (IsFunction) { + // N.B. Should we use _GS fixups here to cope with >128k progmem? + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_PM_LO8, Expr, IsNegated, Ctx); + } else { + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx); + } } else if (TF & AVRII::MO_HI) { - Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx); + if (IsFunction) { + // N.B. Should we use _GS fixups here to cope with >128k progmem? + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_PM_HI8, Expr, IsNegated, Ctx); + } else { + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx); + } } else if (TF != 0) { llvm_unreachable("Unknown target flag on symbol operand"); } diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index 55f3f5cf428a..249dc5512c28 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -95,7 +95,8 @@ AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, } /// Fold a frame offset shared between two add instructions into a single one. -static void foldFrameOffset(MachineInstr &MI, int &Offset, unsigned DstReg) { +static void foldFrameOffset(MachineBasicBlock::iterator &II, int &Offset, unsigned DstReg) { + MachineInstr &MI = *II; int Opcode = MI.getOpcode(); // Don't bother trying if the next instruction is not an add or a sub. 
@@ -120,6 +121,7 @@ static void foldFrameOffset(MachineInstr &MI, int &Offset, unsigned DstReg) { } // Finally remove the instruction. + II++; MI.eraseFromParent(); } @@ -158,6 +160,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned DstReg = MI.getOperand(0).getReg(); assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer"); + II++; // Skip over the FRMIDX (and now MOVW) instruction. + // Generally, to load a frame address two add instructions are emitted that // could get folded into a single one: // movw r31:r30, r29:r28 @@ -166,7 +170,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to: // movw r31:r30, r29:r28 // adiw r31:r30, 45 - foldFrameOffset(*std::next(II), Offset, DstReg); + if (II != MBB.end()) + foldFrameOffset(II, Offset, DstReg); // Select the best opcode based on DstReg and the offset size. switch (DstReg) { @@ -187,7 +192,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } - MachineInstr *New = BuildMI(MBB, std::next(II), dl, TII.get(Opcode), DstReg) + MachineInstr *New = BuildMI(MBB, II, dl, TII.get(Opcode), DstReg) .addReg(DstReg, RegState::Kill) .addImm(Offset); New->getOperand(3).setIsDead(); diff --git a/lib/Target/AVR/AVRRegisterInfo.td b/lib/Target/AVR/AVRRegisterInfo.td index 32650fc66751..8162f12052be 100644 --- a/lib/Target/AVR/AVRRegisterInfo.td +++ b/lib/Target/AVR/AVRRegisterInfo.td @@ -110,8 +110,6 @@ CoveredBySubRegs = 1 in // Register Classes //===----------------------------------------------------------------------===// -//:TODO: use proper set instructions instead of using always "add" - // Main 8-bit register class. def GPR8 : RegisterClass<"AVR", [i8], 8, ( @@ -199,14 +197,11 @@ def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, // We have a bunch of instructions with an explicit Z register argument. We // model this using a register class containing only the Z register. -// :TODO: Rename to 'ZREG'. -def ZREGS : RegisterClass<"AVR", [i16], 8, (add R31R30)>; +def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>; // Register class used for the stack read pseudo instruction. def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>; -//:TODO: if we remove this we get an error in tablegen -//:TODO: this is just a hack, remove it once add16 works! // Status register. def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>; def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)> diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp index 91d2a8737b87..a9d61ffc952c 100644 --- a/lib/Target/AVR/AVRTargetMachine.cpp +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -66,6 +66,7 @@ class AVRPassConfig : public TargetPassConfig { bool addInstSelector() override; void addPreSched2() override; + void addPreEmitPass() override; void addPreRegAlloc() override; }; } // namespace @@ -115,4 +116,9 @@ void AVRPassConfig::addPreSched2() { addPass(createAVRExpandPseudoPass()); } +void AVRPassConfig::addPreEmitPass() { + // Must run branch selection immediately preceding the asm printer. 
+ addPass(&BranchRelaxationPassID); +} + } // end of namespace llvm diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index cf52e552978f..5004736365c7 100644 --- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -466,6 +466,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) { if (!tryParseRegisterOperand(Operands)) { return false; } + LLVM_FALLTHROUGH; case AsmToken::LParen: case AsmToken::Integer: case AsmToken::Dot: diff --git a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp b/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp index 316b7836df0d..0f34b8e18ff9 100644 --- a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp +++ b/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp @@ -106,7 +106,7 @@ void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { bool isPtrReg = (MOI.RegClass == AVR::PTRREGSRegClassID) || (MOI.RegClass == AVR::PTRDISPREGSRegClassID) || - (MOI.RegClass == AVR::ZREGSRegClassID); + (MOI.RegClass == AVR::ZREGRegClassID); if (isPtrReg) { O << getRegisterName(Op.getReg(), AVR::ptr); diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp index 1e61eccf775f..6d126ed622aa 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp @@ -33,7 +33,7 @@ static unsigned getEFlagsForFeatureSet(const FeatureBitset &Features) { EFlags |= ELF::EF_AVR_ARCH_AVR51; else if (Features[AVR::ELFArchAVR6]) EFlags |= ELF::EF_AVR_ARCH_AVR6; - else if (Features[AVR::ELFArchAVRTiny]) + else if (Features[AVR::ELFArchTiny]) EFlags |= ELF::EF_AVR_ARCH_AVRTINY; else if (Features[AVR::ELFArchXMEGA1]) EFlags |= ELF::EF_AVR_ARCH_XMEGA1; diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 15e89fb2a261..9fc812cdef14 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -29,7 +29,7 @@ class BPFAsmBackend : public MCAsmBackend { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -65,7 +65,7 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) { assert(Value == 0); } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) { diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index c19e636d79ca..d901abbd1692 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -1413,6 +1413,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " case Hexagon::CONST32: is32bit = true; + LLVM_FALLTHROUGH; // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " case Hexagon::CONST64: // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp 
index 14c682c6df4b..b064778c4bbd 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1947,8 +1947,10 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) { switch (Opc) { case Hexagon::S2_storeri_io: Align++; + LLVM_FALLTHROUGH; case Hexagon::S2_storerh_io: Align++; + LLVM_FALLTHROUGH; case Hexagon::S2_storerb_io: break; default: diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index 730026121d3b..3de531088240 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -937,6 +937,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, case Hexagon::J2_jumpfnew: case Hexagon::J2_jumpfnewpt: Negated = true; + LLVM_FALLTHROUGH; case Hexagon::J2_jumpt: case Hexagon::J2_jumptpt: case Hexagon::J2_jumptnew: diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index aa68f6cfdfc1..49ddd6961f8a 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -2244,6 +2244,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &BrI, case Hexagon::J2_jumpfnew: case Hexagon::J2_jumpfnewpt: Negated = true; + LLVM_FALLTHROUGH; case Hexagon::J2_jumpt: case Hexagon::J2_jumptnew: case Hexagon::J2_jumptnewpt: diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 97a53dcbaed7..c790579ccebc 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -979,18 +979,6 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { if (MFI.hasCalls() || HMFI.hasClobberLR()) return true; - // Frame pointer elimination is a possiblility at this point, but - // to know if FP is necessary we need to know if spill/restore - // functions will be used (they require FP to be valid). - // This means that hasFP shouldn't really be called before CSI is - // calculated, and some measures are taken to make sure of that - // (e.g. default implementations of virtual functions that call it - // are overridden apropriately). - assert(MFI.isCalleeSavedInfoValid() && "Need to know CSI"); - const std::vector &CSI = MFI.getCalleeSavedInfo(); - if (useSpillFunction(MF, CSI) || useRestoreFunction(MF, CSI)) - return true; - return false; } @@ -2437,6 +2425,8 @@ bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const { if (MF.getInfo()->hasEHReturn()) return true; + if (!hasFP(MF)) + return true; if (!isOptSize(MF) && !isMinSize(MF)) if (MF.getTarget().getOptLevel() > CodeGenOpt::Default) return true; diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp index f14c733dcf51..3470480d607d 100644 --- a/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -334,6 +334,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) { if (MRI->getRegClass(PR.R) != PredRC) return false; // If it is a copy between two predicate registers, fall through. 
+ LLVM_FALLTHROUGH; } case Hexagon::C2_and: case Hexagon::C2_andn: diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index e5f49ca77a91..0163b2e2bdc4 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -241,22 +241,31 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: - if (isAlignedMemNode(LD)) - Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai; - else + if (isAlignedMemNode(LD)) { + if (LD->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai; + else + Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai; + } else { Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai; + } break; // 128B case MVT::v128i8: case MVT::v64i16: case MVT::v32i32: case MVT::v16i64: - if (isAlignedMemNode(LD)) - Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B - : Hexagon::V6_vL32b_ai_128B; - else + if (isAlignedMemNode(LD)) { + if (LD->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi_128B + : Hexagon::V6_vL32b_nt_ai_128B; + else + Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B + : Hexagon::V6_vL32b_ai_128B; + } else { Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi_128B : Hexagon::V6_vL32Ub_ai_128B; + } break; default: llvm_unreachable("Unexpected memory type in indexed load"); @@ -529,22 +538,31 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: - if (isAlignedMemNode(ST)) - Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai; - else + if (isAlignedMemNode(ST)) { + if (ST->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai; + else + Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai; + } else { Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai; + } break; // 128B case MVT::v128i8: case MVT::v64i16: case MVT::v32i32: case MVT::v16i64: - if (isAlignedMemNode(ST)) - Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B - : Hexagon::V6_vS32b_ai_128B; - else + if (isAlignedMemNode(ST)) { + if (ST->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi_128B + : Hexagon::V6_vS32b_nt_ai_128B; + else + Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B + : Hexagon::V6_vS32b_ai_128B; + } else { Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi_128B : Hexagon::V6_vS32Ub_ai_128B; + } break; default: llvm_unreachable("Unexpected memory type in indexed store"); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 2daacf795555..67242764d453 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -716,6 +716,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); auto PtrVT = getPointerTy(MF.getDataLayout()); // Check for varargs. 
@@ -832,7 +833,6 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (NeedsArgAlign && Subtarget.hasV60TOps()) { DEBUG(dbgs() << "Function needs byte stack align due to call args\n"); - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); // V6 vectors passed by value have 64 or 128 byte alignment depending // on whether we are 64 byte vector mode or 128 byte. bool UseHVXDbl = Subtarget.useHVXDblOps(); @@ -916,10 +916,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Glue); if (IsTailCall) { - MF.getFrameInfo().setHasTailCall(); + MFI.setHasTailCall(); return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); } + // Set this here because we need to know this for "hasFP" in frame lowering. + // The target-independent code calls getFrameRegister before setting it, and + // getFrameRegister uses hasFP to determine whether the function has FP. + MFI.setHasCalls(true); + unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL; Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); Glue = Chain.getValue(1); @@ -1284,11 +1289,9 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // Creates a SPLAT instruction for a constant value VAL. static SDValue createSplat(SelectionDAG &DAG, const SDLoc &dl, EVT VT, SDValue Val) { - if (VT.getSimpleVT() == MVT::v4i8) - return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); - - if (VT.getSimpleVT() == MVT::v4i16) - return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val); + EVT T = VT.getVectorElementType(); + if (T == MVT::i8 || T == MVT::i16) + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, Val); return SDValue(); } @@ -2296,32 +2299,13 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::JT: return "HexagonISD::JT"; case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; - case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; - case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; - case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB"; - case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; - case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ"; - case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT"; - case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU"; - case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ"; - case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT"; - case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU"; - case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; - case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; - case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; case HexagonISD::VPACK: return "HexagonISD::VPACK"; - case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; - case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; - case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; - case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH"; - case HexagonISD::VSRAH: return "HexagonISD::VSRAH"; - case HexagonISD::VSRAW: return "HexagonISD::VSRAW"; - case HexagonISD::VSRLH: return "HexagonISD::VSRLH"; - case HexagonISD::VSRLW: return "HexagonISD::VSRLW"; - case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH"; - case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW"; + case HexagonISD::VASL: return "HexagonISD::VASL"; + case HexagonISD::VASR: return "HexagonISD::VASR"; + case 
HexagonISD::VLSR: return "HexagonISD::VLSR"; + case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; case HexagonISD::OP_END: break; } @@ -2503,13 +2487,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { if (VT.getSimpleVT() == MVT::v4i16) { switch (Op.getOpcode()) { case ISD::SRA: - Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat); break; case ISD::SHL: - Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat); break; case ISD::SRL: - Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat); break; default: return SDValue(); @@ -2517,13 +2501,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { } else if (VT.getSimpleVT() == MVT::v2i32) { switch (Op.getOpcode()) { case ISD::SRA: - Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat); break; case ISD::SHL: - Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat); break; case ISD::SRL: - Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat); break; default: return SDValue(); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 1415156487c0..bfd2c94eeaba 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -52,29 +52,10 @@ namespace HexagonISD { COMBINE, PACKHL, - VSPLATB, - VSPLATH, - SHUFFEB, - SHUFFEH, - SHUFFOB, - SHUFFOH, - VSXTBH, - VSXTBW, - VSRAW, - VSRAH, - VSRLW, - VSRLH, - VSHLW, - VSHLH, - VCMPBEQ, - VCMPBGT, - VCMPBGTU, - VCMPHEQ, - VCMPHGT, - VCMPHGTU, - VCMPWEQ, - VCMPWGT, - VCMPWGTU, + VSPLAT, + VASL, + VASR, + VLSR, INSERT, INSERTRP, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 1eac2d3dd8e2..c77c669f4ca7 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -250,15 +250,19 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case Hexagon::L2_loadri_io: case Hexagon::L2_loadrd_io: case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vL32b_nt_ai: case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vL32b_nt_ai_128B: case Hexagon::V6_vL32Ub_ai: case Hexagon::V6_vL32Ub_ai_128B: case Hexagon::LDriw_pred: case Hexagon::LDriw_mod: case Hexagon::PS_vloadrq_ai: case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrw_nt_ai: case Hexagon::PS_vloadrq_ai_128B: - case Hexagon::PS_vloadrw_ai_128B: { + case Hexagon::PS_vloadrw_ai_128B: + case Hexagon::PS_vloadrw_nt_ai_128B: { const MachineOperand OpFI = MI.getOperand(1); if (!OpFI.isFI()) return 0; @@ -1726,6 +1730,39 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, return false; } +std::pair +HexagonInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF & ~HexagonII::MO_Bitmasks, + TF & HexagonII::MO_Bitmasks); +} + +ArrayRef> +HexagonInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace HexagonII; + static const std::pair Flags[] = { + {MO_PCREL, "hexagon-pcrel"}, + {MO_GOT, "hexagon-got"}, + {MO_LO16, 
"hexagon-lo16"}, + {MO_HI16, "hexagon-hi16"}, + {MO_GPREL, "hexagon-gprel"}, + {MO_GDGOT, "hexagon-gdgot"}, + {MO_GDPLT, "hexagon-gdplt"}, + {MO_IE, "hexagon-ie"}, + {MO_IEGOT, "hexagon-iegot"}, + {MO_TPREL, "hexagon-tprel"} + }; + return makeArrayRef(Flags); +} + +ArrayRef> +HexagonInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { + using namespace HexagonII; + static const std::pair Flags[] = { + {HMOTF_ConstExtended, "hexagon-ext"} + }; + return makeArrayRef(Flags); +} + unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; @@ -1797,7 +1834,7 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { const MachineOperand &MO = MI.getOperand(ExtOpNum); // Use MO operand flags to determine if MO // has the HMOTF_ConstExtended flag set. - if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended) + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) return true; // If this is a Machine BB address we are talking about, and it is // not marked as extended, say so. @@ -1807,9 +1844,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { // We could be using an instruction with an extendable immediate and shoehorn // a global address into it. If it is a global address it will be constant // extended. We do this for COMBINE. - // We currently only handle isGlobal() because it is the only kind of - // object we are going to end up with here for now. - // In the future we probably should add isSymbol(), etc. if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() || MO.isJTI() || MO.isCPI() || MO.isFPImm()) return true; @@ -1961,11 +1995,9 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const { return true; // Use MO operand flags to determine if one of MI's operands // has HMOTF_ConstExtended flag set. 
- for (MachineInstr::const_mop_iterator I = MI.operands_begin(), - E = MI.operands_end(); I != E; ++I) { - if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended) + for (const MachineOperand &MO : MI.operands()) + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) return true; - } return false; } @@ -2445,20 +2477,28 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, switch (Opcode) { case Hexagon::PS_vstorerq_ai: case Hexagon::PS_vstorerw_ai: + case Hexagon::PS_vstorerw_nt_ai: case Hexagon::PS_vloadrq_ai: case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrw_nt_ai: case Hexagon::V6_vL32b_ai: case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vL32b_nt_ai: + case Hexagon::V6_vS32b_nt_ai: case Hexagon::V6_vL32Ub_ai: case Hexagon::V6_vS32Ub_ai: return isShiftedInt<4,6>(Offset); case Hexagon::PS_vstorerq_ai_128B: case Hexagon::PS_vstorerw_ai_128B: + case Hexagon::PS_vstorerw_nt_ai_128B: case Hexagon::PS_vloadrq_ai_128B: case Hexagon::PS_vloadrw_ai_128B: + case Hexagon::PS_vloadrw_nt_ai_128B: case Hexagon::V6_vL32b_ai_128B: case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vL32b_nt_ai_128B: + case Hexagon::V6_vS32b_nt_ai_128B: case Hexagon::V6_vL32Ub_ai_128B: case Hexagon::V6_vS32Ub_ai_128B: return isShiftedInt<4,7>(Offset); @@ -3170,11 +3210,19 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return Hexagon::V6_vL32b_cur_pi; case Hexagon::V6_vL32b_ai: return Hexagon::V6_vL32b_cur_ai; + case Hexagon::V6_vL32b_nt_pi: + return Hexagon::V6_vL32b_nt_cur_pi; + case Hexagon::V6_vL32b_nt_ai: + return Hexagon::V6_vL32b_nt_cur_ai; //128B case Hexagon::V6_vL32b_pi_128B: return Hexagon::V6_vL32b_cur_pi_128B; case Hexagon::V6_vL32b_ai_128B: return Hexagon::V6_vL32b_cur_ai_128B; + case Hexagon::V6_vL32b_nt_pi_128B: + return Hexagon::V6_vL32b_nt_cur_pi_128B; + case Hexagon::V6_vL32b_nt_ai_128B: + return Hexagon::V6_vL32b_nt_cur_ai_128B; } return 0; } @@ -3187,11 +3235,19 @@ int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const { return Hexagon::V6_vL32b_pi; case Hexagon::V6_vL32b_cur_ai: return Hexagon::V6_vL32b_ai; + case Hexagon::V6_vL32b_nt_cur_pi: + return Hexagon::V6_vL32b_nt_pi; + case Hexagon::V6_vL32b_nt_cur_ai: + return Hexagon::V6_vL32b_nt_ai; //128B case Hexagon::V6_vL32b_cur_pi_128B: return Hexagon::V6_vL32b_pi_128B; case Hexagon::V6_vL32b_cur_ai_128B: return Hexagon::V6_vL32b_ai_128B; + case Hexagon::V6_vL32b_nt_cur_pi_128B: + return Hexagon::V6_vL32b_nt_pi_128B; + case Hexagon::V6_vL32b_nt_cur_ai_128B: + return Hexagon::V6_vL32b_nt_ai_128B; } return 0; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 944d0161a7c8..0436ce3ac475 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -301,6 +301,27 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { const MachineInstr &UseMI, unsigned UseIdx) const override; + /// Decompose the machine operand's target flags into two values - the direct + /// target flag value and any of bit flags that are applied. + std::pair + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + + /// Return an array that contains the direct target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + ArrayRef> + getSerializableDirectMachineOperandTargetFlags() const override; + + /// Return an array that contains the bitmask target flag values and their + /// names. 
+ /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + ArrayRef> + getSerializableBitmaskMachineOperandTargetFlags() const override; + bool isTailCall(const MachineInstr &MI) const override; /// HexagonInstrInfo specifics. diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 4602de979024..1a26805d190d 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -49,7 +49,7 @@ static cl::opt CheckEarlyAvail("check-early-avail", cl::Hidden, using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace { class HexagonCallMutation : public ScheduleDAGMutation { diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index 689419638f54..ba98b8994937 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -2770,6 +2770,9 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [ multiclass vS32b_ai_pats { // Aligned stores + def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, Requires<[UseHVXSgl]>; @@ -2778,6 +2781,9 @@ multiclass vS32b_ai_pats { Requires<[UseHVXSgl]>; // 128B Aligned stores + def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32b_nt_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, Requires<[UseHVXDbl]>; @@ -2787,6 +2793,11 @@ multiclass vS32b_ai_pats { // Fold Add R+OFF into vector store. let AddedComplexity = 10 in { + def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, Iss4_6:$offset)), + (V6_vS32b_nt_ai IntRegs:$src2, Iss4_6:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VectorRegs:$src1), (add IntRegs:$src2, Iss4_6:$offset)), (V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset, @@ -2799,6 +2810,11 @@ multiclass vS32b_ai_pats { Requires<[UseHVXSgl]>; // Fold Add R+OFF into vector store 128B. 
+ def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, Iss4_7:$offset)), + (V6_vS32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), (add IntRegs:$src2, Iss4_7:$offset)), (V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset, @@ -2820,6 +2836,9 @@ defm : vS32b_ai_pats ; multiclass vL32b_ai_pats { // Aligned loads + def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)), + (V6_vL32b_nt_ai IntRegs:$addr, 0) >, + Requires<[UseHVXSgl]>; def : Pat < (VTSgl (alignedload IntRegs:$addr)), (V6_vL32b_ai IntRegs:$addr, 0) >, Requires<[UseHVXSgl]>; @@ -2828,6 +2847,9 @@ multiclass vL32b_ai_pats { Requires<[UseHVXSgl]>; // 128B Load + def : Pat < (VTDbl (alignednontemporalload IntRegs:$addr)), + (V6_vL32b_nt_ai_128B IntRegs:$addr, 0) >, + Requires<[UseHVXDbl]>; def : Pat < (VTDbl (alignedload IntRegs:$addr)), (V6_vL32b_ai_128B IntRegs:$addr, 0) >, Requires<[UseHVXDbl]>; @@ -2837,6 +2859,9 @@ multiclass vL32b_ai_pats { // Fold Add R+OFF into vector load. let AddedComplexity = 10 in { + def : Pat<(VTDbl (alignednontemporalload (add IntRegs:$src2, Iss4_7:$offset))), + (V6_vL32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset)>, + Requires<[UseHVXDbl]>; def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))), (V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>, Requires<[UseHVXDbl]>; @@ -2844,6 +2869,9 @@ multiclass vL32b_ai_pats { (V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>, Requires<[UseHVXDbl]>; + def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, Iss4_6:$offset))), + (V6_vL32b_nt_ai IntRegs:$src2, Iss4_6:$offset)>, + Requires<[UseHVXSgl]>; def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))), (V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>, Requires<[UseHVXSgl]>; @@ -2859,6 +2887,9 @@ defm : vL32b_ai_pats ; defm : vL32b_ai_pats ; multiclass STrivv_pats { + def : Pat<(alignednontemporalstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, Requires<[UseHVXSgl]>; @@ -2866,6 +2897,10 @@ multiclass STrivv_pats { (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, Requires<[UseHVXSgl]>; + def : Pat<(alignednontemporalstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (PS_vstorerw_nt_ai_128B IntRegs:$addr, 0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), (PS_vstorerw_ai_128B IntRegs:$addr, 0, (VTDbl VecDblRegs128B:$src1))>, @@ -2882,6 +2917,9 @@ defm : STrivv_pats ; defm : STrivv_pats ; multiclass LDrivv_pats { + def : Pat<(VTSgl (alignednontemporalload I32:$addr)), + (PS_vloadrw_nt_ai I32:$addr, 0)>, + Requires<[UseHVXSgl]>; def : Pat<(VTSgl (alignedload I32:$addr)), (PS_vloadrw_ai I32:$addr, 0)>, Requires<[UseHVXSgl]>; @@ -2889,6 +2927,9 @@ multiclass LDrivv_pats { (PS_vloadrwu_ai I32:$addr, 0)>, Requires<[UseHVXSgl]>; + def : Pat<(VTDbl (alignednontemporalload I32:$addr)), + (PS_vloadrw_nt_ai_128B I32:$addr, 0)>, + Requires<[UseHVXDbl]>; def : Pat<(VTDbl (alignedload I32:$addr)), (PS_vloadrw_ai_128B I32:$addr, 0)>, Requires<[UseHVXDbl]>; @@ -3021,16 +3062,16 @@ def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), (A2_svsubh IntRegs:$src1, 
IntRegs:$src2)>; -def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; -def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; +def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; // Replicate the low 8-bits from 32-bits input register into each of the // four bytes of 32-bits destination register. -def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; +def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; // Replicate the low 16-bits from 32-bits input register into each of the // four halfwords of 64-bits destination register. -def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; +def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; class VArith_pat @@ -3068,84 +3109,44 @@ def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), (i32 u5_0ImmPred:$c))))), (S2_asl_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_asr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_lsr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_asl_i_vh V4I16:$b, imm:$c)>; -def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; -def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; +def SDTHexagonVShift + : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; -def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; +def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; +def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; -def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)), +def: Pat<(v2i32 (HexagonVASL V2I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)), +def: Pat<(v4i16 (HexagonVASL V4I16:$Rs, u4_0ImmPred:$u4)), (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVASR V2I32:$Rs, u5_0ImmPred:$u5)), + (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVASR V4I16:$Rs, u4_0ImmPred:$u4)), + (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVLSR V2I32:$Rs, u5_0ImmPred:$u5)), + (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVLSR 
V4I16:$Rs, u4_0ImmPred:$u4)), + (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; class vshift_rr_pat : Pat <(Op Value:$Rs, I32:$Rt), (MI Value:$Rs, I32:$Rt)>; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; - - -def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; -def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; -def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; - -def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; - - -class vcmp_i1_pat - : Pat <(i1 (Op Value:$Rs, Value:$Rt)), - (MI Value:$Rs, Value:$Rt)>; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; - -def: vcmp_i1_pat; -def: vcmp_i1_pat; -def: vcmp_i1_pat; +def: vshift_rr_pat ; +def: vshift_rr_pat ; +def: vshift_rr_pat ; +def: vshift_rr_pat ; +def: vshift_rr_pat ; +def: vshift_rr_pat ; class vcmp_vi1_pat @@ -3255,13 +3256,6 @@ def: Pat<(v4i8 (trunc V4I16:$Rs)), def: Pat<(v2i16 (trunc V2I32:$Rs)), (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; - -def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; -def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; - -def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; -def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; - def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; @@ -3322,31 +3316,6 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; -def SDTHexagonBinOp64 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; - -def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; -def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; -def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; -def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; - -class ShufflePat - : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), - (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b -def: ShufflePat; - -// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b -def: ShufflePat; - -// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h -def: ShufflePat; - -// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h -def: ShufflePat; - - // Truncated store from v4i16 to v4i8. 
def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr), diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td index 93fb688fc1c0..b42c1ab975a8 100644 --- a/lib/Target/Hexagon/HexagonPseudo.td +++ b/lib/Target/Hexagon/HexagonPseudo.td @@ -407,6 +407,11 @@ def PS_vstorerw_ai: STrivv_template, def PS_vstorerw_ai_128B: STrivv_template, Requires<[HasV60T,UseHVXDbl]>; +def PS_vstorerw_nt_ai: STrivv_template, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vstorerw_nt_ai_128B: STrivv_template, + Requires<[HasV60T,UseHVXDbl]>; + def PS_vstorerwu_ai: STrivv_template, Requires<[HasV60T,UseHVXSgl]>; def PS_vstorerwu_ai_128B: STrivv_template, @@ -433,6 +438,11 @@ def PS_vloadrw_ai: LDrivv_template, def PS_vloadrw_ai_128B: LDrivv_template, Requires<[HasV60T,UseHVXDbl]>; +def PS_vloadrw_nt_ai: LDrivv_template, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vloadrw_nt_ai_128B: LDrivv_template, + Requires<[HasV60T,UseHVXDbl]>; + def PS_vloadrwu_ai: LDrivv_template, Requires<[HasV60T,UseHVXSgl]>; def PS_vloadrwu_ai_128B: LDrivv_template, diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index db268b78cd73..4fa929a20810 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -350,6 +350,8 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { MI->getOperand(2).getImm()); case Hexagon::A4_combineri: ImmX++; + // Fall through into A4_combineir. + LLVM_FALLTHROUGH; case Hexagon::A4_combineir: { ImmX++; int64_t V = MI->getOperand(ImmX).getImm(); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 76d9b31b005f..7d88b51f32dd 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -110,10 +110,11 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", namespace llvm { extern char &HexagonExpandCondsetsID; void initializeHexagonExpandCondsetsPass(PassRegistry&); - void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); void initializeHexagonGenMuxPass(PassRegistry&); - void initializeHexagonOptAddrModePass(PassRegistry&); + void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); void initializeHexagonNewValueJumpPass(PassRegistry&); + void initializeHexagonOptAddrModePass(PassRegistry&); + void initializeHexagonPacketizerPass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); FunctionPass *createHexagonBitSimplify(); @@ -156,10 +157,11 @@ extern "C" void LLVMInitializeHexagonTarget() { RegisterTargetMachine X(getTheHexagonTarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); - initializeHexagonLoopIdiomRecognizePass(PR); initializeHexagonGenMuxPass(PR); - initializeHexagonOptAddrModePass(PR); + initializeHexagonLoopIdiomRecognizePass(PR); initializeHexagonNewValueJumpPass(PR); + initializeHexagonOptAddrModePass(PR); + initializeHexagonPacketizerPass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 7667bfb7a0eb..a3021e3dfe43 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -60,9 +60,7 @@ namespace { class HexagonPacketizer : public MachineFunctionPass { public: static char ID; - HexagonPacketizer() : MachineFunctionPass(ID) { - initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); - } + 
HexagonPacketizer() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -89,14 +87,14 @@ namespace { char HexagonPacketizer::ID = 0; } -INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", - false, false) +INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer", + "Hexagon Packetizer", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", - false, false) +INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer", + "Hexagon Packetizer", false, false) HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 34d0b55aa22a..2a0edda8dcee 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -412,7 +412,7 @@ class HexagonAsmBackend : public MCAsmBackend { /// fixup kind as appropriate. void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t FixupValue, bool IsPCRel) const override { + uint64_t FixupValue, bool IsResolved) const override { // When FixupValue is 0 the relocation is external and there // is nothing for us to do. @@ -442,6 +442,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B7_PCREL: if (!(isIntN(7, sValue))) HandleFixupError(7, 2, (int64_t)FixupValue, "B7_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B7_PCREL_X: InstMask = 0x00001f18; // Word32_B7 Reloc = (((Value >> 2) & 0x1f) << 8) | // Value 6-2 = Target 12-8 @@ -451,6 +452,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B9_PCREL: if (!(isIntN(9, sValue))) HandleFixupError(9, 2, (int64_t)FixupValue, "B9_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B9_PCREL_X: InstMask = 0x003000fe; // Word32_B9 Reloc = (((Value >> 7) & 0x3) << 20) | // Value 8-7 = Target 21-20 @@ -462,6 +464,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B13_PCREL: if (!(isIntN(13, sValue))) HandleFixupError(13, 2, (int64_t)FixupValue, "B13_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B13_PCREL_X: InstMask = 0x00202ffe; // Word32_B13 Reloc = (((Value >> 12) & 0x1) << 21) | // Value 12 = Target 21 @@ -472,6 +475,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B15_PCREL: if (!(isIntN(15, sValue))) HandleFixupError(15, 2, (int64_t)FixupValue, "B15_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B15_PCREL_X: InstMask = 0x00df20fe; // Word32_B15 Reloc = (((Value >> 13) & 0x3) << 22) | // Value 14-13 = Target 23-22 @@ -483,6 +487,7 @@ class HexagonAsmBackend : public MCAsmBackend { case fixup_Hexagon_B22_PCREL: if (!(isIntN(22, sValue))) HandleFixupError(22, 2, (int64_t)FixupValue, "B22_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B22_PCREL_X: InstMask = 0x01ff3ffe; // Word32_B22 Reloc = (((Value >> 13) & 0x1ff) << 16) | // Value 21-13 = Target 24-16 diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index d8009c5da08e..7f90e83fc8e9 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -169,8 +169,11 @@ 
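// [Editor's aside - illustrative sketch, not part of the imported sources.]
// Several hunks in this import (HexagonSplitDouble, HexagonAsmBackend,
// HexagonShuffler, LanaiAsmParser, MipsAsmParser, PPCISelLowering) add
// LLVM_FALLTHROUGH in front of switch cases that intentionally share code.
// The macro typically expands to a fallthrough attribute when the compiler
// provides one and to nothing otherwise, keeping -Wimplicit-fallthrough
// quiet. Minimal stand-in below; FALLTHROUGH here is a simplified assumption,
// not LLVM's actual Compiler.h definition.
#if __cplusplus >= 201703L
#define FALLTHROUGH [[fallthrough]]
#else
#define FALLTHROUGH
#endif

static int fixupSketch(int Kind, int Value) {
  switch (Kind) {
  case 0:               // narrow form: range-check first, then share code
    if (Value > 127)
      return -1;
    FALLTHROUGH;        // deliberate: the extended form does the encoding
  case 1:               // extended form
    return Value & 0x7f;
  default:
    return 0;
  }
}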
namespace HexagonII { // Hexagon specific MO operand flag mask. enum HexagonMOTargetFlagVal { - //===------------------------------------------------------------------===// - // Hexagon Specific MachineOperand flags. + // Hexagon-specific MachineOperand target flags. + // + // When chaning these, make sure to update + // getSerializableDirectMachineOperandTargetFlags and + // getSerializableBitmaskMachineOperandTargetFlags if needed. MO_NO_FLAG, /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation @@ -207,10 +210,12 @@ namespace HexagonII { MO_TPREL, // HMOTF_ConstExtended - // Addendum to abovem, indicates a const extended op + // Addendum to above, indicates a const extended op // Can be used as a mask. - HMOTF_ConstExtended = 0x80 + HMOTF_ConstExtended = 0x80, + // Union of all bitmasks (currently only HMOTF_ConstExtended). + MO_Bitmasks = HMOTF_ConstExtended }; // Hexagon Sub-instruction classes. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 564d43b45cb8..1604e7c8dc54 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -259,6 +259,7 @@ bool HexagonShuffler::check() { break; case HexagonII::TypeCVI_VM_VP_LDU: ++onlyNo1; + LLVM_FALLTHROUGH; case HexagonII::TypeCVI_VM_LD: case HexagonII::TypeCVI_VM_TMP_LD: case HexagonII::TypeLD: @@ -274,6 +275,7 @@ bool HexagonShuffler::check() { break; case HexagonII::TypeCVI_VM_STU: ++onlyNo1; + LLVM_FALLTHROUGH; case HexagonII::TypeCVI_VM_ST: case HexagonII::TypeCVI_VM_NEW_ST: case HexagonII::TypeST: diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 72e471f5766e..1394ac7210f2 100644 --- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -787,6 +787,7 @@ std::unique_ptr LanaiAsmParser::parseImmediate() { case AsmToken::Dot: if (!Parser.parseExpression(ExprVal)) return LanaiOperand::createImm(ExprVal, Start, End); + LLVM_FALLTHROUGH; default: return nullptr; } diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp index c212726113ab..bbce5f670c99 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -51,7 +51,7 @@ class LanaiAsmBackend : public MCAsmBackend { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -92,7 +92,7 @@ bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { void LanaiAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, - bool /*IsPCRel*/) const { + bool /*IsResolved*/) const { MCFixupKind Kind = Fixup.getKind(); Value = adjustFixupValue(static_cast(Kind), Value); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 69b1ba1528d0..b72c9d534478 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -304,6 +304,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); + bool expandMXTRAlias(MCInst &Inst, SMLoc 
IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -343,6 +346,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetPushDirective(); bool parseSetSoftFloatDirective(); bool parseSetHardFloatDirective(); + bool parseSetMtDirective(); + bool parseSetNoMtDirective(); bool parseSetAssignment(); @@ -628,6 +633,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool useSoftFloat() const { return getSTI().getFeatureBits()[Mips::FeatureSoftFloat]; } + bool hasMT() const { + return getSTI().getFeatureBits()[Mips::FeatureMT]; + } /// Warn if RegIndex is the same as the current AT. void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc); @@ -1966,6 +1974,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case Mips::SDIV_MM: FirstOp = 0; SecondOp = 1; + LLVM_FALLTHROUGH; case Mips::SDivMacro: case Mips::DSDivMacro: case Mips::UDivMacro: @@ -2505,6 +2514,16 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::SEQIMacro: return expandSeqI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; + case Mips::MFTC0: case Mips::MTTC0: + case Mips::MFTGPR: case Mips::MTTGPR: + case Mips::MFTLO: case Mips::MTTLO: + case Mips::MFTHI: case Mips::MTTHI: + case Mips::MFTACX: case Mips::MTTACX: + case Mips::MFTDSP: case Mips::MTTDSP: + case Mips::MFTC1: case Mips::MTTC1: + case Mips::MFTHC1: case Mips::MTTHC1: + case Mips::CFTC1: case Mips::CTTC1: + return expandMXTRAlias(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; } } @@ -4876,6 +4895,212 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } +// Map the DSP accumulator and control register to the corresponding gpr +// operand. Unlike the other alias, the m(f|t)t(lo|hi|acx) instructions +// do not map the DSP registers contigously to gpr registers. +static unsigned getRegisterForMxtrDSP(MCInst &Inst, bool IsMFDSP) { + switch (Inst.getOpcode()) { + case Mips::MFTLO: + case Mips::MTTLO: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::ZERO; + case Mips::AC1: + return Mips::A0; + case Mips::AC2: + return Mips::T0; + case Mips::AC3: + return Mips::T4; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTHI: + case Mips::MTTHI: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::AT; + case Mips::AC1: + return Mips::A1; + case Mips::AC2: + return Mips::T1; + case Mips::AC3: + return Mips::T5; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTACX: + case Mips::MTTACX: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::V0; + case Mips::AC1: + return Mips::A2; + case Mips::AC2: + return Mips::T2; + case Mips::AC3: + return Mips::T6; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTDSP: + case Mips::MTTDSP: + return Mips::S0; + default: + llvm_unreachable("Unknown instruction for 'mttr' dsp alias!"); + } +} + +// Map the floating point register operand to the corresponding register +// operand. +static unsigned getRegisterForMxtrFP(MCInst &Inst, bool IsMFTC1) { + switch (Inst.getOperand(IsMFTC1 ? 
1 : 0).getReg()) { + case Mips::F0: return Mips::ZERO; + case Mips::F1: return Mips::AT; + case Mips::F2: return Mips::V0; + case Mips::F3: return Mips::V1; + case Mips::F4: return Mips::A0; + case Mips::F5: return Mips::A1; + case Mips::F6: return Mips::A2; + case Mips::F7: return Mips::A3; + case Mips::F8: return Mips::T0; + case Mips::F9: return Mips::T1; + case Mips::F10: return Mips::T2; + case Mips::F11: return Mips::T3; + case Mips::F12: return Mips::T4; + case Mips::F13: return Mips::T5; + case Mips::F14: return Mips::T6; + case Mips::F15: return Mips::T7; + case Mips::F16: return Mips::S0; + case Mips::F17: return Mips::S1; + case Mips::F18: return Mips::S2; + case Mips::F19: return Mips::S3; + case Mips::F20: return Mips::S4; + case Mips::F21: return Mips::S5; + case Mips::F22: return Mips::S6; + case Mips::F23: return Mips::S7; + case Mips::F24: return Mips::T8; + case Mips::F25: return Mips::T9; + case Mips::F26: return Mips::K0; + case Mips::F27: return Mips::K1; + case Mips::F28: return Mips::GP; + case Mips::F29: return Mips::SP; + case Mips::F30: return Mips::FP; + case Mips::F31: return Mips::RA; + default: llvm_unreachable("Unknown register for mttc1 alias!"); + } +} + +// Map the coprocessor operand the corresponding gpr register operand. +static unsigned getRegisterForMxtrC0(MCInst &Inst, bool IsMFTC0) { + switch (Inst.getOperand(IsMFTC0 ? 1 : 0).getReg()) { + case Mips::COP00: return Mips::ZERO; + case Mips::COP01: return Mips::AT; + case Mips::COP02: return Mips::V0; + case Mips::COP03: return Mips::V1; + case Mips::COP04: return Mips::A0; + case Mips::COP05: return Mips::A1; + case Mips::COP06: return Mips::A2; + case Mips::COP07: return Mips::A3; + case Mips::COP08: return Mips::T0; + case Mips::COP09: return Mips::T1; + case Mips::COP010: return Mips::T2; + case Mips::COP011: return Mips::T3; + case Mips::COP012: return Mips::T4; + case Mips::COP013: return Mips::T5; + case Mips::COP014: return Mips::T6; + case Mips::COP015: return Mips::T7; + case Mips::COP016: return Mips::S0; + case Mips::COP017: return Mips::S1; + case Mips::COP018: return Mips::S2; + case Mips::COP019: return Mips::S3; + case Mips::COP020: return Mips::S4; + case Mips::COP021: return Mips::S5; + case Mips::COP022: return Mips::S6; + case Mips::COP023: return Mips::S7; + case Mips::COP024: return Mips::T8; + case Mips::COP025: return Mips::T9; + case Mips::COP026: return Mips::K0; + case Mips::COP027: return Mips::K1; + case Mips::COP028: return Mips::GP; + case Mips::COP029: return Mips::SP; + case Mips::COP030: return Mips::FP; + case Mips::COP031: return Mips::RA; + default: llvm_unreachable("Unknown register for mttc0 alias!"); + } +} + +/// Expand an alias of 'mftr' or 'mttr' into the full instruction, by producing +/// an mftr or mttr with the correctly mapped gpr register, u, sel and h bits. +bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + unsigned rd = 0; + unsigned u = 1; + unsigned sel = 0; + unsigned h = 0; + bool IsMFTR = false; + switch (Inst.getOpcode()) { + case Mips::MFTC0: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTC0: + u = 0; + rd = getRegisterForMxtrC0(Inst, IsMFTR); + sel = Inst.getOperand(2).getImm(); + break; + case Mips::MFTGPR: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTGPR: + rd = Inst.getOperand(IsMFTR ? 
1 : 0).getReg(); + break; + case Mips::MFTLO: + case Mips::MFTHI: + case Mips::MFTACX: + case Mips::MFTDSP: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTLO: + case Mips::MTTHI: + case Mips::MTTACX: + case Mips::MTTDSP: + rd = getRegisterForMxtrDSP(Inst, IsMFTR); + sel = 1; + break; + case Mips::MFTHC1: + h = 1; + LLVM_FALLTHROUGH; + case Mips::MFTC1: + IsMFTR = true; + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 2; + break; + case Mips::MTTHC1: + h = 1; + LLVM_FALLTHROUGH; + case Mips::MTTC1: + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 2; + break; + case Mips::CFTC1: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::CTTC1: + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 3; + break; + } + unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd; + unsigned Op1 = + IsMFTR ? rd + : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg() + : Inst.getOperand(0).getReg()); + + TOut.emitRRIII(IsMFTR ? Mips::MFTR : Mips::MTTR, Op0, Op1, u, sel, h, IDLoc, + STI); + return false; +} + unsigned MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { @@ -6329,6 +6554,39 @@ bool MipsAsmParser::parseSetNoOddSPRegDirective() { return false; } +bool MipsAsmParser::parseSetMtDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat "mt". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + setFeatureBits(Mips::FeatureMT, "mt"); + getTargetStreamer().emitDirectiveSetMt(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool MipsAsmParser::parseSetNoMtDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat "nomt". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + clearFeatureBits(Mips::FeatureMT, "mt"); + + getTargetStreamer().emitDirectiveSetNoMt(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + bool MipsAsmParser::parseSetPopDirective() { MCAsmParser &Parser = getParser(); SMLoc Loc = getLexer().getLoc(); @@ -6829,6 +7087,10 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetMsaDirective(); } else if (Tok.getString() == "nomsa") { return parseSetNoMsaDirective(); + } else if (Tok.getString() == "mt") { + return parseSetMtDirective(); + } else if (Tok.getString() == "nomt") { + return parseSetNoMtDirective(); } else if (Tok.getString() == "softfloat") { return parseSetSoftFloatDirective(); } else if (Tok.getString() == "hardfloat") { @@ -7078,6 +7340,7 @@ bool MipsAsmParser::parseSSectionDirective(StringRef Section, unsigned Type) { /// ::= .module fp=value /// ::= .module softfloat /// ::= .module hardfloat +/// ::= .module mt bool MipsAsmParser::parseDirectiveModule() { MCAsmParser &Parser = getParser(); MCAsmLexer &Lexer = getLexer(); @@ -7176,6 +7439,25 @@ bool MipsAsmParser::parseDirectiveModule() { return false; } + return false; // parseDirectiveModule has finished successfully. + } else if (Option == "mt") { + setModuleFeatureBits(Mips::FeatureMT, "mt"); + + // Synchronize the ABI Flags information with the FeatureBits information we + // updated above. + getTargetStreamer().updateABIInfo(*this); + + // If printing assembly, use the recently updated ABI Flags information. 
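// [Editor's aside - worked example, not part of the imported sources.]
// expandMXTRAlias() above reduces every mft*/mtt* alias to a single MFTR or
// MTTR whose (u, sel, h) fields pick the register block being accessed and
// whose GPR operand comes from the getRegisterForMxtr{C0,DSP,FP} mappings.
// Reading the hunks, the expansions appear to come out as, for example:
//   mftc0  $2, $12, 0   ->  mftr $2, $t4, 0, 0, 0   (COP012 maps to $t4)
//   mftlo  $4, $ac1     ->  mftr $4, $a0, 1, 1, 0   (AC1 lo maps to $a0)
//   mfthc1 $6, $f10     ->  mftr $6, $t2, 1, 2, 1   (F10 maps to $t2)
// The (u, sel, h) selection alone, restated as a tiny stand-alone helper;
// MxtrKind and its values are invented for illustration.
struct MxtrFields { int u, sel, h; };
enum class MxtrKind { Cop0, Gpr, DspAcc, Fpr, FprHigh, FpCtl };

static MxtrFields mxtrFieldsSketch(MxtrKind K, int Cop0Sel = 0) {
  switch (K) {
  case MxtrKind::Cop0:    return {0, Cop0Sel, 0}; // sel comes from the alias
  case MxtrKind::Gpr:     return {1, 0, 0};
  case MxtrKind::DspAcc:  return {1, 1, 0};       // lo/hi/acx/dsp
  case MxtrKind::Fpr:     return {1, 2, 0};       // mftc1 / mttc1
  case MxtrKind::FprHigh: return {1, 2, 1};       // mfthc1 / mtthc1
  case MxtrKind::FpCtl:   return {1, 3, 0};       // cftc1 / cttc1
  }
  return {1, 0, 0};
}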
+ // If generating ELF, don't do anything (the .MIPS.abiflags section gets + // emitted later). + getTargetStreamer().emitDirectiveModuleMT(); + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + return false; // parseDirectiveModule has finished successfully. } else { return Error(L, "'" + Twine(Option) + "' is not a valid .module option."); diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h index f38541027023..9abd4f1d6b08 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h @@ -159,6 +159,8 @@ struct MipsABIFlagsSection { ASESet |= Mips::AFL_ASE_MICROMIPS; if (P.inMips16Mode()) ASESet |= Mips::AFL_ASE_MIPS16; + if (P.hasMT()) + ASESet |= Mips::AFL_ASE_MT; } template diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index ae48d6e38fa0..a1ed0ea4d7f3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -238,7 +238,7 @@ static unsigned calculateMMLEIndex(unsigned i) { void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { MCFixupKind Kind = Fixup.getKind(); MCContext &Ctx = Asm.getContext(); Value = adjustFixupValue(Fixup, Value, Ctx); diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index bf3b290b7ed5..8ebde3b9b7a4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -40,7 +40,7 @@ class MipsAsmBackend : public MCAsmBackend { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; Optional getFixupKind(StringRef Name) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 0cd4aebe4d16..7caeb08589af 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -50,6 +50,8 @@ void MipsTargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMsa() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMsa() { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetMt() {} +void MipsTargetStreamer::emitDirectiveSetNoMt() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetAtWithArg(unsigned RegNo) { forbidModuleDirective(); @@ -118,6 +120,7 @@ void MipsTargetStreamer::emitDirectiveModuleOddSPReg() { } void MipsTargetStreamer::emitDirectiveModuleSoftFloat() {} void MipsTargetStreamer::emitDirectiveModuleHardFloat() {} +void MipsTargetStreamer::emitDirectiveModuleMT() {} void MipsTargetStreamer::emitDirectiveSetFp( MipsABIFlagsSection::FpABIKind Value) { forbidModuleDirective(); @@ -190,6 +193,21 @@ void 
MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); } +void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, + unsigned Reg1, int16_t Imm0, int16_t Imm1, + int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(MCOperand::createImm(Imm0)); + TmpInst.addOperand(MCOperand::createImm(Imm1)); + TmpInst.addOperand(MCOperand::createImm(Imm2)); + TmpInst.setLoc(IDLoc); + getStreamer().EmitInstruction(TmpInst, *STI); +} + void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI) { @@ -392,6 +410,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoMsa() { MipsTargetStreamer::emitDirectiveSetNoMsa(); } +void MipsTargetAsmStreamer::emitDirectiveSetMt() { + OS << "\t.set\tmt\n"; + MipsTargetStreamer::emitDirectiveSetMt(); +} + +void MipsTargetAsmStreamer::emitDirectiveSetNoMt() { + OS << "\t.set\tnomt\n"; + MipsTargetStreamer::emitDirectiveSetNoMt(); +} + void MipsTargetAsmStreamer::emitDirectiveSetAt() { OS << "\t.set\tat\n"; MipsTargetStreamer::emitDirectiveSetAt(); @@ -656,6 +684,10 @@ void MipsTargetAsmStreamer::emitDirectiveModuleHardFloat() { OS << "\t.module\thardfloat\n"; } +void MipsTargetAsmStreamer::emitDirectiveModuleMT() { + OS << "\t.module\tmt\n"; +} + // This part is for ELF object output. MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index f24761d7d101..d2f0fdcc6cc1 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -188,6 +188,8 @@ def FeatureUseTCCInDIV : SubtargetFeature< def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true", "Disable 4-operand madd.fmt and related instructions">; +def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">; + //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 40078fb77144..89a5854bede0 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -240,7 +240,8 @@ def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<"FeatureMSA">; def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, AssemblerPredicate<"!FeatureMadd4">; - +def HasMT : Predicate<"Subtarget->hasMT()">, + AssemblerPredicate<"FeatureMT">; //===----------------------------------------------------------------------===// // Mips GPR size adjectives. @@ -382,6 +383,10 @@ class ASE_MSA64 { list InsnPredicates = [HasMSA, HasMips64]; } +class ASE_MT { + list InsnPredicates = [HasMT]; +} + // Class used for separating microMIPSr6 and microMIPS (r3) instruction. // It can be used only on instructions that doesn't inherit PredicateControl. 
class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl { @@ -2919,6 +2924,10 @@ include "MipsMSAInstrInfo.td" include "MipsEVAInstrFormats.td" include "MipsEVAInstrInfo.td" +// MT +include "MipsMTInstrFormats.td" +include "MipsMTInstrInfo.td" + // Micromips include "MicroMipsInstrFormats.td" include "MicroMipsInstrInfo.td" diff --git a/lib/Target/Mips/MipsMTInstrFormats.td b/lib/Target/Mips/MipsMTInstrFormats.td new file mode 100644 index 000000000000..edc0981e6278 --- /dev/null +++ b/lib/Target/Mips/MipsMTInstrFormats.td @@ -0,0 +1,99 @@ +//===-- MipsMTInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe the MIPS MT instructions format +// +// opcode - operation code. +// rt - destination register +// +//===----------------------------------------------------------------------===// + +class MipsMTInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, + PredicateControl { + let DecoderNamespace = "Mips"; + let EncodingPredicates = [HasStdEnc]; +} + +class OPCODE1 Val> { + bits<1> Value = Val; +} + +def OPCODE_SC_D : OPCODE1<0b0>; +def OPCODE_SC_E : OPCODE1<0b1>; + +class FIELD5 Val> { + bits<5> Value = Val; +} + +def FIELD5_1_DMT_EMT : FIELD5<0b00001>; +def FIELD5_2_DMT_EMT : FIELD5<0b01111>; +def FIELD5_1_2_DVPE_EVPE : FIELD5<0b00000>; +def FIELD5_MFTR : FIELD5<0b01000>; +def FIELD5_MTTR : FIELD5<0b01100>; + +class COP0_MFMC0_MT : MipsMTInst { + bits<32> Inst; + + bits<5> rt; + let Inst{31-26} = 0b010000; // COP0 + let Inst{25-21} = 0b01011; // MFMC0 + let Inst{20-16} = rt; + let Inst{15-11} = Op1.Value; + let Inst{10-6} = Op2.Value; + let Inst{5} = sc.Value; + let Inst{4-3} = 0b00; + let Inst{2-0} = 0b001; +} + +class COP0_MFTTR_MT : MipsMTInst { + bits<32> Inst; + + bits<5> rt; + bits<5> rd; + bits<1> u; + bits<1> h; + bits<3> sel; + let Inst{31-26} = 0b010000; // COP0 + let Inst{25-21} = Op.Value; // MFMC0 + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; // rx - currently unsupported. + let Inst{5} = u; + let Inst{4} = h; + let Inst{3} = 0b0; + let Inst{2-0} = sel; +} + +class SPECIAL3_MT_FORK : MipsMTInst { + bits<32> Inst; + + bits<5> rs; + bits<5> rt; + bits<5> rd; + let Inst{31-26} = 0b011111; // SPECIAL3 + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; + let Inst{5-0} = 0b001000; // FORK +} + +class SPECIAL3_MT_YIELD : MipsMTInst { + bits<32> Inst; + + bits<5> rs; + bits<5> rd; + let Inst{31-26} = 0b011111; // SPECIAL3 + let Inst{25-21} = rs; + let Inst{20-16} = 0b00000; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; + let Inst{5-0} = 0b001001; // FORK +} diff --git a/lib/Target/Mips/MipsMTInstrInfo.td b/lib/Target/Mips/MipsMTInstrInfo.td new file mode 100644 index 000000000000..72e626cbec40 --- /dev/null +++ b/lib/Target/Mips/MipsMTInstrInfo.td @@ -0,0 +1,208 @@ +//===-- MipsMTInstrInfo.td - Mips MT Instruction Infos -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file describes the MIPS MT ASE as defined by MD00378 1.12. +// +// TODO: Add support for the microMIPS encodings for the MT ASE and add the +// instruction mappings. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Encodings +//===----------------------------------------------------------------------===// + +class DMT_ENC : COP0_MFMC0_MT; + +class EMT_ENC : COP0_MFMC0_MT; + +class DVPE_ENC : COP0_MFMC0_MT; + +class EVPE_ENC : COP0_MFMC0_MT; + +class FORK_ENC : SPECIAL3_MT_FORK; + +class YIELD_ENC : SPECIAL3_MT_YIELD; + +class MFTR_ENC : COP0_MFTTR_MT; + +class MTTR_ENC : COP0_MFTTR_MT; + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Descriptions +//===----------------------------------------------------------------------===// + +class MT_1R_DESC_BASE { + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins); + string AsmString = !strconcat(instr_asm, "\t$rt"); + list Pattern = []; + InstrItinClass Itinerary = Itin; +} + +class MFTR_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); + string AsmString = "mftr\t$rd, $rt, $u, $sel, $h"; + list Pattern = []; + InstrItinClass Itinerary = II_MFTR; +} + +class MTTR_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); + string AsmString = "mttr\t$rt, $rd, $u, $sel, $h"; + list Pattern = []; + InstrItinClass Itinerary = II_MTTR; +} + +class FORK_DESC { + dag OutOperandList = (outs GPR32Opnd:$rs, GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt); + string AsmString = "fork\t$rd, $rs, $rt"; + list Pattern = []; + InstrItinClass Itinerary = II_FORK; +} + +class YIELD_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rs); + string AsmString = "yield\t$rd, $rs"; + list Pattern = []; + InstrItinClass Itinerary = II_YIELD; +} + +class DMT_DESC : MT_1R_DESC_BASE<"dmt", II_DMT>; + +class EMT_DESC : MT_1R_DESC_BASE<"emt", II_EMT>; + +class DVPE_DESC : MT_1R_DESC_BASE<"dvpe", II_DVPE>; + +class EVPE_DESC : MT_1R_DESC_BASE<"evpe", II_EVPE>; + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Definitions +//===----------------------------------------------------------------------===// +let hasSideEffects = 1, isNotDuplicable = 1, + AdditionalPredicates = [NotInMicroMips] in { + def DMT : DMT_ENC, DMT_DESC, ASE_MT; + + def EMT : EMT_ENC, EMT_DESC, ASE_MT; + + def DVPE : DVPE_ENC, DVPE_DESC, ASE_MT; + + def EVPE : EVPE_ENC, EVPE_DESC, ASE_MT; + + def FORK : FORK_ENC, FORK_DESC, ASE_MT; + + def YIELD : YIELD_ENC, YIELD_DESC, ASE_MT; + + def MFTR : MFTR_ENC, MFTR_DESC, ASE_MT; + + def MTTR : MTTR_ENC, MTTR_DESC, ASE_MT; +} + +//===----------------------------------------------------------------------===// +// MIPS MT Pseudo Instructions - used to support mtfr & mttr aliases. 
+//===----------------------------------------------------------------------===// +def MFTC0 : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins COP0Opnd:$rt, + uimm3:$sel), + "mftc0 $rd, $rt, $sel">, ASE_MT; + +def MFTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt, + uimm3:$sel), + "mftgpr $rd, $rt">, ASE_MT; + +def MFTLO : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mftlo $rt, $ac">, ASE_MT; + +def MFTHI : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mfthi $rt, $ac">, ASE_MT; + +def MFTACX : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mftacx $rt, $ac">, ASE_MT; + +def MFTDSP : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins), + "mftdsp $rt">, ASE_MT; + +def MFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), + "mftc1 $rt, $ft">, ASE_MT; + +def MFTHC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), + "mfthc1 $rt, $ft">, ASE_MT; + +def CFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGRCCOpnd:$ft), + "cftc1 $rt, $ft">, ASE_MT; + + +def MTTC0 : MipsAsmPseudoInst<(outs COP0Opnd:$rd), (ins GPR32Opnd:$rt, + uimm3:$sel), + "mttc0 $rt, $rd, $sel">, ASE_MT; + +def MTTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins GPR32Opnd:$rd), + "mttgpr $rd, $rt">, ASE_MT; + +def MTTLO : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mttlo $rt, $ac">, ASE_MT; + +def MTTHI : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mtthi $rt, $ac">, ASE_MT; + +def MTTACX : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mttacx $rt, $ac">, ASE_MT; + +def MTTDSP : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rt), + "mttdsp $rt">, ASE_MT; + +def MTTC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), + "mttc1 $rt, $ft">, ASE_MT; + +def MTTHC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), + "mtthc1 $rt, $ft">, ASE_MT; + +def CTTC1 : MipsAsmPseudoInst<(outs FGRCCOpnd:$ft), (ins GPR32Opnd:$rt), + "cttc1 $rt, $ft">, ASE_MT; + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Definitions +//===----------------------------------------------------------------------===// + +let AdditionalPredicates = [NotInMicroMips] in { + def : MipsInstAlias<"dmt", (DMT ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"emt", (EMT ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"dvpe", (DVPE ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"evpe", (EVPE ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"yield $rs", (YIELD ZERO, GPR32Opnd:$rs), 1>, ASE_MT; + + def : MipsInstAlias<"mftc0 $rd, $rt", (MFTC0 GPR32Opnd:$rd, COP0Opnd:$rt, 0), + 1>, ASE_MT; + + def : MipsInstAlias<"mftlo $rt", (MFTLO GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mfthi $rt", (MFTHI GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mftacx $rt", (MFTACX GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mttc0 $rd, $rt", (MTTC0 COP0Opnd:$rt, GPR32Opnd:$rd, 0), + 1>, ASE_MT; + + def : MipsInstAlias<"mttlo $rt", (MTTLO AC0, GPR32Opnd:$rt), 1>, ASE_MT; + + def : MipsInstAlias<"mtthi $rt", (MTTHI AC0, GPR32Opnd:$rt), 1>, ASE_MT; + + def : MipsInstAlias<"mttacx $rt", (MTTACX AC0, GPR32Opnd:$rt), 1>, ASE_MT; +} diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index c0de59ba15f5..8ec55ab6284d 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -84,6 +84,7 @@ def II_DIVU : InstrItinClass; def II_DIV_D : InstrItinClass; def II_DIV_S : InstrItinClass; def II_DMFC0 : 
InstrItinClass; +def II_DMT : InstrItinClass; def II_DMTC0 : InstrItinClass; def II_DMFC1 : InstrItinClass; def II_DMTC1 : InstrItinClass; @@ -113,8 +114,12 @@ def II_DSBH : InstrItinClass; def II_DSHD : InstrItinClass; def II_DSUBU : InstrItinClass; def II_DSUB : InstrItinClass; +def II_DVPE : InstrItinClass; +def II_EMT : InstrItinClass; +def II_EVPE : InstrItinClass; def II_EXT : InstrItinClass; // Any EXT instruction def II_FLOOR : InstrItinClass; +def II_FORK : InstrItinClass; def II_INS : InstrItinClass; // Any INS instruction def II_IndirectBranchPseudo : InstrItinClass; // Indirect branch pseudo. def II_J : InstrItinClass; @@ -221,6 +226,7 @@ def II_MFC1 : InstrItinClass; def II_MFHC1 : InstrItinClass; def II_MFC2 : InstrItinClass; def II_MFHI_MFLO : InstrItinClass; // mfhi and mflo +def II_MFTR : InstrItinClass; def II_MOD : InstrItinClass; def II_MODU : InstrItinClass; def II_MOVE : InstrItinClass; @@ -250,6 +256,7 @@ def II_MTC1 : InstrItinClass; def II_MTHC1 : InstrItinClass; def II_MTC2 : InstrItinClass; def II_MTHI_MTLO : InstrItinClass; // mthi and mtlo +def II_MTTR : InstrItinClass; def II_MUL : InstrItinClass; def II_MUH : InstrItinClass; def II_MUHU : InstrItinClass; @@ -345,6 +352,7 @@ def II_WRPGPR : InstrItinClass; def II_RDPGPR : InstrItinClass; def II_DVP : InstrItinClass; def II_EVP : InstrItinClass; +def II_YIELD : InstrItinClass; //===----------------------------------------------------------------------===// // Mips Generic instruction itineraries. @@ -386,6 +394,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -404,7 +413,11 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -653,12 +666,14 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -670,5 +685,6 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]> + InstrItinData]>, + InstrItinData]> ]>; diff --git a/lib/Target/Mips/MipsScheduleGeneric.td b/lib/Target/Mips/MipsScheduleGeneric.td index 15a0401b781e..89cda676441e 100644 --- a/lib/Target/Mips/MipsScheduleGeneric.td +++ b/lib/Target/Mips/MipsScheduleGeneric.td @@ -187,7 +187,11 @@ def GenericIssueCOP0 : ProcResource<1> { let Super = GenericCOP0; } def GenericWriteCOP0TLB : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 4; } def GenericWriteCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 3; } def GenericReadCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 2; } -def GnereicReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>; +def GenericReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>; +def GenericReadWriteCOP0Long : SchedWriteRes<[GenericIssueCOP0]> { + let Latency = 5; +} +def GenericWriteCOP0Short : SchedWriteRes<[GenericIssueCOP0]>; def : ItinRW<[GenericWriteCOP0TLB], [II_TLBP, II_TLBR, II_TLBWI, II_TLBWR]>; def : ItinRW<[GenericWriteCOP0TLB], [II_TLBINV, II_TLBINVF]>; @@ -261,6 +265,14 @@ def : 
ItinRW<[GenericWriteLoad], [II_LBE, II_LBUE, II_LHE, II_LHUE, II_LWE, def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>; +// MIPS MT instructions +// ==================== + +def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE]>; + +def : ItinRW<[GenericReadWriteCOP0Long], [II_YIELD]>; +def : ItinRW<[GenericWriteCOP0Short], [II_FORK]>; + // MIPS32R6 and MIPS16e // ==================== diff --git a/lib/Target/Mips/MipsScheduleP5600.td b/lib/Target/Mips/MipsScheduleP5600.td index 882a241d1426..fedfac24e4e7 100644 --- a/lib/Target/Mips/MipsScheduleP5600.td +++ b/lib/Target/Mips/MipsScheduleP5600.td @@ -19,7 +19,7 @@ def MipsP5600Model : SchedMachineModel { HasMips64, HasMips64r2, HasCnMips, InMicroMips, InMips16Mode, HasMicroMips32r6, HasMicroMips64r6, - HasDSP, HasDSPR2]; + HasDSP, HasDSPR2, HasMT]; } diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 154d5825427b..eba21e0a1c67 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -70,7 +70,8 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), - HasEVA(false), DisableMadd4(false), TM(TM), TargetTriple(TT), TSInfo(), + HasEVA(false), DisableMadd4(false), HasMT(false), TM(TM), + TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ccd47f00c0d3..7619e7b08612 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -149,6 +149,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // related instructions. bool DisableMadd4; + // HasMT -- support MT ASE. 
+ bool HasMT; + InstrItineraryData InstrItins; // We can override the determination of whether we are in mips16 mode @@ -259,6 +262,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { bool hasMSA() const { return HasMSA; } bool disableMadd4() const { return DisableMadd4; } bool hasEVA() const { return HasEVA; } + bool hasMT() const { return HasMT; } bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 41ebe411b98d..af24838665e1 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -40,6 +40,8 @@ class MipsTargetStreamer : public MCTargetStreamer { virtual void emitDirectiveSetNoMacro(); virtual void emitDirectiveSetMsa(); virtual void emitDirectiveSetNoMsa(); + virtual void emitDirectiveSetMt(); + virtual void emitDirectiveSetNoMt(); virtual void emitDirectiveSetAt(); virtual void emitDirectiveSetAtWithArg(unsigned RegNo); virtual void emitDirectiveSetNoAt(); @@ -96,6 +98,7 @@ class MipsTargetStreamer : public MCTargetStreamer { virtual void emitDirectiveModuleOddSPReg(); virtual void emitDirectiveModuleSoftFloat(); virtual void emitDirectiveModuleHardFloat(); + virtual void emitDirectiveModuleMT(); virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value); virtual void emitDirectiveSetOddSPReg(); virtual void emitDirectiveSetNoOddSPReg(); @@ -116,6 +119,9 @@ class MipsTargetStreamer : public MCTargetStreamer { SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, + int16_t Imm1, int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI); void emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI); void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, @@ -204,6 +210,8 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { void emitDirectiveSetNoMacro() override; void emitDirectiveSetMsa() override; void emitDirectiveSetNoMsa() override; + void emitDirectiveSetMt() override; + void emitDirectiveSetNoMt() override; void emitDirectiveSetAt() override; void emitDirectiveSetAtWithArg(unsigned RegNo) override; void emitDirectiveSetNoAt() override; @@ -267,6 +275,7 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { void emitDirectiveModuleOddSPReg() override; void emitDirectiveModuleSoftFloat() override; void emitDirectiveModuleHardFloat() override; + void emitDirectiveModuleMT() override; void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override; void emitDirectiveSetOddSPReg() override; void emitDirectiveSetNoOddSPReg() override; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index f26b9a7cb8dd..f800d91f4093 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -62,7 +62,6 @@ #include #include -#undef DEBUG_TYPE #define DEBUG_TYPE "nvptx-lower" using namespace llvm; @@ -2456,7 +2455,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // v2f16 was loaded as an i32. Now we must bitcast it back. else if (EltVT == MVT::v2f16) Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt); - // Extend the element if necesary (e.g. an i8 is loaded + // Extend the element if necessary (e.g. 
an i8 is loaded // into an i16 register) if (Ins[InsIdx].VT.isInteger() && Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) { diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 3be291b48b8f..989f0a3aba2f 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -42,6 +43,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); + AU.addRequired(); } bool runOnFunction(Function &F) override; @@ -61,6 +63,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); + const TargetTransformInfo &TTI = + getAnalysis().getTTI(F); // Collect all aggregate loads and mem* calls. for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { @@ -104,15 +108,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { Value *SrcAddr = LI->getOperand(0); Value *DstAddr = SI->getOperand(1); unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); - Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); + ConstantInt *CopyLen = + ConstantInt::get(Type::getInt32Ty(Context), NumLoads); - createMemCpyLoop(/* ConvertedInst */ SI, - /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, - /* CopyLen */ CopyLen, - /* SrcAlign */ LI->getAlignment(), - /* DestAlign */ SI->getAlignment(), - /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile()); + if (!TTI.useWideIRMemcpyLoopLowering()) { + createMemCpyLoop(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile()); + } else { + createMemCpyLoopKnownSize(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile(), TTI); + } SI->eraseFromParent(); LI->eraseFromParent(); @@ -121,7 +136,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // Transform mem* intrinsic calls. 
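// [Editor's aside - illustrative sketch, not part of the imported sources.]
// The NVPTXLowerAggrCopies hunk above starts routing aggregate copies through
// createMemCpyLoopKnownSize() whenever TTI reports wide IR memcpy lowering;
// conceptually that emits a loop over a wide element type plus a residual
// byte copy instead of a pure byte-at-a-time loop. Host-side C++ sketch of
// the shape of the result; the 4-byte width is an assumed stand-in for the
// TTI-chosen type.
#include <cstddef>
#include <cstdint>
#include <cstring>

static void wideMemCpyLoopSketch(uint8_t *Dst, const uint8_t *Src, size_t Len) {
  const size_t Wide = sizeof(uint32_t);   // assumed "widest" copy unit
  size_t I = 0;
  for (; I + Wide <= Len; I += Wide) {    // main loop: wide copies
    uint32_t Tmp;
    std::memcpy(&Tmp, Src + I, Wide);     // memcpy sidesteps alignment UB
    std::memcpy(Dst + I, &Tmp, Wide);
  }
  for (; I < Len; ++I)                    // residual loop: leftover bytes
    Dst[I] = Src[I];
}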
for (MemIntrinsic *MemCall : MemCalls) { if (MemCpyInst *Memcpy = dyn_cast(MemCall)) { - expandMemCpyAsLoop(Memcpy); + expandMemCpyAsLoop(Memcpy, TTI); } else if (MemMoveInst *Memmove = dyn_cast(MemCall)) { expandMemMoveAsLoop(Memmove); } else if (MemSetInst *Memset = dyn_cast(MemCall)) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 7393f3d7a08a..bdad2fe8714f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -115,7 +115,7 @@ class PPCAsmBackend : public MCAsmBackend { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override { + uint64_t Value, bool IsResolved) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 094d3e6a61b5..53f33ac1fc0e 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -607,7 +607,10 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // The old condition may be dead now, and may have even created a dead PHI // (the original induction variable). RecursivelyDeleteTriviallyDeadInstructions(OldCond); - DeleteDeadPHIs(CountedExitBlock); + // Run through the basic blocks of the loop and see if any of them have dead + // PHIs that can be removed. + for (auto I : L->blocks()) + DeleteDeadPHIs(I); ++NumCTRLoops; return MadeChange; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index c2c115cb6daf..b49c3345a17d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -435,22 +435,19 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - // If we are a leaf function, and use up to 224 bytes of stack space, - // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). - // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate - // stackless code if all local vars are reg-allocated. - bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); unsigned LR = RegInfo->getRARegister(); - if (!DisableRedZone && - (Subtarget.isPPC64() || // 32-bit SVR4, no stack- - !Subtarget.isSVR4ABI() || // allocated locals. - FrameSize == 0) && - FrameSize <= 224 && // Fits in red zone. - !MFI.hasVarSizedObjects() && // No dynamic alloca. - !MFI.adjustsStack() && // No calls. - !MustSaveLR(MF, LR) && - !RegInfo->hasBasePointer(MF)) { // No special alignment. + bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. + !MFI.adjustsStack() && // No calls. + !MustSaveLR(MF, LR) && // No need to save LR. + !RegInfo->hasBasePointer(MF); // No special alignment. + + // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless + // code if all local vars are reg-allocated. 
+ bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); + + // Check whether we can skip adjusting the stack pointer (by using red zone) + if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { // No need for frame if (UpdateMF) MFI.setStackSize(0); @@ -1869,8 +1866,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } if (HasVRSaveArea) { - // Insert alignment padding, we need 16-byte alignment. - LowerBound = (LowerBound - 15) & ~(15); + // Insert alignment padding, we need 16-byte alignment. Note: for postive + // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since + // we are using negative number here (the stack grows downward). We should + // use formula : y = x & (~(n-1)). Where x is the size before aligning, n + // is the alignment size ( n = 16 here) and y is the size after aligning. + assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); + LowerBound &= ~(15); for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { int FI = VRegs[i].getFrameIdx(); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 535b9deaefac..3aaf7ef2c2a0 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -419,25 +419,6 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { .getNode(); } -/// isIntS16Immediate - This method tests to see if the node is either a 32-bit -/// or 64-bit immediate, and if the value can be accurately represented as a -/// sign extension from a 16-bit value. If so, this returns true and the -/// immediate. -static bool isIntS16Immediate(SDNode *N, short &Imm) { - if (N->getOpcode() != ISD::Constant) - return false; - - Imm = (short)cast(N)->getZExtValue(); - if (N->getValueType(0) == MVT::i32) - return Imm == (int32_t)cast(N)->getZExtValue(); - else - return Imm == (int64_t)cast(N)->getZExtValue(); -} - -static bool isIntS16Immediate(SDValue Op, short &Imm) { - return isIntS16Immediate(Op.getNode(), Imm); -} - /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. If so Imm will receive the 32-bit value. static bool isInt32Immediate(SDNode *N, unsigned &Imm) { @@ -728,7 +709,10 @@ static uint64_t Rot64(uint64_t Imm, unsigned R) { static unsigned getInt64Count(int64_t Imm) { unsigned Count = getInt64CountDirect(Imm); - if (Count == 1) + + // If the instruction count is 1 or 2, we do not need further analysis + // since rotate + load constant requires at least 2 instructions. + if (Count <= 2) return Count; for (unsigned r = 1; r < 63; ++r) { @@ -838,7 +822,10 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { unsigned Count = getInt64CountDirect(Imm); - if (Count == 1) + + // If the instruction count is 1 or 2, we do not need further analysis + // since rotate + load constant requires at least 2 instructions. 
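// [Editor's aside - worked example, not part of the imported sources.]
// The VRSAVE-area hunk above replaces "(LowerBound - 15) & ~15" with
// "LowerBound &= ~15" and documents why: LowerBound is a non-positive stack
// offset (the stack grows down), so aligning it to 16 bytes only needs the
// low bits cleared; the (x + (n-1)) & ~(n-1) form is for rounding
// non-negative sizes up, and the old subtract-then-mask form also wasted a
// full 16 bytes whenever the offset was already aligned (-16 became -32).
// Small self-checking illustration:
#include <cassert>
#include <cstdint>

static int64_t alignDownNonPositive(int64_t X, int64_t N) {
  return X & ~(N - 1);   // N must be a power of two; X is expected <= 0
}

static void alignSketchChecks() {
  assert(alignDownNonPositive(0, 16) == 0);
  assert(alignDownNonPositive(-1, 16) == -16);    // pads 15 bytes
  assert(alignDownNonPositive(-16, 16) == -16);   // already aligned, kept
  assert(alignDownNonPositive(-33, 16) == -48);
}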
+ if (Count <= 2) return getInt64Direct(CurDAG, dl, Imm); unsigned RMin = 0; @@ -2126,7 +2113,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, getI32Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLW; } else { - short SImm; + int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm((int)SImm & 0xFFFF, @@ -2173,7 +2160,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, getI64Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLD; } else { - short SImm; + int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI64Imm(SImm & 0xFFFF, dl)), @@ -3323,7 +3310,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (tryLogicOpOfCompares(N)) return; - short Imm; + int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { KnownBits LHSKnown; @@ -3346,7 +3333,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { break; } case ISD::ADD: { - short Imm; + int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); @@ -4034,11 +4021,13 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { O0.getNode(), O1.getNode()); }; + // FIXME: When the semantics of the interaction between select and undef + // are clearly defined, it may turn out to be unnecessary to break here. SDValue TrueRes = TryFold(ConstTrue); - if (!TrueRes) + if (!TrueRes || TrueRes.isUndef()) break; SDValue FalseRes = TryFold(ConstFalse); - if (!FalseRes) + if (!FalseRes || FalseRes.isUndef()) break; // For us to materialize these using one instruction, we must be able to diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 72f14e969138..0e069ec1665f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -136,6 +136,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } + // Match BITREVERSE to customized fast code sequence in the td file. + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); @@ -1168,6 +1172,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; case PPCISD::STXSIX: return "PPCISD::STXSIX"; case PPCISD::VEXTS: return "PPCISD::VEXTS"; + case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; @@ -2028,17 +2033,17 @@ int PPC::isQVALIGNIShuffleMask(SDNode *N) { /// or 64-bit immediate, and if the value can be accurately represented as a /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. 
-static bool isIntS16Immediate(SDNode *N, short &Imm) { +bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { if (!isa(N)) return false; - Imm = (short)cast(N)->getZExtValue(); + Imm = (int16_t)cast(N)->getZExtValue(); if (N->getValueType(0) == MVT::i32) return Imm == (int32_t)cast(N)->getZExtValue(); else return Imm == (int64_t)cast(N)->getZExtValue(); } -static bool isIntS16Immediate(SDValue Op, short &Imm) { +bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } @@ -2048,7 +2053,7 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) { bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { - short imm = 0; + int16_t imm = 0; if (N.getOpcode() == ISD::ADD) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i @@ -2138,7 +2143,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, return false; if (N.getOpcode() == ISD::ADD) { - short imm = 0; + int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); @@ -2162,7 +2167,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { - short imm = 0; + int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add @@ -2190,7 +2195,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" - short Imm; + int16_t Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, @@ -2235,10 +2240,15 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, if (SelectAddressRegReg(N, Base, Index, DAG)) return true; - // If the operand is an addition, always emit this as [r+r], since this is - // better (for code size, and execution, as the memop does the add for free) - // than emitting an explicit add. - if (N.getOpcode() == ISD::ADD) { + // If the address is the result of an add, we will utilize the fact that the + // address calculation includes an implicit add. However, we can reduce + // register pressure if we do not materialize a constant just for use as the + // index register. We only get rid of the add if it is not an add of a + // value and a 16-bit signed constant and both have a single use. + int16_t imm = 0; + if (N.getOpcode() == ISD::ADD && + (!isIntS16Immediate(N.getOperand(1), imm) || + !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) { Base = N.getOperand(0); Index = N.getOperand(1); return true; @@ -6422,7 +6432,7 @@ PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - // Get the corect type for integers. + // Get the correct type for integers. EVT IntVT = Op.getValueType(); // Get the inputs. @@ -6439,7 +6449,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, // When we pop the dynamic allocation we need to restore the SP link. SDLoc dl(Op); - // Get the corect type for pointers. + // Get the correct type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. 
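// [Editor's aside - illustrative sketch, not part of the imported sources.]
// The PPCISelLowering hunks above change isIntS16Immediate's out-parameter
// from `short` to `int16_t` and export it in the llvm namespace, and
// SelectAddressRegRegOnly now skips the [r+r] form when the ADD's RHS is a
// 16-bit signed constant and both operands have a single use, so the constant
// folds into the displacement form instead of being materialized in a
// register. The core predicate, restated as plain C++:
#include <cstdint>

static bool isSExt16Sketch(int64_t V, int16_t &Imm) {
  Imm = static_cast<int16_t>(V);          // keep the low 16 bits
  return static_cast<int64_t>(Imm) == V;  // true iff sign-extending them back
                                          // reproduces the original value
}
// e.g. isSExt16Sketch(-32768, I) is true; isSExt16Sketch(0x8000, I) is false,
// since 0x8000 sign-extends to -32768, not 32768.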
@@ -6514,7 +6524,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Size = Op.getOperand(1); SDLoc dl(Op); - // Get the corect type for pointers. + // Get the correct type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, @@ -6645,6 +6655,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); + LLVM_FALLTHROUGH; case ISD::SETEQ: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); @@ -6656,6 +6667,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt + LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -6664,6 +6676,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETUGT: case ISD::SETGT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt + LLVM_FALLTHROUGH; case ISD::SETOLE: case ISD::SETLE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -6677,6 +6690,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); + LLVM_FALLTHROUGH; case ISD::SETEQ: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -11311,6 +11325,132 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// This function adds the required vector_shuffle needed to get +// the elements of the vector extract in the correct position +// as specified by the CorrectElems encoding. +static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, + SDValue Input, uint64_t Elems, + uint64_t CorrectElems) { + SDLoc dl(N); + + unsigned NumElems = Input.getValueType().getVectorNumElements(); + SmallVector ShuffleMask(NumElems, -1); + + // Knowing the element indices being extracted from the original + // vector and the order in which they're being inserted, just put + // them at element indices required for the instruction. + for (unsigned i = 0; i < N->getNumOperands(); i++) { + if (DAG.getDataLayout().isLittleEndian()) + ShuffleMask[CorrectElems & 0xF] = Elems & 0xF; + else + ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4; + CorrectElems = CorrectElems >> 8; + Elems = Elems >> 8; + } + + SDValue Shuffle = + DAG.getVectorShuffle(Input.getValueType(), dl, Input, + DAG.getUNDEF(Input.getValueType()), ShuffleMask); + + EVT Ty = N->getValueType(0); + SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle); + return BV; +} + +// Look for build vector patterns where input operands come from sign +// extended vector_extract elements of specific indices. If the correct indices +// aren't used, add a vector shuffle to fix up the indices and create a new +// PPCISD:SExtVElems node which selects the vector sign extend instructions +// during instruction selection. +static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { + // This array encodes the indices that the vector sign extend instructions + // extract from when extending from one type to another for both BE and LE. 
+ // The right nibble of each byte corresponds to the LE incides. + // and the left nibble of each byte corresponds to the BE incides. + // For example: 0x3074B8FC byte->word + // For LE: the allowed indices are: 0x0,0x4,0x8,0xC + // For BE: the allowed indices are: 0x3,0x7,0xB,0xF + // For example: 0x000070F8 byte->double word + // For LE: the allowed indices are: 0x0,0x8 + // For BE: the allowed indices are: 0x7,0xF + uint64_t TargetElems[] = { + 0x3074B8FC, // b->w + 0x000070F8, // b->d + 0x10325476, // h->w + 0x00003074, // h->d + 0x00001032, // w->d + }; + + uint64_t Elems = 0; + int Index; + SDValue Input; + + auto isSExtOfVecExtract = [&](SDValue Op) -> bool { + if (!Op) + return false; + if (Op.getOpcode() != ISD::SIGN_EXTEND) + return false; + + SDValue Extract = Op.getOperand(0); + if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; + + ConstantSDNode *ExtOp = dyn_cast(Extract.getOperand(1)); + if (!ExtOp) + return false; + + Index = ExtOp->getZExtValue(); + if (Input && Input != Extract.getOperand(0)) + return false; + + if (!Input) + Input = Extract.getOperand(0); + + Elems = Elems << 8; + Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4; + Elems |= Index; + + return true; + }; + + // If the build vector operands aren't sign extended vector extracts, + // of the same input vector, then return. + for (unsigned i = 0; i < N->getNumOperands(); i++) { + if (!isSExtOfVecExtract(N->getOperand(i))) { + return SDValue(); + } + } + + // If the vector extract indicies are not correct, add the appropriate + // vector_shuffle. + int TgtElemArrayIdx; + int InputSize = Input.getValueType().getScalarSizeInBits(); + int OutputSize = N->getValueType(0).getScalarSizeInBits(); + if (InputSize + OutputSize == 40) + TgtElemArrayIdx = 0; + else if (InputSize + OutputSize == 72) + TgtElemArrayIdx = 1; + else if (InputSize + OutputSize == 48) + TgtElemArrayIdx = 2; + else if (InputSize + OutputSize == 80) + TgtElemArrayIdx = 3; + else if (InputSize + OutputSize == 96) + TgtElemArrayIdx = 4; + else + return SDValue(); + + uint64_t CorrectElems = TargetElems[TgtElemArrayIdx]; + CorrectElems = DAG.getDataLayout().isLittleEndian() + ? CorrectElems & 0x0F0F0F0F0F0F0F0F + : CorrectElems & 0xF0F0F0F0F0F0F0F0; + if (Elems != CorrectElems) { + return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems); + } + + // Regular lowering will catch cases where a shuffle is not needed. + return SDValue(); +} + SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && @@ -11338,6 +11478,15 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, if (Reduced) return Reduced; + // If we're building a vector out of extended elements from another vector + // we have P9 vector integer extend instructions. + if (Subtarget.hasP9Altivec()) { + Reduced = combineBVOfVecSExt(N, DAG); + if (Reduced) + return Reduced; + } + + if (N->getValueType(0) != MVT::v2f64) return SDValue(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index a5108727bb4b..821927d3b157 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -67,6 +67,10 @@ namespace llvm { /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer. VEXTS, + /// SExtVElems, takes an input vector of a smaller type and sign + /// extends to an output vector of a larger type. + SExtVElems, + /// Reciprocal estimate instructions (unary FP ops). 
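For illustration only, the TargetElems constants used by combineBVOfVecSExt above pack one byte per destination element, the low nibble giving the required source index on little-endian targets and the high nibble the index on big-endian targets. A small stand-alone C++ sketch (function name invented) that reads the encoding back:

#include <cstdint>
#include <cstdio>

// Decode a TargetElems constant, e.g. 0x3074B8FC for byte->word: prints
// LE indices 12, 8, 4, 0 and BE indices 15, 11, 7, 3, matching the allowed
// index sets listed in the comment above.
static void decodeTargetElems(uint64_t Encoded, unsigned NumElems) {
  for (unsigned I = 0; I != NumElems; ++I) {
    unsigned LE = Encoded & 0xF;        // low nibble: little-endian index
    unsigned BE = (Encoded >> 4) & 0xF; // high nibble: big-endian index
    std::printf("elem %u: LE=%u BE=%u\n", I, LE, BE);
    Encoded >>= 8;                      // advance to the next element
  }
}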
FRE, FRSQRTE, @@ -1092,6 +1096,9 @@ namespace llvm { ISD::ArgFlagsTy &ArgFlags, CCState &State); + bool isIntS16Immediate(SDNode *N, int16_t &Imm); + bool isIntS16Immediate(SDValue Op, int16_t &Imm); + } // end namespace llvm #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 47d59c25392a..6d9f55206b6a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -32,6 +32,9 @@ def SDT_PPCstxsix : SDTypeProfile<0, 3, [ def SDT_PPCVexts : SDTypeProfile<1, 2, [ SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2> ]>; +def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -131,6 +134,7 @@ def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, [SDNPHasChain, SDNPMayStore]>; def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>; +def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>; // Extract FPSCR (not modeled at the DAG level). def PPCmffs : SDNode<"PPCISD::MFFS", @@ -4450,3 +4454,190 @@ def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>; def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>; } // IsISA3_0 + +// Fast 32-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]): +// Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes): +// n' = (n rotl 24); After which n' = [B4, B1, B2, B3] +// Step 4.2: Insert B3 to the right position: +// n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3] +// Step 4.3: Insert B1 to the right position: +// n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1] +def MaskValues { + dag Lo1 = (ORI (LIS 0x5555), 0x5555); + dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA); + dag Lo2 = (ORI (LIS 0x3333), 0x3333); + dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC); + dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F); + dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0); +} + +def Shift1 { + dag Right = (RLWINM $A, 31, 1, 31); + dag Left = (RLWINM $A, 1, 0, 30); +} + +def Swap1 { + dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1), + (AND Shift1.Left, MaskValues.Hi1)); +} + +def Shift2 { + dag Right = (RLWINM Swap1.Bit, 30, 2, 31); + dag Left = (RLWINM Swap1.Bit, 2, 0, 29); +} + +def Swap2 { + dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2), + (AND Shift2.Left, MaskValues.Hi2)); +} + +def Shift4 { + dag Right = (RLWINM Swap2.Bits, 28, 4, 31); + dag Left = (RLWINM Swap2.Bits, 4, 0, 27); +} + +def Swap4 { + dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4), + (AND Shift4.Left, MaskValues.Hi4)); +} + +def Rotate { + dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31); +} + +def RotateInsertByte3 { + dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15); +} + +def RotateInsertByte1 { + dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31); +} + +def : Pat<(i32 (bitreverse i32:$A)), + (RLDICL_32 RotateInsertByte1.Left, 0, 32)>; + +// Fast 64-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x5555555555555555) | ((n << 
1) & 0xAAAAAAAAAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4,B5,B6,B7,B8]): +// Apply the same byte reverse algorithm mentioned above for the fast 32-bit +// reverse to both the high 32 bit and low 32 bit of the 64 bit value. And +// then OR them together to get the final result. +def MaskValues64 { + dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32)); + dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32)); + dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32)); + dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32)); + dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32)); + dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32)); +} + +def DWMaskValues { + dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555); + dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA); + dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333); + dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC); + dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F); + dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0); +} + +def DWShift1 { + dag Right = (RLDICL $A, 63, 1); + dag Left = (RLDICR $A, 1, 62); +} + +def DWSwap1 { + dag Bit = (OR8 (AND8 DWShift1.Right, DWMaskValues.Lo1), + (AND8 DWShift1.Left, DWMaskValues.Hi1)); +} + +def DWShift2 { + dag Right = (RLDICL DWSwap1.Bit, 62, 2); + dag Left = (RLDICR DWSwap1.Bit, 2, 61); +} + +def DWSwap2 { + dag Bits = (OR8 (AND8 DWShift2.Right, DWMaskValues.Lo2), + (AND8 DWShift2.Left, DWMaskValues.Hi2)); +} + +def DWShift4 { + dag Right = (RLDICL DWSwap2.Bits, 60, 4); + dag Left = (RLDICR DWSwap2.Bits, 4, 59); +} + +def DWSwap4 { + dag Bits = (OR8 (AND8 DWShift4.Right, DWMaskValues.Lo4), + (AND8 DWShift4.Left, DWMaskValues.Hi4)); +} + +// Bit swap is done, now start byte swap. 
+def DWExtractLo32 { + dag SubReg = (i32 (EXTRACT_SUBREG DWSwap4.Bits, sub_32)); +} + +def DWRotateLo32 { + dag Left24 = (RLWINM DWExtractLo32.SubReg, 24, 0, 31); +} + +def DWLo32RotateInsertByte3 { + dag Left = (RLWIMI DWRotateLo32.Left24, DWExtractLo32.SubReg, 8, 8, 15); +} + +// Lower 32 bits in the right order +def DWLo32RotateInsertByte1 { + dag Left = + (RLWIMI DWLo32RotateInsertByte3.Left, DWExtractLo32.SubReg, 8, 24, 31); +} + +def ExtendLo32 { + dag To64Bit = + (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + DWLo32RotateInsertByte1.Left, sub_32)); +} + +def DWShiftHi32 { // SRDI DWSwap4.Bits, 32) + dag ToLo32 = (RLDICL DWSwap4.Bits, 32, 32); +} + +def DWExtractHi32 { + dag SubReg = (i32 (EXTRACT_SUBREG DWShiftHi32.ToLo32, sub_32)); +} + +def DWRotateHi32 { + dag Left24 = (RLWINM DWExtractHi32.SubReg, 24, 0, 31); +} + +def DWHi32RotateInsertByte3 { + dag Left = (RLWIMI DWRotateHi32.Left24, DWExtractHi32.SubReg, 8, 8, 15); +} + +// High 32 bits in the right order, but in the low 32-bit position +def DWHi32RotateInsertByte1 { + dag Left = + (RLWIMI DWHi32RotateInsertByte3.Left, DWExtractHi32.SubReg, 8, 24, 31); +} + +def ExtendHi32 { + dag To64Bit = + (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + DWHi32RotateInsertByte1.Left, sub_32)); +} + +def DWShiftLo32 { // SLDI ExtendHi32.To64Bit, 32 + dag ToHi32 = (RLDICR ExtendHi32.To64Bit, 32, 31); +} + +def : Pat<(i64 (bitreverse i64:$A)), + (OR8 DWShiftLo32.ToHi32, ExtendLo32.To64Bit)>; diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 9cfc897cdb3f..43635a8919e2 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -1901,6 +1901,98 @@ let Predicates = [IsLittleEndian, HasVSX] in def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; +// Variable index unsigned vector_extract on Power9 +let Predicates = [HasP9Altivec, IsLittleEndian] in { + def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBRX $Idx, $S)>; + + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), + (VEXTUHRX (LI8 0), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHRX (LI8 2), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHRX (LI8 4), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHRX (LI8 6), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHRX (LI8 8), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHRX (LI8 10), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHRX (LI8 12), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHRX (LI8 14), $S)>; + + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWRX (LI8 0), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (VEXTUWRX (LI8 4), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (VEXTUWRX (LI8 8), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWRX (LI8 12), $S)>; + + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>; + def : Pat<(i64 (sext (i32 
(vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWRX (LI8 0), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (VEXTUWRX (LI8 4), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (VEXTUWRX (LI8 8), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWRX (LI8 12), $S))>; +} +let Predicates = [HasP9Altivec, IsBigEndian] in { + def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBLX $Idx, $S)>; + + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), + (VEXTUHLX (LI8 0), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHLX (LI8 2), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHLX (LI8 4), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHLX (LI8 6), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHLX (LI8 8), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHLX (LI8 10), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHLX (LI8 12), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHLX (LI8 14), $S)>; + + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWLX (LI8 0), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (VEXTUWLX (LI8 4), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (VEXTUWLX (LI8 8), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWLX (LI8 12), $S)>; + + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWLX (LI8 0), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (VEXTUWLX (LI8 4), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (VEXTUWLX (LI8 8), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWLX (LI8 12), $S))>; +} + let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -2729,36 +2821,54 @@ def DblToFlt { } def ByteToWord { - dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); - dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); - dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); - dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); + dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); + dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); + dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); + dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); + dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8)); + dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8)); + dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8)); + dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8)); } def ByteToDWord { - dag A0 = (i64 (sext_inreg - (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); - dag A1 = (i64 (sext_inreg - (i64 (anyext (i32 
(vector_extract v16i8:$A, 8)))), i8)); + dag LE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); + dag LE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); + dag BE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8)); + dag BE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8)); } def HWordToWord { - dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); - dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); - dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); - dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); + dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); + dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); + dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); + dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); + dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16)); + dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16)); + dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16)); + dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16)); } def HWordToDWord { - dag A0 = (i64 (sext_inreg - (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); - dag A1 = (i64 (sext_inreg - (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); + dag LE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); + dag LE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); + dag BE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16)); + dag BE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 7)))), i16)); } def WordToDWord { - dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); - dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); + dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); + dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); + dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1)))); + dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3)))); } def FltToIntLoad { @@ -3016,18 +3126,46 @@ let AddedComplexity = 400 in { // P9 Altivec instructions that can be used to build vectors. // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete // with complexities of existing build vector patterns in this file. 
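For illustration only, the LE_/BE_ index sets defined in ByteToWord, ByteToDWord, HWordToWord, HWordToDWord and WordToDWord above differ because the P9 extend instructions sign-extend the source element sitting in the least-significant position of each destination lane, and which element that is depends on endianness. For the byte-to-word case this is (stand-alone C++, helper name invented):

// Lanes 0..3 map to source bytes 0, 4, 8, 12 on little-endian and
// 3, 7, 11, 15 on big-endian, exactly the LE_A*/BE_A* split above.
static unsigned byteToWordSourceElem(unsigned Lane, bool IsLittleEndian) {
  return IsLittleEndian ? Lane * 4 : Lane * 4 + 3;
}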
- let Predicates = [HasP9Altivec] in { - def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)), + let Predicates = [HasP9Altivec, IsLittleEndian] in { + def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)), (v2i64 (VEXTSW2D $A))>; - def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)), + def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)), (v2i64 (VEXTSH2D $A))>; - def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1, - HWordToWord.A2, HWordToWord.A3)), + def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1, + HWordToWord.LE_A2, HWordToWord.LE_A3)), (v4i32 (VEXTSH2W $A))>; - def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1, - ByteToWord.A2, ByteToWord.A3)), + def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1, + ByteToWord.LE_A2, ByteToWord.LE_A3)), (v4i32 (VEXTSB2W $A))>; - def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)), + def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)), (v2i64 (VEXTSB2D $A))>; } + + let Predicates = [HasP9Altivec, IsBigEndian] in { + def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)), + (v2i64 (VEXTSW2D $A))>; + def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)), + (v2i64 (VEXTSH2D $A))>; + def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1, + HWordToWord.BE_A2, HWordToWord.BE_A3)), + (v4i32 (VEXTSH2W $A))>; + def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1, + ByteToWord.BE_A2, ByteToWord.BE_A3)), + (v4i32 (VEXTSB2W $A))>; + def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)), + (v2i64 (VEXTSB2D $A))>; + } + + let Predicates = [HasP9Altivec] in { + def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)), + (v2i64 (VEXTSB2D $A))>; + def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)), + (v2i64 (VEXTSH2D $A))>; + def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)), + (v2i64 (VEXTSW2D $A))>; + def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)), + (v4i32 (VEXTSB2W $A))>; + def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)), + (v4i32 (VEXTSH2W $A))>; + } } diff --git a/lib/Target/PowerPC/PPCScheduleP9.td b/lib/Target/PowerPC/PPCScheduleP9.td index a9c1bd78b05e..a01995a629c2 100644 --- a/lib/Target/PowerPC/PPCScheduleP9.td +++ b/lib/Target/PowerPC/PPCScheduleP9.td @@ -260,8 +260,8 @@ let SchedModel = P9Model in { // ***************** Defining Itinerary Class Resources ***************** - def : ItinRW<[P9_DFU_76C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntSimple, - IIC_IntGeneral]>; + def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], + [IIC_IntSimple, IIC_IntGeneral]>; def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], [IIC_IntISEL, IIC_IntRotate, IIC_IntShift]>; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 5a97f595ad8c..90d11f46a384 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -272,6 +272,13 @@ class PPCSubtarget : public PPCGenSubtargetInfo { return 16; } + + // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no + // red zone and PPC64 SVR4ABI has a 288-byte red zone. + unsigned getRedZoneSize() const { + return isDarwinABI() ? 224 : (isPPC64() ? 
288 : 0); + } + bool hasHTM() const { return HasHTM; } bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 491eaf326a50..7d34efd4af3e 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -195,8 +195,10 @@ struct PPCVSXSwapRemoval : public MachineFunctionPass { return false; // If we don't have VSX on the subtarget, don't do anything. + // Also, on Power 9 the load and store ops preserve element order and so + // the swaps are not required. const PPCSubtarget &STI = MF.getSubtarget(); - if (!STI.hasVSX()) + if (!STI.hasVSX() || !STI.needsSwapsForVSXMemOps()) return false; bool Changed = false; diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index f85c0cf111c4..be83efc02d27 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -34,7 +34,7 @@ class RISCVAsmBackend : public MCAsmBackend { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -73,7 +73,7 @@ bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { return; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index d4454c271f5a..0d021d67033e 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -211,6 +211,7 @@ namespace { case Sparc::fixup_sparc_wplt30: if (Target.getSymA()->getSymbol().isTemporary()) return false; + LLVM_FALLTHROUGH; case Sparc::fixup_sparc_tls_gd_hi22: case Sparc::fixup_sparc_tls_gd_lo10: case Sparc::fixup_sparc_tls_gd_add: @@ -275,7 +276,7 @@ namespace { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override { + uint64_t Value, bool IsResolved) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. 
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 6b32a7926437..51ac410a9c81 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -52,7 +52,7 @@ class SystemZMCAsmBackend : public MCAsmBackend { const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } @@ -94,7 +94,7 @@ void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { MCFixupKind Kind = Fixup.getKind(); unsigned Offset = Fixup.getOffset(); unsigned BitSize = getFixupKindInfo(Kind).TargetSize; diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index fe4b52b515e0..73a1036f88e0 100644 --- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -26,7 +26,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" // This is the limit of processor resource usage at which the // scheduler should try to look for other instructions (not using the diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index fef4a8c92a36..2801141cd951 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2224,15 +2224,12 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, // Lower a binary operation that produces two VT results, one in each // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, -// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation -// on the extended Op0 and (unextended) Op1. Store the even register result +// and Opcode performs the GR128 operation. Store the even register result // in Even and the odd register result in Odd. static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - unsigned Extend, unsigned Opcode, SDValue Op0, - SDValue Op1, SDValue &Even, SDValue &Odd) { - SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); - SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, - SDValue(In128, 0), Op1); + unsigned Opcode, SDValue Op0, SDValue Op1, + SDValue &Even, SDValue &Odd) { + SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); bool Is32Bit = is32Bit(VT); Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); @@ -2347,6 +2344,7 @@ static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // Handle tests for order using (or (ogt y x) (oge x y)). case ISD::SETUO: Invert = true; + LLVM_FALLTHROUGH; case ISD::SETO: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); @@ -2358,6 +2356,7 @@ static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // Handle <> tests using (or (ogt y x) (ogt x y)).
case ISD::SETUEQ: Invert = true; + LLVM_FALLTHROUGH; case ISD::SETONE: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); @@ -2962,7 +2961,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else { - // Do a full 128-bit multiplication based on UMUL_LOHI64: + // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: // // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) // @@ -2980,10 +2979,10 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. SMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + // SystemZISD::UMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::SMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, LL, RL, Ops[1], Ops[0]); SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); @@ -3004,10 +3003,10 @@ SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. UMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + // SystemZISD::UMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::UMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3018,24 +3017,19 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Opcode; - // We use DSGF for 32-bit division. - if (is32Bit(VT)) { + // We use DSGF for 32-bit division. This means the first operand must + // always be 64-bit, and the second operand should be 32-bit whenever + // that is possible, to improve performance. + if (is32Bit(VT)) Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); - Opcode = SystemZISD::SDIVREM32; - } else if (DAG.ComputeNumSignBits(Op1) > 32) { + else if (DAG.ComputeNumSignBits(Op1) > 32) Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); - Opcode = SystemZISD::SDIVREM32; - } else - Opcode = SystemZISD::SDIVREM64; - // DSG(F) takes a 64-bit dividend, so the even register in the GR128 - // input is "don't care". The instruction returns the remainder in - // the even register and the quotient in the odd register. + // DSG(F) returns the remainder in the even register and the + // quotient in the odd register. 
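For reference, the comment above derives the signed high word from SystemZISD::UMUL_LOHI plus two masked corrections; the identity being used is high_s(a,b) = high_u(a,b) - b*[a<0] - a*[b<0], with LH and RH acting as all-ones-or-zero sign masks so that (LL & RH) and (LH & RL) are exactly those correction terms. A stand-alone C++ reference model of that identity (relies on the Clang/GCC __int128 extension; not the lowering code itself):

#include <cstdint>

// Signed high word of a 64x64->128 multiply, derived from the unsigned one.
static int64_t signedMulHigh64(int64_t A, int64_t B) {
  uint64_t UnsignedHigh =
      (uint64_t)(((unsigned __int128)(uint64_t)A * (uint64_t)B) >> 64);
  uint64_t Correction = (A < 0 ? (uint64_t)B : 0) + (B < 0 ? (uint64_t)A : 0);
  return (int64_t)(UnsignedHigh - Correction); // wraparound gives the right bits
}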
SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, - Op0, Op1, Ops[1], Ops[0]); + lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3044,16 +3038,11 @@ SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - // DL(G) uses a double-width dividend, so we need to clear the even - // register in the GR128 input. The instruction returns the remainder - // in the even register and the quotient in the odd register. + // DL(G) returns the remainder in the even register and the + // quotient in the odd register. SDValue Ops[2]; - if (is32Bit(VT)) - lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); - else - lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3193,13 +3182,13 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, SDLoc DL(Op); AtomicOrdering FenceOrdering = static_cast( cast(Op.getOperand(1))->getZExtValue()); - SynchronizationScope FenceScope = static_cast( + SyncScope::ID FenceSSID = static_cast( cast(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && - FenceScope == CrossThread) { + FenceSSID == SyncScope::System) { return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, Op.getOperand(0)), 0); @@ -4669,11 +4658,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); OPCODE(POPCNT); - OPCODE(UMUL_LOHI64); - OPCODE(SDIVREM32); - OPCODE(SDIVREM64); - OPCODE(UDIVREM32); - OPCODE(UDIVREM64); + OPCODE(UMUL_LOHI); + OPCODE(SDIVREM); + OPCODE(UDIVREM); OPCODE(MVC); OPCODE(MVC_LOOP); OPCODE(NC); @@ -5778,14 +5765,12 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, return DoneMBB; } -// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true +// Emit an extension from a GR64 to a GR128. ClearEven is true // if the high register of the GR128 value must be cleared or false if -// it's "don't care". SubReg is subreg_l32 when extending a GR32 -// and subreg_l64 when extending a GR64. +// it's "don't care". 
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, - bool ClearEven, - unsigned SubReg) const { + bool ClearEven) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); @@ -5808,7 +5793,7 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, In128 = NewIn128; } BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) - .addReg(In128).addReg(Src).addImm(SubReg); + .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); MI.eraseFromParent(); return MBB; @@ -6172,12 +6157,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::CondStoreF64Inv: return emitCondStore(MI, MBB, SystemZ::STD, 0, true); - case SystemZ::AEXT128_64: - return emitExt128(MI, MBB, false, SystemZ::subreg_l64); - case SystemZ::ZEXT128_32: - return emitExt128(MI, MBB, true, SystemZ::subreg_l32); - case SystemZ::ZEXT128_64: - return emitExt128(MI, MBB, true, SystemZ::subreg_l64); + case SystemZ::AEXT128: + return emitExt128(MI, MBB, false); + case SystemZ::ZEXT128: + return emitExt128(MI, MBB, true); case SystemZ::ATOMIC_SWAPW: return emitAtomicLoadBinary(MI, MBB, 0, 0); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 5dcb19c0a35d..6c9c404816f0 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -86,14 +86,11 @@ enum NodeType : unsigned { // Count number of bits set in operand 0 per byte. POPCNT, - // Wrappers around the ISD opcodes of the same name. The output and - // first input operands are GR128s. The trailing numbers are the - // widths of the second operand in bits. - UMUL_LOHI64, - SDIVREM32, - SDIVREM64, - UDIVREM32, - UDIVREM64, + // Wrappers around the ISD opcodes of the same name. The output is GR128. + // Input operands may be GR64 or GR32, depending on the instruction. + UMUL_LOHI, + SDIVREM, + UDIVREM, // Use a series of MVCs to copy bytes from one memory location to another. // The operands are: @@ -562,7 +559,7 @@ class SystemZTargetLowering : public TargetLowering { unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const; MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, - bool ClearEven, unsigned SubReg) const; + bool ClearEven) const; MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned BinOpcode, unsigned BitSize, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 98f66c29ae64..4569be7602e4 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -677,6 +677,22 @@ let Predicates = [FeatureLoadAndTrap] in { def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>; } +// Extend GR64s to GR128s. +let usesCustomInserter = 1 in + def ZEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + +//===----------------------------------------------------------------------===// +// "Any" extensions +//===----------------------------------------------------------------------===// + +// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. +def : Pat<(i64 (anyext GR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; + +// Extend GR64s to GR128s. 
+let usesCustomInserter = 1 in + def AEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + //===----------------------------------------------------------------------===// // Truncations //===----------------------------------------------------------------------===// @@ -1216,13 +1232,17 @@ def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; // Multiplication of a register, producing two results. def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>; def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>; -def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; +def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>; +def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2), + (MLGR (AEXT128 GR64:$src1), GR64:$src2)>; // Multiplication of memory, producing two results. def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>; def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>; def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>; -def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; +def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>; +def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; //===----------------------------------------------------------------------===// // Division and remainder @@ -1230,19 +1250,38 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; let hasSideEffects = 1 in { // Do not speculatively execute. // Division and remainder, from registers. - def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>; - def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; - def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; - def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; - def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; + def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>; + def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; + def DSGR : BinaryRRE<"dsgr", 0xB90D, null_frag, GR128, GR64>; + def DLR : BinaryRRE<"dlr", 0xB997, null_frag, GR128, GR32>; + def DLGR : BinaryRRE<"dlgr", 0xB987, null_frag, GR128, GR64>; // Division and remainder, from memory. 
- def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>; - def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; - def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; - def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; - def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; + def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>; + def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, load, 4>; + def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, load, 8>; + def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, load, 4>; + def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, load, 8>; } +def : Pat<(z_sdivrem GR64:$src1, GR32:$src2), + (DSGFR (AEXT128 GR64:$src1), GR32:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, (i32 (load bdxaddr20only:$src2))), + (DSGF (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, GR64:$src2), + (DSGR (AEXT128 GR64:$src1), GR64:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (DSG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; + +def : Pat<(z_udivrem GR32:$src1, GR32:$src2), + (DLR (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1, + subreg_l32)), GR32:$src2)>; +def : Pat<(z_udivrem GR32:$src1, (i32 (load bdxaddr20only:$src2))), + (DL (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1, + subreg_l32)), bdxaddr20only:$src2)>; +def : Pat<(z_udivrem GR64:$src1, GR64:$src2), + (DLGR (ZEXT128 GR64:$src1), GR64:$src2)>; +def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (DLG (ZEXT128 GR64:$src1), bdxaddr20only:$src2)>; //===----------------------------------------------------------------------===// // Shifts @@ -1894,17 +1933,6 @@ def : Pat<(ctlz GR64:$src), let Predicates = [FeaturePopulationCount], Defs = [CC] in def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>; -// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. -def : Pat<(i64 (anyext GR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; - -// Extend GR32s and GR64s to GR128s. -let usesCustomInserter = 1 in { - def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; - def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>; - def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; -} - // Search a block of memory for a character. let mayLoad = 1, Defs = [CC] in defm SRST : StringRRE<"srst", 0xB25E, z_search_string>; diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp index 3a0e01da42f0..d4cd89ce590f 100644 --- a/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -127,7 +127,7 @@ MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I, return Copy; } -// Create a virtal register in *TLSBaseAddrReg, and populate it by +// Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. 
MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp index b6feaa49d858..8342463c1086 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -18,7 +18,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" #ifndef NDEBUG // Print the set of SUs diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index ab2392809f3b..9c6d5819f8a7 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -36,14 +36,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2, SDTCisSameAs<0, 2>, SDTCisPtrTy<0>]>; def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; -def SDT_ZGR128Binary32 : SDTypeProfile<1, 2, +def SDT_ZGR128Binary : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, - SDTCisVT<1, untyped>, - SDTCisVT<2, i32>]>; -def SDT_ZGR128Binary64 : SDTypeProfile<1, 2, - [SDTCisVT<0, untyped>, - SDTCisVT<1, untyped>, - SDTCisVT<2, i64>]>; + SDTCisInt<1>, + SDTCisInt<2>]>; def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, @@ -185,11 +181,9 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, [SDNPInGlue]>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; -def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; -def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; -def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; -def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; -def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; +def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; +def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>; +def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>; def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone, [SDNPHasChain, SDNPSideEffect]>; diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td index adc9f2976f87..72543c1eaee2 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -15,7 +15,7 @@ def Z13Model : SchedMachineModel { let UnsupportedFeatures = Arch11UnsupportedFeatures.List; - + let IssueWidth = 8; let MicroOpBufferSize = 60; // Issue queues let LoadLatency = 1; // Optimistic load latency. 
@@ -159,7 +159,7 @@ def : InstRW<[FXb], (instregex "CondReturn$")>; // Select instructions //===----------------------------------------------------------------------===// -// Select pseudo +// Select pseudo def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>; // CondStore pseudos @@ -226,7 +226,7 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>; def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>; -def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; @@ -282,7 +282,7 @@ def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], (instregex "LM(H|Y|G)?$")>; // Load multiple disjoint -def : InstRW<[FXb, Lat30, GroupAlone], (instregex "LMD$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>; // Store multiple (estimated average of ceil(5/2) FXb ops) def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10, @@ -446,13 +446,13 @@ def : InstRW<[FXa, Lat6], (instregex "MS(R|FI)$")>; def : InstRW<[FXa, LSU, Lat12], (instregex "MSG$")>; def : InstRW<[FXa, Lat8], (instregex "MSGR$")>; def : InstRW<[FXa, Lat6], (instregex "MSGF(I|R)$")>; -def : InstRW<[FXa, LSU, Lat15, GroupAlone], (instregex "MLG$")>; -def : InstRW<[FXa, Lat9, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[FXa2, LSU, Lat15, GroupAlone], (instregex "MLG$")>; +def : InstRW<[FXa2, Lat9, GroupAlone], (instregex "MLGR$")>; def : InstRW<[FXa, Lat5], (instregex "MGHI$")>; def : InstRW<[FXa, Lat5], (instregex "MHI$")>; def : InstRW<[FXa, LSU, Lat9], (instregex "MH(Y)?$")>; -def : InstRW<[FXa, Lat7, GroupAlone], (instregex "M(L)?R$")>; -def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; +def : InstRW<[FXa2, Lat7, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXa2, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; //===----------------------------------------------------------------------===// // Division and remainder @@ -460,8 +460,8 @@ def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>; def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; -def : InstRW<[LSU, FXa, Lat30, GroupAlone], (instregex "DSG(F)?$")>; +def : InstRW<[FXa2, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[LSU, FXa2, Lat30, GroupAlone], (instregex "DSG(F)?$")>; def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>; def : InstRW<[FXa2, FXa2, Lat30, GroupAlone], (instregex "DLGR$")>; def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>; @@ -474,7 +474,8 @@ def : InstRW<[FXa], (instregex "SLL(G|K)?$")>; def : InstRW<[FXa], (instregex "SRL(G|K)?$")>; def : InstRW<[FXa], (instregex "SRA(G|K)?$")>; def : InstRW<[FXa], (instregex "SLA(G|K)?$")>; -def : InstRW<[FXa, FXa, FXa, FXa, Lat8], (instregex "S(L|R)D(A|L)$")>; +def : InstRW<[FXa, FXa, FXa, FXa, LSU, Lat8, GroupAlone], + (instregex "S(L|R)D(A|L)$")>; // Rotate def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>; @@ -537,7 +538,7 @@ def : InstRW<[FXb], (instregex "TMLH(64)?$")>; def : InstRW<[FXb], (instregex "TMLL(64)?$")>; // Compare logical characters under mask -def : InstRW<[FXb, LSU, Lat5], (instregex "CLM(H|Y)?$")>; +def : InstRW<[FXb, LSU, Lat6], (instregex "CLM(H|Y)?$")>; 
//===----------------------------------------------------------------------===// // Prefetch and execution hint @@ -573,7 +574,7 @@ def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone], (instregex "CDSG$")>; // Compare and swap and store -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CSST$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "CSST$")>; // Perform locked operation def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>; @@ -589,36 +590,45 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; // Translate and convert //===----------------------------------------------------------------------===// -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>; +def : InstRW<[FXa, FXa, FXa, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "TRTR$")>; +def : InstRW<[FXa, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>; +def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>; //===----------------------------------------------------------------------===// // Message-security assist //===----------------------------------------------------------------------===// -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>; +def : InstRW<[FXa, Lat30], (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>; //===----------------------------------------------------------------------===// // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>; -def : InstRW<[FXb, VecDF, FXb, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>; +def : InstRW<[FXb, VecDF, VecDF, LSU, LSU, Lat30, GroupAlone], + (instregex "CVBG$")>; +def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>; +def : InstRW<[FXb, FXb, FXb, VecDF2, VecDF2, LSU, Lat30, GroupAlone], + (instregex "CVDG$")>; +def : InstRW<[FXb, VecDF, FXb, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>; +def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "UNPK$")>; -def : InstRW<[FXb, VecDFX, LSU, LSU, Lat9, GroupAlone], +def : InstRW<[FXb, VecDFX, LSU, LSU, LSU, Lat9, GroupAlone], (instregex "(A|S|ZA)P$")>; -def : InstRW<[FXb, VecDFX2, LSU, LSU, Lat30, GroupAlone], +def : InstRW<[FXb, VecDFX2, VecDFX2, LSU, LSU, LSU, Lat30, GroupAlone], (instregex "(M|D)P$")>; -def : InstRW<[FXb, FXb, VecDFX2, LSU, LSU, LSU, Lat15, GroupAlone], +def : InstRW<[FXb, VecDFX, VecDFX, LSU, LSU, Lat15, GroupAlone], (instregex "SRP$")>; def : InstRW<[VecDFX, 
LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>; -def : InstRW<[VecDFX, LSU, Lat4, GroupAlone], (instregex "TP$")>; +def : InstRW<[VecDFX, LSU, Lat4, BeginGroup], (instregex "TP$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; //===----------------------------------------------------------------------===// @@ -688,25 +698,25 @@ def : InstRW<[FXb], (instregex "PPA$")>; //===----------------------------------------------------------------------===// // Find leftmost one -def : InstRW<[FXa, Lat6, GroupAlone], (instregex "FLOGR$")>; +def : InstRW<[FXa, FXa, Lat6, GroupAlone], (instregex "FLOGR$")>; // Population count def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>; // Extend -def : InstRW<[FXa], (instregex "AEXT128_64$")>; -def : InstRW<[FXa], (instregex "ZEXT128_(32|64)$")>; +def : InstRW<[FXa], (instregex "AEXT128$")>; +def : InstRW<[FXa], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>; -def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>; +def : InstRW<[FXa, Lat30], (instregex "SRSTU$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; // Various complex instructions -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; +def : InstRW<[LSU, Lat30], (instregex "CFC$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30], (instregex "CKSM$")>; +def : InstRW<[FXa, Lat30], (instregex "CMPSC$")>; // Execute def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>; @@ -833,7 +843,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>; // Addition def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>; def : InstRW<[VecBF], (instregex "A(E|D)BR$")>; -def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXBR$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXBR$")>; // Subtraction def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>; @@ -848,9 +858,9 @@ def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>; def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>; // Multiply and add / subtract -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>; // Division @@ -859,7 +869,7 @@ def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>; def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>; // Divide to integer -def : InstRW<[VecFPd, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>; +def : InstRW<[VecFPd, Lat30], (instregex "DI(E|D)BR$")>; //===----------------------------------------------------------------------===// // FP: Comparisons @@ -882,8 +892,8 @@ def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>; def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>; def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>; def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>; -def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>; +def : InstRW<[FXa, Lat30], 
(instregex "SFASR$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "LFAS$")>; def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>; @@ -904,7 +914,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>; // Load rounded def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>; def : InstRW<[VecBF], (instregex "LEXR$")>; -def : InstRW<[VecDF2, VecDF2], (instregex "(LDXR|LRDR)$")>; +def : InstRW<[VecDF2], (instregex "(LDXR|LRDR)$")>; // Load lengthened def : InstRW<[LSU], (instregex "LDE$")>; @@ -955,7 +965,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>; // Addition def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>; def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>; -def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXR$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXR$")>; // Subtraction def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>; @@ -968,16 +978,20 @@ def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>; def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>; def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>; def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>; -def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)?$")>; -def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MY(H|L)?R$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MYR$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MY(H|L)R$")>; // Multiply and add / subtract -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>; -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; -def : InstRW<[VecBF], (instregex "M(A|S)DR$")>; -def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>; -def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAY(H|L)?R$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)DR$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAYR$")>; // Division def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>; @@ -989,8 +1003,8 @@ def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>; //===----------------------------------------------------------------------===// // Compare -def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)R$")>; +def : InstRW<[VecBF, LSU, Lat12], (instregex "C(E|D)$")>; +def : InstRW<[VecBF], (instregex "C(E|D)R$")>; def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>; @@ -1032,7 +1046,7 @@ def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>; def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>; def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>; def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>; -def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, BeginGroup], (instregex "C(S|U)XTR$")>; 
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "C(S|U)XTR$")>; // Convert from / to zoned def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>; @@ -1047,7 +1061,7 @@ def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>; def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>; // Perform floating-point operation -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>; +def : InstRW<[FXb, Lat30], (instregex "PFPO$")>; //===----------------------------------------------------------------------===// // DFP: Unary arithmetic @@ -1071,7 +1085,7 @@ def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>; // Addition def : InstRW<[VecDF], (instregex "ADTR(A)?$")>; -def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXTR(A)?$")>; // Subtraction def : InstRW<[VecDF], (instregex "SDTR(A)?$")>; @@ -1090,15 +1104,15 @@ def : InstRW<[VecDF], (instregex "QADTR$")>; def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>; // Reround -def : InstRW<[FXb, VecDF, Lat11], (instregex "RRDTR$")>; +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "RRDTR$")>; def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>; // Shift significand left/right -def : InstRW<[LSU, VecDF, Lat11], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, VecDF, Lat11, GroupAlone], (instregex "S(L|R)DT$")>; def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; // Insert biased exponent -def : InstRW<[FXb, VecDF, Lat11], (instregex "IEDTR$")>; +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "IEDTR$")>; def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>; //===----------------------------------------------------------------------===// @@ -1115,7 +1129,7 @@ def : InstRW<[VecDF], (instregex "CEXTR$")>; // Test Data Class/Group def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>; -def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; +def : InstRW<[LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; // --------------------------------- Vector --------------------------------- // @@ -1271,32 +1285,43 @@ def : InstRW<[VecStr, Lat5], (instregex "VTM$")>; // Vector: Floating-point arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[VecBF2], (instregex "VCD(G|GB|LG|LGB)$")>; -def : InstRW<[VecBF], (instregex "WCD(GB|LGB)$")>; +// Conversion and rounding +def : InstRW<[VecBF2], (instregex "VCD(L)?G$")>; +def : InstRW<[VecBF2], (instregex "VCD(L)?GB$")>; +def : InstRW<[VecBF], (instregex "WCD(L)?GB$")>; def : InstRW<[VecBF2], (instregex "VC(L)?GD$")>; -def : InstRW<[VecBF2], (instregex "VFADB$")>; -def : InstRW<[VecBF], (instregex "WFADB$")>; -def : InstRW<[VecBF2], (instregex "VCGDB$")>; -def : InstRW<[VecBF], (instregex "WCGDB$")>; -def : InstRW<[VecBF2], (instregex "VF(I|M|A|S)$")>; -def : InstRW<[VecBF2], (instregex "VF(I|M|S)DB$")>; -def : InstRW<[VecBF], (instregex "WF(I|M|S)DB$")>; -def : InstRW<[VecBF2], (instregex "VCLGDB$")>; -def : InstRW<[VecBF], (instregex "WCLGDB$")>; -def : InstRW<[VecXsPm], (instregex "VFL(C|N|P)DB$")>; -def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)DB$")>; -def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>; -def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>; -def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>; -def : 
InstRW<[VecXsPm], (instregex "VFPSO$")>; -def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI(DB)?$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "WFTCIDB$")>; +def : InstRW<[VecBF2], (instregex "VC(L)?GDB$")>; +def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>; def : InstRW<[VecBF2], (instregex "VL(DE|ED)$")>; def : InstRW<[VecBF2], (instregex "VL(DE|ED)B$")>; def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>; +def : InstRW<[VecBF2], (instregex "VFI$")>; +def : InstRW<[VecBF2], (instregex "VFIDB$")>; +def : InstRW<[VecBF], (instregex "WFIDB$")>; -// divide / square root +// Sign operations +def : InstRW<[VecXsPm], (instregex "VFPSO$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>; + +// Test data class +def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>; + +// Add / subtract +def : InstRW<[VecBF2], (instregex "VF(A|S)$")>; +def : InstRW<[VecBF2], (instregex "VF(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[VecBF2], (instregex "VFM$")>; +def : InstRW<[VecBF2], (instregex "VFMDB$")>; +def : InstRW<[VecBF], (instregex "WFMDB$")>; +def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>; +def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>; + +// Divide / square root def : InstRW<[VecFPd], (instregex "VFD$")>; def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>; def : InstRW<[VecFPd], (instregex "VFSQ$")>; @@ -1308,10 +1333,10 @@ def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)$")>; def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)DB$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm], (instregex "WFC(E|H|HE)DB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "VFC(E|H|HE)DBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WFC(E|H|HE)DBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>; //===----------------------------------------------------------------------===// @@ -1351,12 +1376,12 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>; def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>; -def : InstRW<[FXa, Lat3], (instregex "IPK$")>; -def : InstRW<[LSU], (instregex "SPKA$")>; -def : InstRW<[LSU], (instregex "SSM$")>; -def : InstRW<[FXb], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXa, Lat3, GroupAlone], (instregex "IPK$")>; +def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; def : InstRW<[FXa, Lat3], (instregex "IAC$")>; -def : InstRW<[LSU], (instregex "SAC(F)?$")>; +def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>; //===----------------------------------------------------------------------===// // System: Control Register Instructions @@ -1411,14 +1436,14 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>; def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>; -def : InstRW<[FXb, LSU, Lat30], (instregex "MVPG$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>; 
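The SystemZ scheduling hunks above edit InstRW<> records, which tie an instruction-name regex to the issue resources it uses (FXa/FXb fixed-point units, LSU load/store unit, VecDF/VecBF/VecDFX vector pipes), a latency class (Lat5, Lat30, ...), and a decode-group constraint (GroupAlone, BeginGroup, EndGroup). As a rough illustration of what those grouping hints imply for in-order decode groups -- a toy model only, not the LLVM scheduler API; the SchedEntry type and canShareGroup helper are hypothetical -- consider:

#include <cstdio>
#include <string>
#include <vector>

// Toy record mirroring the fields an InstRW<> line carries in the .td hunks
// above (hypothetical type, for illustration only).
enum class Group { None, Alone, Begin, End };

struct SchedEntry {
  std::string OpcodeRegex;        // e.g. "CSST$"
  std::vector<std::string> Units; // e.g. {"FXa", "LSU"}
  unsigned Latency;               // e.g. 30 for Lat30
  Group Grouping;                 // GroupAlone / BeginGroup / EndGroup / none
};

// GroupAlone occupies a decode group by itself; EndGroup must close its
// group, so nothing may follow it there; BeginGroup must open a fresh group,
// so it cannot be appended to an existing one.
static bool canShareGroup(const SchedEntry &First, const SchedEntry &Next) {
  if (First.Grouping == Group::Alone || Next.Grouping == Group::Alone)
    return false;
  if (First.Grouping == Group::End || Next.Grouping == Group::Begin)
    return false;
  return true;
}

int main() {
  // CSST after this patch: [FXa, LSU, Lat30] (no longer GroupAlone).
  SchedEntry CSST{"CSST$", {"FXa", "LSU"}, 30, Group::None};
  // PLO keeps [LSU, Lat30, GroupAlone].
  SchedEntry PLO{"PLO$", {"LSU"}, 30, Group::Alone};
  std::printf("CSST then PLO in one decode group: %s\n",
              canShareGroup(CSST, PLO) ? "yes" : "no"); // prints "no"
  return 0;
}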
//===----------------------------------------------------------------------===// // System: Address-Space Instructions //===----------------------------------------------------------------------===// def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>; -def : InstRW<[LSU], (instregex "PALB$")>; +def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>; def : InstRW<[FXb, Lat30], (instregex "PR$")>; def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>; @@ -1430,7 +1455,7 @@ def : InstRW<[FXb, Lat20], (instregex "TAR$")>; // System: Linkage-Stack Instructions //===----------------------------------------------------------------------===// -def : InstRW<[FXb, Lat30], (instregex "BAKR$")>; +def : InstRW<[FXb, Lat30, EndGroup], (instregex "BAKR$")>; def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>; def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>; @@ -1442,13 +1467,13 @@ def : InstRW<[FXb, Lat30], (instregex "PTFF$")>; def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>; def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>; def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>; -def : InstRW<[LSU, GroupAlone], (instregex "SPT$")>; +def : InstRW<[LSU, LSU, GroupAlone], (instregex "SPT$")>; def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone], (instregex "STCK(F)?$")>; def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone], (instregex "STCKE$")>; def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>; -def : InstRW<[LSU, LSU, FXb, Lat3], (instregex "STPT$")>; +def : InstRW<[LSU, LSU, FXb, Lat5, BeginGroup], (instregex "STPT$")>; //===----------------------------------------------------------------------===// // System: CPU-Related Instructions @@ -1459,7 +1484,7 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>; -def : InstRW<[FXb, LSU, Lat30], (instregex "ECTG$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "ECTG$")>; def : InstRW<[FXb, Lat30], (instregex "PTF$")>; def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>; @@ -1468,7 +1493,7 @@ def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>; //===----------------------------------------------------------------------===// def : InstRW<[FXb, Lat30], (instregex "SVC$")>; -def : InstRW<[FXb], (instregex "MC$")>; +def : InstRW<[FXb, GroupAlone], (instregex "MC$")>; def : InstRW<[FXb, Lat30], (instregex "DIAG$")>; def : InstRW<[FXb], (instregex "TRAC(E|G)$")>; def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>; @@ -1483,7 +1508,8 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>; def : InstRW<[FXb], (instregex "LPP$")>; def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>; def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>; -def : InstRW<[FXb, LSU, Lat30], (instregex "L(C|P|S)CTL$")>; +def : InstRW<[FXb, Lat30], (instregex "LCCTL$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "L(P|S)CTL$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>; def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>; diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td index 128049a09086..e3e1999d8ad8 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -627,8 +627,8 @@ def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>; def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>; // Extend -def : InstRW<[FXU], (instregex 
"AEXT128_64$")>; -def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>; +def : InstRW<[FXU], (instregex "AEXT128$")>; +def : InstRW<[FXU], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>; diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td index 76b378454631..59f37205f412 100644 --- a/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -665,8 +665,8 @@ def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>; def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>; // Extend -def : InstRW<[FXU], (instregex "AEXT128_64$")>; -def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>; +def : InstRW<[FXU], (instregex "AEXT128$")>; +def : InstRW<[FXU], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>; diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index ce5c57e0f519..9ac768b2189d 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -779,15 +779,14 @@ int SystemZTTIImpl:: getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // vlvgp will insert two grs into a vector register, so only count half the // number of instructions. - if (Opcode == Instruction::InsertElement && - Val->getScalarType()->isIntegerTy(64)) + if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64)) return ((Index % 2 == 0) ? 1 : 0); if (Opcode == Instruction::ExtractElement) { int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1); // Give a slight penalty for moving out of vector pipeline to FXU unit. - if (Index == 0 && Val->getScalarType()->isIntegerTy()) + if (Index == 0 && Val->isIntOrIntVectorTy()) Cost += 1; return Cost; diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index ad59f2f40587..00bf02469bdd 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -115,8 +115,8 @@ void WebAssemblyTargetAsmStreamer::emitStackPointer(uint32_t Index) { void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl &Params, SmallVectorImpl &Results) { - OS << "\t.functype\t" << name; + MCSymbol *Symbol, SmallVectorImpl &Params, SmallVectorImpl &Results) { + OS << "\t.functype\t" << Symbol->getName(); if (Results.empty()) OS << ", void"; else { @@ -171,7 +171,7 @@ void WebAssemblyTargetELFStreamer::emitIndIdx(const MCExpr *Value) { } void WebAssemblyTargetELFStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl &Params, SmallVectorImpl &Results) { + MCSymbol *Symbol, SmallVectorImpl &Params, SmallVectorImpl &Results) { // Nothing to emit here. TODO: Re-design how linking works and re-evaluate // whether it's necessary for .o files to declare indirect function types. } @@ -255,9 +255,25 @@ void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) { } void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl &Params, SmallVectorImpl &Results) { - // Nothing to emit here. TODO: Re-design how linking works and re-evaluate - // whether it's necessary for .o files to declare indirect function types. 
+ MCSymbol *Symbol, SmallVectorImpl &Params, + SmallVectorImpl &Results) { + MCSymbolWasm *WasmSym = cast(Symbol); + if (WasmSym->isFunction()) { + // Symbol already has its arguments and result set. + return; + } + + SmallVector ValParams; + for (MVT Ty : Params) + ValParams.push_back(WebAssembly::toValType(Ty)); + + SmallVector ValResults; + for (MVT Ty : Results) + ValResults.push_back(WebAssembly::toValType(Ty)); + + WasmSym->setParams(std::move(ValParams)); + WasmSym->setReturns(std::move(ValResults)); + WasmSym->setIsFunction(true); } void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 5ad147e5e596..102d7219a1e7 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -44,7 +44,7 @@ class WebAssemblyTargetStreamer : public MCTargetStreamer { /// .endfunc virtual void emitEndFunc() = 0; /// .functype - virtual void emitIndirectFunctionType(StringRef name, + virtual void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl &Params, SmallVectorImpl &Results) = 0; /// .indidx @@ -69,7 +69,7 @@ class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer { void emitGlobal(ArrayRef Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl &Params, SmallVectorImpl &Results) override; void emitIndIdx(const MCExpr *Value) override; @@ -87,7 +87,7 @@ class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer { void emitGlobal(ArrayRef Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl &Params, SmallVectorImpl &Results) override; void emitIndIdx(const MCExpr *Value) override; @@ -105,7 +105,7 @@ class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer { void emitGlobal(ArrayRef Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl &Params, SmallVectorImpl &Results) override; void emitIndIdx(const MCExpr *Value) override; diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index f51585a10ca1..211358ad66cd 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -84,7 +84,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { SmallVector Results; SmallVector Params; ComputeSignatureVTs(F, TM, Params, Results); - getTargetStreamer()->emitIndirectFunctionType(F.getName(), Params, + getTargetStreamer()->emitIndirectFunctionType(getSymbol(&F), Params, Results); } } @@ -214,11 +214,8 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) { if (const GlobalValue *GV = dyn_cast(CV)) if (GV->getValueType()->isFunctionTy()) { - MCSymbol* Sym = getSymbol(GV); - if (!isa(Sym)) - cast(Sym)->setIsFunction(true); return MCSymbolRefExpr::create( - Sym, MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + getSymbol(GV), 
MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); } return AsmPrinter::lowerConstant(CV); } diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 1691808d05a0..700111743ee8 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -132,7 +132,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, // no blocks not dominated by the loop header. // - It's desirable to preserve the original block order when possible. // We use two ready lists; Preferred and Ready. Preferred has recently - // processed sucessors, to help preserve block sequences from the original + // processed successors, to help preserve block sequences from the original // order. Ready has the remaining ready blocks. PriorityQueue, CompareBlockNumbers> diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index ff186eb91503..8880539804ca 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -112,8 +112,6 @@ MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym, MCSymbolRefExpr::VariantKind VK = IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION : MCSymbolRefExpr::VK_None; - if (!isa(Sym)) - cast(Sym)->setIsFunction(IsFunc); const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx); diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index c02ef4a1c399..2599064334ee 100644 --- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -394,11 +394,22 @@ RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = { /* MEMMOVE */ iPTR_func_iPTR_iPTR_iPTR, // ELEMENT-WISE ATOMIC MEMORY -/* MEMCPY_ELEMENT_ATOMIC_1 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_2 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_4 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_8 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_16 */ iPTR_func_iPTR_iPTR_iPTR, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, + +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, // EXCEPTION HANDLING /* UNWIND_RESUME */ unsupported, @@ -839,11 +850,21 @@ RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = { /* MEMCPY */ "memcpy", /* MEMMOVE */ "memset", /* MEMSET */ "memmove", -/* MEMCPY_ELEMENT_ATOMIC_1 */ "MEMCPY_ELEMENT_ATOMIC_1", -/* MEMCPY_ELEMENT_ATOMIC_2 */ "MEMCPY_ELEMENT_ATOMIC_2", -/* MEMCPY_ELEMENT_ATOMIC_4 */ "MEMCPY_ELEMENT_ATOMIC_4", -/* MEMCPY_ELEMENT_ATOMIC_8 */ "MEMCPY_ELEMENT_ATOMIC_8", -/* MEMCPY_ELEMENT_ATOMIC_16 */ "MEMCPY_ELEMENT_ATOMIC_16", +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ 
nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, /* UNWIND_RESUME */ "_Unwind_Resume", /* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1", /* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2", diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 825f23dc52d9..c1d216c8b7af 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2453,8 +2453,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, break; } - // In MS inline asm curly braces mark the begining/end of a block, therefore - // they should be interepreted as end of statement + // In MS inline asm curly braces mark the beginning/end of a block, + // therefore they should be interepreted as end of statement CurlyAsEndOfStatement = isParsingIntelSyntax() && isParsingInlineAsm() && (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly)); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 5e809c34325e..f5f3a4cc83dc 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -1038,7 +1038,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::EXTRQI: if (MI->getOperand(2).isImm() && MI->getOperand(3).isImm()) - DecodeEXTRQIMask(MI->getOperand(2).getImm(), + DecodeEXTRQIMask(MVT::v16i8, MI->getOperand(2).getImm(), MI->getOperand(3).getImm(), ShuffleMask); @@ -1049,7 +1049,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::INSERTQI: if (MI->getOperand(3).isImm() && MI->getOperand(4).isImm()) - DecodeINSERTQIMask(MI->getOperand(3).getImm(), + DecodeINSERTQIMask(MVT::v16i8, MI->getOperand(3).getImm(), MI->getOperand(4).getImm(), ShuffleMask); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 914fb36f91a7..733eac7c0321 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -110,7 +110,7 @@ class X86AsmBackend : public MCAsmBackend { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, - uint64_t Value, bool IsPCRel) const override { + uint64_t Value, bool IsResolved) const override { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 1be5aec849fc..8a0fbfb45b22 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -452,15 +452,20 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl &Mask) { Mask.push_back(IsLoad ? 
static_cast(SM_SentinelZero) : i); } -void DecodeEXTRQIMask(int Len, int Idx, +void DecodeEXTRQIMask(MVT VT, int Len, int Idx, SmallVectorImpl &ShuffleMask) { + assert(VT.is128BitVector() && "Expected 128-bit vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned HalfElts = NumElts / 2; + // Only the bottom 6 bits are valid for each immediate. Len &= 0x3F; Idx &= 0x3F; // We can only decode this bit extraction instruction as a shuffle if both the - // length and index work with whole bytes. - if (0 != (Len % 8) || 0 != (Idx % 8)) + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) return; // A length of zero is equivalent to a bit length of 64. @@ -469,33 +474,38 @@ void DecodeEXTRQIMask(int Len, int Idx, // If the length + index exceeds the bottom 64 bits the result is undefined. if ((Len + Idx) > 64) { - ShuffleMask.append(16, SM_SentinelUndef); + ShuffleMask.append(NumElts, SM_SentinelUndef); return; } - // Convert index and index to work with bytes. - Len /= 8; - Idx /= 8; + // Convert index and index to work with elements. + Len /= EltSize; + Idx /= EltSize; - // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes - // of the lower 64-bits. The upper 64-bits are undefined. + // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining + // elements of the lower 64-bits. The upper 64-bits are undefined. for (int i = 0; i != Len; ++i) ShuffleMask.push_back(i + Idx); - for (int i = Len; i != 8; ++i) + for (int i = Len; i != (int)HalfElts; ++i) ShuffleMask.push_back(SM_SentinelZero); - for (int i = 8; i != 16; ++i) + for (int i = HalfElts; i != (int)NumElts; ++i) ShuffleMask.push_back(SM_SentinelUndef); } -void DecodeINSERTQIMask(int Len, int Idx, +void DecodeINSERTQIMask(MVT VT, int Len, int Idx, SmallVectorImpl &ShuffleMask) { + assert(VT.is128BitVector() && "Expected 128-bit vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned HalfElts = NumElts / 2; + // Only the bottom 6 bits are valid for each immediate. Len &= 0x3F; Idx &= 0x3F; // We can only decode this bit insertion instruction as a shuffle if both the - // length and index work with whole bytes. - if (0 != (Len % 8) || 0 != (Idx % 8)) + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) return; // A length of zero is equivalent to a bit length of 64. @@ -504,24 +514,24 @@ void DecodeINSERTQIMask(int Len, int Idx, // If the length + index exceeds the bottom 64 bits the result is undefined. if ((Len + Idx) > 64) { - ShuffleMask.append(16, SM_SentinelUndef); + ShuffleMask.append(NumElts, SM_SentinelUndef); return; } - // Convert index and index to work with bytes. - Len /= 8; - Idx /= 8; + // Convert index and index to work with elements. + Len /= EltSize; + Idx /= EltSize; - // INSERTQ: Extract lowest Len bytes from lower half of second source and - // insert over first source starting at Idx byte. The upper 64-bits are + // INSERTQ: Extract lowest Len elements from lower half of second source and + // insert over first source starting at Idx element. The upper 64-bits are // undefined. 
for (int i = 0; i != Idx; ++i) ShuffleMask.push_back(i); for (int i = 0; i != Len; ++i) - ShuffleMask.push_back(i + 16); - for (int i = Idx + Len; i != 8; ++i) + ShuffleMask.push_back(i + NumElts); + for (int i = Idx + Len; i != (int)HalfElts; ++i) ShuffleMask.push_back(i); - for (int i = 8; i != 16; ++i) + for (int i = HalfElts; i != (int)NumElts; ++i) ShuffleMask.push_back(SM_SentinelUndef); } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 17619d09d059..251c9f7558ec 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -134,12 +134,12 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl &ShuffleMask); void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl &ShuffleMask); -/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask. -void DecodeEXTRQIMask(int Len, int Idx, +/// Decode a SSE4A EXTRQ instruction as a shuffle mask. +void DecodeEXTRQIMask(MVT VT, int Len, int Idx, SmallVectorImpl &ShuffleMask); -/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask. -void DecodeINSERTQIMask(int Len, int Idx, +/// Decode a SSE4A INSERTQ instruction as a shuffle mask. +void DecodeINSERTQIMask(MVT VT, int Len, int Idx, SmallVectorImpl &ShuffleMask); /// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants. diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7437ebacfac3..4ca57fe9fb00 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -451,6 +451,7 @@ class GoldmontProc : ProcessorModel &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, @@ -43,14 +44,24 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, const X86TargetLowering &TLI = *getTLI(); LLVMContext &Context = OrigArg.Ty->getContext(); - EVT VT = TLI.getValueType(DL, OrigArg.Ty); + + SmallVector SplitVTs; + SmallVector Offsets; + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); + + if (SplitVTs.size() != 1) { + // TODO: support struct/array split + return false; + } + + EVT VT = SplitVTs[0]; unsigned NumParts = TLI.getNumRegisters(Context, VT); if (NumParts == 1) { // replace the original type ( pointer -> GPR ). SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context), OrigArg.Flags, OrigArg.IsFixed); - return; + return true; } SmallVector SplitRegs; @@ -67,6 +78,7 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, } PerformArgSplit(SplitRegs); + return true; } namespace { @@ -113,9 +125,11 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); SmallVector SplitArgs; - splitToValueTypes( - OrigArg, SplitArgs, DL, MRI, - [&](ArrayRef Regs) { MIRBuilder.buildUnmerge(Regs, VReg); }); + if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef Regs) { + MIRBuilder.buildUnmerge(Regs, VReg); + })) + return false; FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86); if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) @@ -181,12 +195,23 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, SmallVector SplitArgs; unsigned Idx = 0; for (auto &Arg : F.args()) { + + // TODO: handle not simple cases. 
+ if (Arg.hasAttribute(Attribute::ByVal) || + Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) + return false; + ArgInfo OrigArg(VRegs[Idx], Arg.getType()); - setArgFlags(OrigArg, Idx + 1, DL, F); - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](ArrayRef Regs) { - MIRBuilder.buildMerge(VRegs[Idx], Regs); - }); + setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F); + if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef Regs) { + MIRBuilder.buildMerge(VRegs[Idx], Regs); + })) + return false; Idx++; } diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h index 8a8afb568298..6a5dabf33a0a 100644 --- a/lib/Target/X86/X86CallLowering.h +++ b/lib/Target/X86/X86CallLowering.h @@ -39,7 +39,7 @@ class X86CallLowering : public CallLowering { /// A function of this type is used to perform value split action. typedef std::function)> SplitArgTy; - void splitToValueTypes(const ArgInfo &OrigArgInfo, + bool splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, SplitArgTy SplitArg) const; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7d146d050a5c..6decb550ad5f 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -651,7 +651,15 @@ def CC_X86_64_GHC : CallingConv<[ // Pass in STG registers: F1, F2, F3, F4, D1, D2 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", - CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>> + CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>, + // AVX + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM1, YMM2, YMM3, YMM4, YMM5, YMM6]>>>, + // AVX-512 + CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], + CCIfSubtarget<"hasAVX512()", + CCAssignToReg<[ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6]>>> ]>; def CC_X86_64_HiPE : CallingConv<[ diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 621505aaded9..ee9e78146305 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -3039,6 +3039,9 @@ bool X86FastISel::fastLowerArguments() { if (!Subtarget->is64Bit()) return false; + if (Subtarget->useSoftFloat()) + return false; + // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. unsigned GPRCnt = 0; unsigned FPRCnt = 0; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index e3aa227702be..f294e819090b 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -972,7 +972,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - bool UseRedZone = false; bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty(); // The default stack probe size is 4096 if the function has no stackprobesize @@ -1011,7 +1010,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI.setStackSize(StackSize); - UseRedZone = true; } // Insert stack pointer adjustment for later moving of return addr. 
Only @@ -1189,7 +1187,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { - assert(!UseRedZone && "The Red Zone is not accounted for in stack probes"); + assert(!X86FI->getUsesRedZone() && + "The Red Zone is not accounted for in stack probes"); // Check whether EAX is livein for this block. bool isEAXAlive = isEAXLiveIn(MBB); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b89914f8893e..65486cf7f529 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4217,6 +4217,8 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFLW: case X86ISD::SHUFP: case X86ISD::INSERTPS: + case X86ISD::EXTRQI: + case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: @@ -5554,6 +5556,24 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, DecodeINSERTPSMask(cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; + case X86ISD::EXTRQI: + if (isa(N->getOperand(1)) && + isa(N->getOperand(2))) { + int BitLen = N->getConstantOperandVal(1); + int BitIdx = N->getConstantOperandVal(2); + DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask); + IsUnary = true; + } + break; + case X86ISD::INSERTQI: + if (isa(N->getOperand(2)) && + isa(N->getOperand(3))) { + int BitLen = N->getConstantOperandVal(2); + int BitIdx = N->getConstantOperandVal(3); + DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask); + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); + } + break; case X86ISD::UNPCKH: DecodeUNPCKHMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); @@ -9317,11 +9337,11 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, return DAG.getBitcast(VT, V); } -/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. -static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, - const APInt &Zeroable, - SelectionDAG &DAG) { +// EXTRQ: Extract Len elements from lower half of source, starting at Idx. +// Remainder of lower half result is zero and upper half is all undef. +static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef Mask, uint64_t &BitLen, + uint64_t &BitIdx, const APInt &Zeroable) { int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); @@ -9329,120 +9349,133 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, // Upper half must be undefined. if (!isUndefInRange(Mask, HalfSize, HalfSize)) - return SDValue(); + return false; - // EXTRQ: Extract Len elements from lower half of source, starting at Idx. - // Remainder of lower half result is zero and upper half is all undef. - auto LowerAsEXTRQ = [&]() { - // Determine the extraction length from the part of the - // lower half that isn't zeroable. - int Len = HalfSize; - for (; Len > 0; --Len) - if (!Zeroable[Len - 1]) - break; - assert(Len > 0 && "Zeroable shuffle mask"); + // Determine the extraction length from the part of the + // lower half that isn't zeroable. + int Len = HalfSize; + for (; Len > 0; --Len) + if (!Zeroable[Len - 1]) + break; + assert(Len > 0 && "Zeroable shuffle mask"); - // Attempt to match first Len sequential elements from the lower half. 
- SDValue Src; - int Idx = -1; - for (int i = 0; i != Len; ++i) { - int M = Mask[i]; - if (M < 0) - continue; - SDValue &V = (M < Size ? V1 : V2); - M = M % Size; + // Attempt to match first Len sequential elements from the lower half. + SDValue Src; + int Idx = -1; + for (int i = 0; i != Len; ++i) { + int M = Mask[i]; + if (M == SM_SentinelUndef) + continue; + SDValue &V = (M < Size ? V1 : V2); + M = M % Size; - // The extracted elements must start at a valid index and all mask - // elements must be in the lower half. - if (i > M || M >= HalfSize) - return SDValue(); + // The extracted elements must start at a valid index and all mask + // elements must be in the lower half. + if (i > M || M >= HalfSize) + return false; - if (Idx < 0 || (Src == V && Idx == (M - i))) { - Src = V; - Idx = M - i; - continue; - } - return SDValue(); + if (Idx < 0 || (Src == V && Idx == (M - i))) { + Src = V; + Idx = M - i; + continue; + } + return false; + } + + if (!Src || Idx < 0) + return false; + + assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); + BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + V1 = Src; + return true; +} + +// INSERTQ: Extract lowest Len elements from lower half of second source and +// insert over first source, starting at Idx. +// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } +static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef Mask, uint64_t &BitLen, + uint64_t &BitIdx) { + int Size = Mask.size(); + int HalfSize = Size / 2; + assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); + + // Upper half must be undefined. + if (!isUndefInRange(Mask, HalfSize, HalfSize)) + return false; + + for (int Idx = 0; Idx != HalfSize; ++Idx) { + SDValue Base; + + // Attempt to match first source from mask before insertion point. + if (isUndefInRange(Mask, 0, Idx)) { + /* EMPTY */ + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { + Base = V1; + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { + Base = V2; + } else { + continue; } - if (Idx < 0) - return SDValue(); + // Extend the extraction length looking to match both the insertion of + // the second source and the remaining elements of the first. + for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { + SDValue Insert; + int Len = Hi - Idx; - assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); - int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; - int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; - return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); - }; + // Match insertion. + if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { + Insert = V1; + } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { + Insert = V2; + } else { + continue; + } - if (SDValue ExtrQ = LowerAsEXTRQ()) - return ExtrQ; - - // INSERTQ: Extract lowest Len elements from lower half of second source and - // insert over first source, starting at Idx. - // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } - auto LowerAsInsertQ = [&]() { - for (int Idx = 0; Idx != HalfSize; ++Idx) { - SDValue Base; - - // Attempt to match first source from mask before insertion point. - if (isUndefInRange(Mask, 0, Idx)) { + // Match the remaining elements of the lower half. 
+ if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { /* EMPTY */ - } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { + } else if ((!Base || (Base == V1)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { Base = V1; - } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { + } else if ((!Base || (Base == V2)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, + Size + Hi)) { Base = V2; } else { continue; } - // Extend the extraction length looking to match both the insertion of - // the second source and the remaining elements of the first. - for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { - SDValue Insert; - int Len = Hi - Idx; - - // Match insertion. - if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { - Insert = V1; - } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { - Insert = V2; - } else { - continue; - } - - // Match the remaining elements of the lower half. - if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { - /* EMPTY */ - } else if ((!Base || (Base == V1)) && - isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { - Base = V1; - } else if ((!Base || (Base == V2)) && - isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, - Size + Hi)) { - Base = V2; - } else { - continue; - } - - // We may not have a base (first source) - this can safely be undefined. - if (!Base) - Base = DAG.getUNDEF(VT); - - int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; - int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; - return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); - } + BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + V1 = Base; + V2 = Insert; + return true; } + } - return SDValue(); - }; + return false; +} - if (SDValue InsertQ = LowerAsInsertQ()) - return InsertQ; +/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. +static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + SelectionDAG &DAG) { + uint64_t BitLen, BitIdx; + if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) + return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + + if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) + return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), + V2 ? V2 : DAG.getUNDEF(VT), + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); return SDValue(); } @@ -22817,7 +22850,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { auto Builder = IRBuilder<>(AI); Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - auto SynchScope = AI->getSynchScope(); + auto SSID = AI->getSyncScopeID(); // We must restrict the ordering to avoid generating loads with Release or // ReleaseAcquire orderings. auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); @@ -22839,7 +22872,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // otherwise, we might be able to be more aggressive on relaxed idempotent // rmw. In practice, they do not look useful, so we don't try to be // especially clever. - if (SynchScope == SingleThread) + if (SSID == SyncScope::SingleThread) // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at // the IR level, so we must wrap it in an intrinsic. 
return nullptr; @@ -22858,7 +22891,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // Finally we can emit the atomic load. LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr, AI->getType()->getPrimitiveSizeInBits()); - Loaded->setAtomic(Order, SynchScope); + Loaded->setAtomic(Order, SSID); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); return Loaded; @@ -22869,13 +22902,13 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, SDLoc dl(Op); AtomicOrdering FenceOrdering = static_cast( cast(Op.getOperand(1))->getZExtValue()); - SynchronizationScope FenceScope = static_cast( + SyncScope::ID FenceSSID = static_cast( cast(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && - FenceScope == CrossThread) { + FenceSSID == SyncScope::System) { if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); @@ -23203,6 +23236,20 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget, SDLoc DL(Op.getNode()); SDValue Op0 = Op.getOperand(0); + // TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions. + if (Subtarget.hasVPOPCNTDQ()) { + if (VT == MVT::v8i16) { + Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v8i64, Op0); + Op = DAG.getNode(ISD::CTPOP, DL, MVT::v8i64, Op); + return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op); + } + if (VT == MVT::v16i8 || VT == MVT::v16i16) { + Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v16i32, Op0); + Op = DAG.getNode(ISD::CTPOP, DL, MVT::v16i32, Op); + return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op); + } + } + if (!Subtarget.hasSSSE3()) { // We can't use the fast LUT approach, so fall back on vectorized bitmath. assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!"); @@ -27101,6 +27148,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, // permute instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, + const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, const X86Subtarget &Subtarget, @@ -27111,38 +27159,67 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts; MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); - bool ContainsZeros = false; - APInt Zeroable(NumMaskElts, false); - for (unsigned i = 0; i != NumMaskElts; ++i) { - int M = Mask[i]; - if (isUndefOrZero(M)) - Zeroable.setBit(i); - ContainsZeros |= (M == SM_SentinelZero); - } + bool ContainsZeros = + llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }); - // Attempt to match against byte/bit shifts. - // FIXME: Add 512-bit support. - if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || - (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { - int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, - MaskScalarSizeInBits, Mask, - 0, Zeroable, Subtarget); - if (0 < ShiftAmt) { - PermuteImm = (unsigned)ShiftAmt; + // Handle VPERMI/VPERMILPD vXi64/vXi64 patterns. + if (!ContainsZeros && MaskScalarSizeInBits == 64) { + // Check for lane crossing permutes. + if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) { + // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+). + if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) { + Shuffle = X86ISD::VPERMI; + ShuffleVT = (AllowFloatDomain ? 
MVT::v4f64 : MVT::v4i64); + PermuteImm = getV4X86ShuffleImm(Mask); + return true; + } + if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) { + SmallVector RepeatedMask; + if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) { + Shuffle = X86ISD::VPERMI; + ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64); + PermuteImm = getV4X86ShuffleImm(RepeatedMask); + return true; + } + } + } else if (AllowFloatDomain && Subtarget.hasAVX()) { + // VPERMILPD can permute with a non-repeating shuffle. + Shuffle = X86ISD::VPERMILPI; + ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size()); + PermuteImm = 0; + for (int i = 0, e = Mask.size(); i != e; ++i) { + int M = Mask[i]; + if (M == SM_SentinelUndef) + continue; + assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index"); + PermuteImm |= (M & 1) << i; + } return true; } } - // Ensure we don't contain any zero elements. - if (ContainsZeros) - return false; + // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns. + // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we + // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). + if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) && + !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) { + SmallVector RepeatedMask; + if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { + // Narrow the repeated mask to create 32-bit element permutes. + SmallVector WordMask = RepeatedMask; + if (MaskScalarSizeInBits == 64) + scaleShuffleMask(2, RepeatedMask, WordMask); - assert(llvm::all_of(Mask, [&](int M) { - return SM_SentinelUndef <= M && M < (int)NumMaskElts; - }) && "Expected unary shuffle"); + Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI); + ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32); + ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32); + PermuteImm = getV4X86ShuffleImm(WordMask); + return true; + } + } - // Handle PSHUFLW/PSHUFHW repeated patterns. - if (MaskScalarSizeInBits == 16) { + // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns. + if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) { SmallVector RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { ArrayRef LoMask(Mask.data() + 0, 4); @@ -27170,78 +27247,23 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, PermuteImm = getV4X86ShuffleImm(OffsetHiMask); return true; } - - return false; } - return false; } - // We only support permutation of 32/64 bit elements after this. - if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64) - return false; - - // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we - // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). - if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX()) - return false; - - // Pre-AVX2 we must use float shuffles on 256-bit vectors. - if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) { - AllowFloatDomain = true; - AllowIntDomain = false; - } - - // Check for lane crossing permutes. - if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) { - // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+). - if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) { - Shuffle = X86ISD::VPERMI; - ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64); - PermuteImm = getV4X86ShuffleImm(Mask); + // Attempt to match against byte/bit shifts. + // FIXME: Add 512-bit support. 
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || + (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { + int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, + MaskScalarSizeInBits, Mask, + 0, Zeroable, Subtarget); + if (0 < ShiftAmt) { + PermuteImm = (unsigned)ShiftAmt; return true; } - if (Subtarget.hasAVX512() && MaskVT.is512BitVector() && Mask.size() == 8) { - SmallVector RepeatedMask; - if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) { - Shuffle = X86ISD::VPERMI; - ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64); - PermuteImm = getV4X86ShuffleImm(RepeatedMask); - return true; - } - } - return false; } - // VPERMILPD can permute with a non-repeating shuffle. - if (AllowFloatDomain && MaskScalarSizeInBits == 64) { - Shuffle = X86ISD::VPERMILPI; - ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size()); - PermuteImm = 0; - for (int i = 0, e = Mask.size(); i != e; ++i) { - int M = Mask[i]; - if (M == SM_SentinelUndef) - continue; - assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index"); - PermuteImm |= (M & 1) << i; - } - return true; - } - - // We need a repeating shuffle mask for VPERMILPS/PSHUFD. - SmallVector RepeatedMask; - if (!is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) - return false; - - // Narrow the repeated mask for 32-bit element permutes. - SmallVector WordMask = RepeatedMask; - if (MaskScalarSizeInBits == 64) - scaleShuffleMask(2, RepeatedMask, WordMask); - - Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD); - ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32); - ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32); - PermuteImm = getV4X86ShuffleImm(WordMask); - return true; + return false; } // Attempt to match a combined unary shuffle mask against supported binary @@ -27303,6 +27325,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, } static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, + const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, SDLoc &DL, @@ -27388,11 +27411,6 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask, // Attempt to combine to INSERTPS. if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && MaskVT.is128BitVector()) { - APInt Zeroable(4, 0); - for (unsigned i = 0; i != NumMaskElts; ++i) - if (Mask[i] < 0) - Zeroable.setBit(i); - if (Zeroable.getBoolValue() && matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { Shuffle = X86ISD::INSERTPS; @@ -27578,7 +27596,14 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, // Which shuffle domains are permitted? // Permit domain crossing at higher combine depths. bool AllowFloatDomain = FloatDomain || (Depth > 3); - bool AllowIntDomain = !FloatDomain || (Depth > 3); + bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && + (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); + + // Determine zeroable mask elements. 
+ APInt Zeroable(NumMaskElts, 0); + for (unsigned i = 0; i != NumMaskElts; ++i) + if (isUndefOrZero(Mask[i])) + Zeroable.setBit(i); if (UnaryShuffle) { // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load @@ -27612,7 +27637,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } - if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) @@ -27648,7 +27673,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { @@ -27668,6 +27693,45 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, return true; } + // Typically from here on, we need an integer version of MaskVT. + MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits); + IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts); + + // Annoyingly, SSE4A instructions don't map into the above match helpers. + if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) { + uint64_t BitLen, BitIdx; + if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, + Zeroable)) { + if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) + return false; // Nothing to do! + V1 = DAG.getBitcast(IntMaskVT, V1); + DCI.AddToWorklist(V1.getNode()); + Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + DCI.AddToWorklist(Res.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), + /*AddTo*/ true); + return true; + } + + if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { + if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) + return false; // Nothing to do! + V1 = DAG.getBitcast(IntMaskVT, V1); + DCI.AddToWorklist(V1.getNode()); + V2 = DAG.getBitcast(IntMaskVT, V2); + DCI.AddToWorklist(V2.getNode()); + Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + DCI.AddToWorklist(Res.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), + /*AddTo*/ true); + return true; + } + } + // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. 
if (Depth < 2) @@ -27688,9 +27752,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27719,9 +27781,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, if (Mask[i] == SM_SentinelZero) Mask[i] = NumMaskElts + i; - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27746,9 +27806,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); V1 = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(V1.getNode()); @@ -27807,8 +27865,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32); VPermIdx.push_back(Idx); } - MVT VPermMaskVT = MVT::getVectorVT(MVT::i32, NumMaskElts); - SDValue VPermMask = DAG.getBuildVector(VPermMaskVT, DL, VPermIdx); + SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27831,8 +27888,6 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, unsigned NumLanes = MaskVT.getSizeInBits() / 128; unsigned NumEltsPerLane = NumMaskElts / NumLanes; SmallVector VPerm2Idx; - MVT MaskIdxSVT = MVT::getIntegerVT(MaskVT.getScalarSizeInBits()); - MVT MaskIdxVT = MVT::getVectorVT(MaskIdxSVT, NumMaskElts); unsigned M2ZImm = 0; for (int M : Mask) { if (M == SM_SentinelUndef) { @@ -27852,7 +27907,7 @@ static bool combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(MaskVT, V2); DCI.AddToWorklist(V2.getNode()); - SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, MaskIdxVT, DAG, DL, true); + SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPerm2MaskOp.getNode()); Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, DAG.getConstant(M2ZImm, DL, MVT::i8)); @@ -29163,9 +29218,9 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, // v8i16 and v16i16. 
// For these two cases, we can shuffle the upper element bytes to a // consecutive sequence at the start of the vector and treat the results as - // v16i8 or v32i8, and for v61i8 this is the prefferable solution. However, + // v16i8 or v32i8, and for v61i8 this is the preferable solution. However, // for v16i16 this is not the case, because the shuffle is expensive, so we - // avoid sign-exteding to this type entirely. + // avoid sign-extending to this type entirely. // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as: // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef) MVT SExtVT; @@ -29207,7 +29262,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, SExtVT = MVT::v16i8; // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)), // it is not profitable to sign-extend to 256-bit because this will - // require an extra cross-lane shuffle which is more exprensive than + // require an extra cross-lane shuffle which is more expensive than // truncating the result of the compare to 128-bits. break; case MVT::v32i1: @@ -29580,8 +29635,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, // (extends the sign bit which is zero). // So it is correct to skip the sign/zero extend instruction. if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND || - Root.getOpcode() == ISD::ZERO_EXTEND || - Root.getOpcode() == ISD::ANY_EXTEND)) + Root.getOpcode() == ISD::ZERO_EXTEND || + Root.getOpcode() == ISD::ANY_EXTEND)) Root = Root.getOperand(0); // If there was a match, we want Root to be a select that is the root of an @@ -34950,6 +35005,40 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0); + // If X is -1 or 0, then we have an opportunity to avoid constants required in + // the general case below. + auto *ConstantX = dyn_cast(X); + if (ConstantX) { + if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) || + (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) { + // This is a complicated way to get -1 or 0 from the carry flag: + // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax + // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + Y.getOperand(1)); + } + + if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) || + (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) { + SDValue EFLAGS = Y->getOperand(1); + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa(EFLAGS.getOperand(1))) { + // Swap the operands of a SUB, and we have the same pattern as above. + // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB + // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB + SDValue NewSub = DAG.getNode( + X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + NewEFLAGS); + } + } + } + if (CC == X86::COND_B) { // X + SETB Z --> X + (mask SBB Z, Z) // X - SETB Z --> X - (mask SBB Z, Z) @@ -34996,7 +35085,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { // If X is -1 or 0, then we have an opportunity to avoid constants required in // the general case below. 
- if (auto *ConstantX = dyn_cast(X)) { + if (ConstantX) { // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with // fake operands: // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) @@ -35549,6 +35638,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: + case X86ISD::EXTRQI: + case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e1ade92979dc..dbbc2bbba6a4 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -767,6 +767,19 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + // Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE + // for given operand and result types. + // Example of such a combine: + // v4i32 build_vector((extract_elt V, 0), + // (extract_elt V, 2), + // (extract_elt V, 4), + // (extract_elt V, 6)) + // --> + // v4i32 truncate (bitcast V to v4i64) + bool isDesirableToCombineBuildVectorToTruncate() const override { + return true; + } + /// Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index f3094b781c49..34d4816a2518 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -10488,7 +10488,7 @@ namespace { return Copy; } - // Create a virtal register in *TLSBaseAddrReg, and populate it by + // Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) { MachineFunction *MF = I.getParent()->getParent(); diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index e34a90e975b8..859d3288db89 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -32,6 +32,8 @@ #define DEBUG_TYPE "X86-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -56,7 +58,7 @@ class X86InstructionSelector : public InstructionSelector { /// the patterns that don't require complex C++. bool selectImpl(MachineInstr &I) const; - // TODO: remove after suported by Tablegen-erated instruction selection. + // TODO: remove after supported by Tablegen-erated instruction selection. 
unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc, uint64_t Alignment) const; @@ -64,6 +66,8 @@ class X86InstructionSelector : public InstructionSelector { MachineFunction &MF) const; bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, @@ -75,6 +79,8 @@ class X86InstructionSelector : public InstructionSelector { bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, @@ -262,6 +268,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectFrameIndexOrGep(I, MRI, MF)) return true; + if (selectGlobalValue(I, MRI, MF)) + return true; if (selectConstant(I, MRI, MF)) return true; if (selectTrunc(I, MRI, MF)) @@ -272,6 +280,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectUadde(I, MRI, MF)) return true; + if (selectUnmergeValues(I, MRI, MF)) + return true; if (selectMergeValues(I, MRI, MF)) return true; if (selectExtract(I, MRI, MF)) @@ -423,6 +433,15 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) { + if (Ty == LLT::pointer(0, 64)) + return X86::LEA64r; + else if (Ty == LLT::pointer(0, 32)) + return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; + else + llvm_unreachable("Can't get LEA opcode. Unsupported type."); +} + bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -435,14 +454,7 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, LLT Ty = MRI.getType(DefReg); // Use LEA to calculate frame index and GEP - unsigned NewOpc; - if (Ty == LLT::pointer(0, 64)) - NewOpc = X86::LEA64r; - else if (Ty == LLT::pointer(0, 32)) - NewOpc = STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; - else - llvm_unreachable("Can't select G_FRAME_INDEX/G_GEP, unsupported type."); - + unsigned NewOpc = getLeaOP(Ty, STI); I.setDesc(TII.get(NewOpc)); MachineInstrBuilder MIB(MF, I); @@ -458,6 +470,54 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + unsigned Opc = I.getOpcode(); + + if (Opc != TargetOpcode::G_GLOBAL_VALUE) + return false; + + auto GV = I.getOperand(1).getGlobal(); + if (GV->isThreadLocal()) { + return false; // TODO: we don't support TLS yet. + } + + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Small) + return 0; + + X86AddressMode AM; + AM.GV = GV; + AM.GVOpFlags = STI.classifyGlobalReference(GV); + + // TODO: The ABI requires an extra load. not supported yet. + if (isGlobalStubReference(AM.GVOpFlags)) + return false; + + // TODO: This reference is relative to the pic base. not supported yet. 
+ if (isGlobalRelativeToPICBase(AM.GVOpFlags)) + return false; + + if (STI.isPICStyleRIPRel()) { + // Use rip-relative addressing. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); + AM.Base.Reg = X86::RIP; + } + + const unsigned DefReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(DefReg); + unsigned NewOpc = getLeaOP(Ty, STI); + + I.setDesc(TII.get(NewOpc)); + MachineInstrBuilder MIB(MF, I); + + I.RemoveOperand(1); + addFullAddress(MIB, AM); + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + bool X86InstructionSelector::selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -467,7 +527,8 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, const unsigned DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); - assert(Ty.isScalar() && "invalid element type."); + if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID) + return false; uint64_t Val = 0; if (I.getOperand(1).isCImm()) { @@ -576,37 +637,40 @@ bool X86InstructionSelector::selectZext(MachineInstr &I, const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy == LLT::scalar(1)) { + if (SrcTy != LLT::scalar(1)) + return false; - unsigned AndOpc; - if (DstTy == LLT::scalar(32)) - AndOpc = X86::AND32ri8; - else if (DstTy == LLT::scalar(64)) - AndOpc = X86::AND64ri8; - else - return false; - - unsigned DefReg = - MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); + unsigned AndOpc; + if (DstTy == LLT::scalar(8)) + AndOpc = X86::AND8ri; + else if (DstTy == LLT::scalar(16)) + AndOpc = X86::AND16ri8; + else if (DstTy == LLT::scalar(32)) + AndOpc = X86::AND32ri8; + else if (DstTy == LLT::scalar(64)) + AndOpc = X86::AND64ri8; + else + return false; + unsigned DefReg = SrcReg; + if (DstTy != LLT::scalar(8)) { + DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::SUBREG_TO_REG), DefReg) .addImm(0) .addReg(SrcReg) .addImm(X86::sub_8bit); - - MachineInstr &AndInst = - *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) - .addReg(DefReg) - .addImm(1); - - constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); - - I.eraseFromParent(); - return true; } - return false; + MachineInstr &AndInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) + .addReg(DefReg) + .addImm(1); + + constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); + + I.eraseFromParent(); + return true; } bool X86InstructionSelector::selectCmp(MachineInstr &I, @@ -918,6 +982,33 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectUnmergeValues(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) + return false; + + // Split to extracts. 
+ unsigned NumDefs = I.getNumOperands() - 1; + unsigned SrcReg = I.getOperand(NumDefs).getReg(); + unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); + + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { + + MachineInstr &ExtrInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg()) + .addReg(SrcReg) + .addImm(Idx * DefSize); + + if (!select(ExtrInst)) + return false; + } + + I.eraseFromParent(); + return true; +} + bool X86InstructionSelector::selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index a5fa3340c3f1..744ba21011af 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -69,12 +69,14 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { for (auto Ty : {s8, s16, s32, p0}) setAction({MemOp, Ty}, Legal); + setAction({MemOp, s1}, WidenScalar); // And everything's fine in addrspace 0. setAction({MemOp, 1, p0}, Legal); } // Pointer-handling setAction({G_FRAME_INDEX, p0}, Legal); + setAction({G_GLOBAL_VALUE, p0}, Legal); setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); @@ -90,8 +92,10 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar); // Extensions - setAction({G_ZEXT, s32}, Legal); - setAction({G_SEXT, s32}, Legal); + for (auto Ty : {s8, s16, s32}) { + setAction({G_ZEXT, Ty}, Legal); + setAction({G_SEXT, Ty}, Legal); + } for (auto Ty : {s1, s8, s16}) { setAction({G_ZEXT, 1, Ty}, Legal); @@ -125,12 +129,14 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { for (auto Ty : {s8, s16, s32, s64, p0}) setAction({MemOp, Ty}, Legal); + setAction({MemOp, s1}, WidenScalar); // And everything's fine in addrspace 0. setAction({MemOp, 1, p0}, Legal); } // Pointer-handling setAction({G_FRAME_INDEX, p0}, Legal); + setAction({G_GLOBAL_VALUE, p0}, Legal); setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); @@ -146,7 +152,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar); // Extensions - for (auto Ty : {s32, s64}) { + for (auto Ty : {s8, s16, s32, s64}) { setAction({G_ZEXT, Ty}, Legal); setAction({G_SEXT, Ty}, Legal); } diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 33bc8e11a572..fd2837b79103 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1042,7 +1042,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) { - assert(Subtarget->is64Bit() && "XRay custom events only suports X86-64"); + assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64"); // We want to emit the following pattern, which follows the x86 calling // convention to prepare for the trampoline call to be patched in. 
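The selectUnmergeValues change above lowers a single G_UNMERGE_VALUES into one G_EXTRACT per destination register, each reading the wide source at a bit offset of Idx * DefSize. The standalone sketch below is illustrative only and is not part of the patch; the names ExtractOp and planUnmerge are hypothetical and simply mirror the offset arithmetic the selector relies on.

// Illustrative sketch only (not part of the patch): mirrors the offset math
// used when a G_UNMERGE_VALUES with NumDefs destinations of DefSize bits each
// is split into NumDefs G_EXTRACTs from the wide source register.
#include <cstdio>
#include <vector>

struct ExtractOp {
  unsigned DefIndex;   // which destination register this extract feeds
  unsigned BitOffset;  // offset into the source, in bits (Idx * DefSize)
};

// Hypothetical helper: compute one extract per destination.
static std::vector<ExtractOp> planUnmerge(unsigned NumDefs, unsigned DefSize) {
  std::vector<ExtractOp> Plan;
  for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
    Plan.push_back({Idx, Idx * DefSize});
  return Plan;
}

int main() {
  // e.g. unmerging a 128-bit value into four 32-bit definitions.
  for (const ExtractOp &Op : planUnmerge(4, 32))
    std::printf("def %u <- G_EXTRACT at bit %u\n", Op.DefIndex, Op.BitOffset);
  return 0;
}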
@@ -1332,6 +1332,32 @@ static std::string getShuffleComment(const MachineInstr *MI, return Comment; } +static void printConstant(const Constant *COp, raw_ostream &CS) { + if (isa(COp)) { + CS << "u"; + } else if (auto *CI = dyn_cast(COp)) { + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + const auto &Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } + } else if (auto *CF = dyn_cast(COp)) { + SmallString<32> Str; + CF->getValueAPF().toString(Str); + CS << Str; + } else { + CS << "?"; + } +} + void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(*MF, *this); const X86RegisterInfo *RI = MF->getSubtarget().getRegisterInfo(); @@ -1766,59 +1792,73 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // For loads from a constant pool to a vector register, print the constant // loaded. CASE_ALL_MOV_RM() + case X86::VBROADCASTF128: + case X86::VBROADCASTI128: + case X86::VBROADCASTF32X4Z256rm: + case X86::VBROADCASTF32X4rm: + case X86::VBROADCASTF32X8rm: + case X86::VBROADCASTF64X2Z128rm: + case X86::VBROADCASTF64X2rm: + case X86::VBROADCASTF64X4rm: + case X86::VBROADCASTI32X4Z256rm: + case X86::VBROADCASTI32X4rm: + case X86::VBROADCASTI32X8rm: + case X86::VBROADCASTI64X2Z128rm: + case X86::VBROADCASTI64X2rm: + case X86::VBROADCASTI64X4rm: if (!OutStreamer->isVerboseAsm()) break; if (MI->getNumOperands() <= 4) break; if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + int NumLanes = 1; + // Override NumLanes for the broadcast instructions. + switch (MI->getOpcode()) { + case X86::VBROADCASTF128: NumLanes = 2; break; + case X86::VBROADCASTI128: NumLanes = 2; break; + case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break; + case X86::VBROADCASTF32X4rm: NumLanes = 4; break; + case X86::VBROADCASTF32X8rm: NumLanes = 2; break; + case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break; + case X86::VBROADCASTF64X2rm: NumLanes = 4; break; + case X86::VBROADCASTF64X4rm: NumLanes = 2; break; + case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break; + case X86::VBROADCASTI32X4rm: NumLanes = 4; break; + case X86::VBROADCASTI32X8rm: NumLanes = 2; break; + case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break; + case X86::VBROADCASTI64X2rm: NumLanes = 4; break; + case X86::VBROADCASTI64X4rm: NumLanes = 2; break; + } + std::string Comment; raw_string_ostream CS(Comment); const MachineOperand &DstOp = MI->getOperand(0); CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; if (auto *CDS = dyn_cast(C)) { CS << "["; - for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) { - if (i != 0) - CS << ","; - if (CDS->getElementType()->isIntegerTy()) - CS << CDS->getElementAsInteger(i); - else if (CDS->getElementType()->isFloatTy()) - CS << CDS->getElementAsFloat(i); - else if (CDS->getElementType()->isDoubleTy()) - CS << CDS->getElementAsDouble(i); - else - CS << "?"; + for (int l = 0; l != NumLanes; ++l) { + for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) { + if (i != 0 || l != 0) + CS << ","; + if (CDS->getElementType()->isIntegerTy()) + CS << CDS->getElementAsInteger(i); + else if (CDS->getElementType()->isFloatTy()) + CS << CDS->getElementAsFloat(i); + else if (CDS->getElementType()->isDoubleTy()) + CS << CDS->getElementAsDouble(i); + else + CS << "?"; + } } CS << "]"; OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); 
} else if (auto *CV = dyn_cast(C)) { CS << "<"; - for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) { - if (i != 0) - CS << ","; - Constant *COp = CV->getOperand(i); - if (isa(COp)) { - CS << "u"; - } else if (auto *CI = dyn_cast(COp)) { - if (CI->getBitWidth() <= 64) { - CS << CI->getZExtValue(); - } else { - // print multi-word constant as (w0,w1) - const auto &Val = CI->getValue(); - CS << "("; - for (int i = 0, N = Val.getNumWords(); i < N; ++i) { - if (i > 0) - CS << ","; - CS << Val.getRawData()[i]; - } - CS << ")"; - } - } else if (auto *CF = dyn_cast(COp)) { - SmallString<32> Str; - CF->getValueAPF().toString(Str); - CS << Str; - } else { - CS << "?"; + for (int l = 0; l != NumLanes; ++l) { + for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) { + if (i != 0 || l != 0) + CS << ","; + printConstant(CV->getOperand(i), CS); } } CS << ">"; @@ -1826,6 +1866,85 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } } break; + case X86::VBROADCASTSSrm: + case X86::VBROADCASTSSYrm: + case X86::VBROADCASTSSZ128m: + case X86::VBROADCASTSSZ256m: + case X86::VBROADCASTSSZm: + case X86::VBROADCASTSDYrm: + case X86::VBROADCASTSDZ256m: + case X86::VBROADCASTSDZm: + case X86::VPBROADCASTBrm: + case X86::VPBROADCASTBYrm: + case X86::VPBROADCASTBZ128m: + case X86::VPBROADCASTBZ256m: + case X86::VPBROADCASTBZm: + case X86::VPBROADCASTDrm: + case X86::VPBROADCASTDYrm: + case X86::VPBROADCASTDZ128m: + case X86::VPBROADCASTDZ256m: + case X86::VPBROADCASTDZm: + case X86::VPBROADCASTQrm: + case X86::VPBROADCASTQYrm: + case X86::VPBROADCASTQZ128m: + case X86::VPBROADCASTQZ256m: + case X86::VPBROADCASTQZm: + case X86::VPBROADCASTWrm: + case X86::VPBROADCASTWYrm: + case X86::VPBROADCASTWZ128m: + case X86::VPBROADCASTWZ256m: + case X86::VPBROADCASTWZm: + if (!OutStreamer->isVerboseAsm()) + break; + if (MI->getNumOperands() <= 4) + break; + if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + int NumElts; + switch (MI->getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VBROADCASTSSrm: NumElts = 4; break; + case X86::VBROADCASTSSYrm: NumElts = 8; break; + case X86::VBROADCASTSSZ128m: NumElts = 4; break; + case X86::VBROADCASTSSZ256m: NumElts = 8; break; + case X86::VBROADCASTSSZm: NumElts = 16; break; + case X86::VBROADCASTSDYrm: NumElts = 4; break; + case X86::VBROADCASTSDZ256m: NumElts = 4; break; + case X86::VBROADCASTSDZm: NumElts = 8; break; + case X86::VPBROADCASTBrm: NumElts = 16; break; + case X86::VPBROADCASTBYrm: NumElts = 32; break; + case X86::VPBROADCASTBZ128m: NumElts = 16; break; + case X86::VPBROADCASTBZ256m: NumElts = 32; break; + case X86::VPBROADCASTBZm: NumElts = 64; break; + case X86::VPBROADCASTDrm: NumElts = 4; break; + case X86::VPBROADCASTDYrm: NumElts = 8; break; + case X86::VPBROADCASTDZ128m: NumElts = 4; break; + case X86::VPBROADCASTDZ256m: NumElts = 8; break; + case X86::VPBROADCASTDZm: NumElts = 16; break; + case X86::VPBROADCASTQrm: NumElts = 2; break; + case X86::VPBROADCASTQYrm: NumElts = 4; break; + case X86::VPBROADCASTQZ128m: NumElts = 2; break; + case X86::VPBROADCASTQZ256m: NumElts = 4; break; + case X86::VPBROADCASTQZm: NumElts = 8; break; + case X86::VPBROADCASTWrm: NumElts = 8; break; + case X86::VPBROADCASTWYrm: NumElts = 16; break; + case X86::VPBROADCASTWZ128m: NumElts = 8; break; + case X86::VPBROADCASTWZ256m: NumElts = 16; break; + case X86::VPBROADCASTWZm: NumElts = 32; break; + } + + std::string Comment; + raw_string_ostream CS(Comment); + const MachineOperand &DstOp = 
MI->getOperand(0); + CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; + CS << "["; + for (int i = 0; i != NumElts; ++i) { + if (i != 0) + CS << ","; + printConstant(C, CS); + } + CS << "]"; + OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); + } } MCInst TmpInst; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index b8ec5883152c..6d85ca6cad64 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -24,8 +24,8 @@ def SandyBridgeModel : SchedMachineModel { // Based on the LSD (loop-stream detector) queue size. let LoopMicroOpBufferSize = 28; - // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow - // the scheduler to assign a default model to unrecognized opcodes. + // This flag is set to allow the scheduler to assign + // a default model to unrecognized opcodes. let CompleteModel = 0; } @@ -48,6 +48,7 @@ def SBPort23 : ProcResource<2>; def SBPort4 : ProcResource<1>; // Many micro-ops are capable of issuing on multiple ports. +def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>; def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>; def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>; def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; @@ -115,10 +116,10 @@ def : WriteRes { // Scalar and vector floating point. defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; // 10-14 cycles. +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -134,11 +135,11 @@ def : WriteRes { } // Vector integer operations. -defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; def : WriteRes { let Latency = 2; @@ -148,13 +149,15 @@ def : WriteRes { let Latency = 6; let ResourceCycles = [1, 1, 1]; } -def : WriteRes { - let Latency = 6; - let ResourceCycles = [1, 1, 1]; +def : WriteRes { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; } -def : WriteRes { - let Latency = 6; - let ResourceCycles = [1, 1, 1, 1]; +def : WriteRes { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; } //////////////////////////////////////////////////////////////////////////////// @@ -204,13 +207,15 @@ def : WriteRes { } // Packed Compare Implicit Length Strings, Return Index -def : WriteRes { - let Latency = 3; +def : WriteRes { + let Latency = 11; + let NumMicroOps = 3; let ResourceCycles = [3]; } -def : WriteRes { - let Latency = 3; - let ResourceCycles = [3, 1]; +def : WriteRes { + let Latency = 17; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; } // Packed Compare Explicit Length Strings, Return Index @@ -224,22 +229,26 @@ def : WriteRes { } // AES Instructions. 
-def : WriteRes { - let Latency = 8; - let ResourceCycles = [2]; +def : WriteRes { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : WriteRes { - let Latency = 8; - let ResourceCycles = [2, 1]; +def : WriteRes { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : WriteRes { - let Latency = 8; +def : WriteRes { + let Latency = 12; + let NumMicroOps = 2; let ResourceCycles = [2]; } -def : WriteRes { - let Latency = 8; - let ResourceCycles = [2, 1]; +def : WriteRes { + let Latency = 18; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; } def : WriteRes { @@ -272,4 +281,2407 @@ def : WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; + +// Remaining SNB instrs. + +def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>; + +def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup1], (instregex "COMP_FST0r")>; +def: InstRW<[SBWriteResGroup1], (instregex "COM_FST0r")>; +def: InstRW<[SBWriteResGroup1], (instregex "UCOM_FPr")>; +def: InstRW<[SBWriteResGroup1], (instregex "UCOM_Fr")>; + +def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>; +def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>; +def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>; +def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>; +def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>; +def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>; +def: InstRW<[SBWriteResGroup2], 
(instregex "MOVDDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>; +def: InstRW<[SBWriteResGroup2], (instregex "SHUFPDrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>; +def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrm")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrm")>; +def: 
InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VXORPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "XORPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "XORPSrr")>; + +def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup3], (instregex "LEA64_32r")>; + +def SBWriteResGroup4 : SchedWriteRes<[SBPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "BT32ri8")>; +def: InstRW<[SBWriteResGroup4], (instregex "BT32rr")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTC32ri8")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTC32rr")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTR32ri8")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTR32rr")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTS32ri8")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTS32rr")>; +def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>; +def: InstRW<[SBWriteResGroup4], (instregex "CQO")>; +def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>; +def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>; +def: InstRW<[SBWriteResGroup4], (instregex "SAR32ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHL32ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHL64r1")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHR32ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>; +def: 
InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUrr")>; + +def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup5], (instregex "KORTESTBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSHUFBrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex 
"PMOVSXWQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSIGNWrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLQDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VMASKMOVPSYrm")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>; +def: 
InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLWDrr")>; + +def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup6], (instregex "ADD32ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "ADD32rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND32ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND64ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "CBW")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMC")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP16ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP32i32")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>; +def: 
InstRW<[SBWriteResGroup6], (instregex "DEC64r")>; +def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>; +def: InstRW<[SBWriteResGroup6], (instregex "INC64r")>; +def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>; +def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOV32rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVPQI2QIrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr16")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr8")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr16")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr8")>; +def: InstRW<[SBWriteResGroup6], (instregex "NEG64r")>; +def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>; +def: InstRW<[SBWriteResGroup6], (instregex "NOT64r")>; +def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR64ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "STC")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB64ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "TEST64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR32rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR64ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR8rr")>; + +def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> { + let Latency = 2; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>; +def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVPQIto64rr")>; + +def SBWriteResGroup9 : SchedWriteRes<[SBPort0]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup9], (instregex 
"BLENDVPDrr0")>; +def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>; +def: InstRW<[SBWriteResGroup9], (instregex "ROL32ri")>; +def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>; +def: InstRW<[SBWriteResGroup9], (instregex "ROR32ri")>; +def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>; +def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>; +def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>; +def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>; +def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>; +def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>; +def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSrr")>; + +def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup10], (instregex "VPBLENDVBrr")>; + +def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup11], (instregex "SCASB")>; +def: InstRW<[SBWriteResGroup11], (instregex "SCASL")>; +def: InstRW<[SBWriteResGroup11], (instregex "SCASQ")>; +def: InstRW<[SBWriteResGroup11], (instregex "SCASW")>; + +def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup12], (instregex "COMISDrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "COMISSrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "UCOMISDrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "UCOMISSrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "VCOMISDrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "VCOMISSrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISDrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISSrr")>; + +def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup13], (instregex "CVTPS2PDrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "PTESTrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDYrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "VPTESTYrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "VPTESTrr")>; + +def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRLDrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRLWrr")>; + +def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup15], (instregex "FNSTSW16r")>; + +def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort0]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} 
+def: InstRW<[SBWriteResGroup16], (instregex "BSWAP32r")>; + +def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup17], (instregex "PINSRBrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "PINSRDrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "PINSRQrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "PINSRWrri")>; +def: InstRW<[SBWriteResGroup17], (instregex "VPINSRBrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "VPINSRDrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "VPINSRQrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "VPINSRWrri")>; + +def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>; + +def SBWriteResGroup19 : SchedWriteRes<[SBPort0,SBPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup19], (instregex "ADC64ri8")>; +def: InstRW<[SBWriteResGroup19], (instregex "ADC64rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>; +def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVB32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVG32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVL32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVO32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVP32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVS32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB64ri8")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "SHLD32rri8")>; +def: InstRW<[SBWriteResGroup19], (instregex "SHRD32rri8")>; + +def SBWriteResGroup20 : SchedWriteRes<[SBPort0]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>; +def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>; +def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VMOVMSKPSYrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex 
"VPMADDWDrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPSADBWrr")>; + +def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>; +def: InstRW<[SBWriteResGroup21], (instregex "BSF32rr")>; +def: InstRW<[SBWriteResGroup21], (instregex "BSR32rr")>; +def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>; +def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>; +def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r32")>; +def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r8")>; +def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>; +def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>; +def: InstRW<[SBWriteResGroup21], (instregex "POPCNT32rr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>; +def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>; +def: 
InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VBROADCASTF128")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>; + +def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup22], (instregex "EXTRACTPSrr")>; +def: InstRW<[SBWriteResGroup22], (instregex "VEXTRACTPSrr")>; + +def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup23], (instregex "PEXTRBrr")>; +def: InstRW<[SBWriteResGroup23], (instregex "PEXTRDrr")>; +def: InstRW<[SBWriteResGroup23], (instregex "PEXTRQrr")>; +def: InstRW<[SBWriteResGroup23], (instregex "PEXTRWri")>; +def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRBrr")>; +def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRDrr")>; +def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRQrr")>; +def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRWri")>; +def: InstRW<[SBWriteResGroup23], (instregex "SHL64rCL")>; +def: InstRW<[SBWriteResGroup23], (instregex "SHL8rCL")>; + +def SBWriteResGroup24 : SchedWriteRes<[SBPort15]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDSWrr64")>; +def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDWrr64")>; +def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDrr64")>; +def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBDrr64")>; +def: InstRW<[SBWriteResGroup24], (instregex 
"MMX_PHSUBSWrr64")>; +def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBWrr64")>; +def: InstRW<[SBWriteResGroup24], (instregex "PHADDDrr")>; +def: InstRW<[SBWriteResGroup24], (instregex "PHADDSWrr128")>; +def: InstRW<[SBWriteResGroup24], (instregex "PHADDWrr")>; +def: InstRW<[SBWriteResGroup24], (instregex "PHSUBDrr")>; +def: InstRW<[SBWriteResGroup24], (instregex "PHSUBSWrr128")>; +def: InstRW<[SBWriteResGroup24], (instregex "PHSUBWrr")>; +def: InstRW<[SBWriteResGroup24], (instregex "VPHADDDrr")>; +def: InstRW<[SBWriteResGroup24], (instregex "VPHADDSWrr128")>; +def: InstRW<[SBWriteResGroup24], (instregex "VPHADDWrr")>; +def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBDrr")>; +def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBSWrr128")>; +def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBWrr")>; + +def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>; +def: InstRW<[SBWriteResGroup25], (instregex "XADD32rr")>; +def: InstRW<[SBWriteResGroup25], (instregex "XADD8rr")>; + +def SBWriteResGroup26 : SchedWriteRes<[SBPort0,SBPort015]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup26], (instregex "CMOVA32rr")>; +def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE32rr")>; + +def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup27], (instregex "MUL64r")>; + +def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup28], (instregex "CVTDQ2PDrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2DQrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2PSrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "CVTSD2SSrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SD64rr")>; +def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SDrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "CVTTPD2DQrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPD2PIirr")>; +def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr")>; +def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTTPD2PIirr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDYrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQYrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SDrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>; +def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQrr")>; + +def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>; +def: InstRW<[SBWriteResGroup29], (instregex "PAUSE")>; + +def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup30], (instregex "MULPDrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "MULPSrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "MULSDrr")>; +def: 
InstRW<[SBWriteResGroup30], (instregex "MULSSrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "MUL_FPrST0")>; +def: InstRW<[SBWriteResGroup30], (instregex "MUL_FST0r")>; +def: InstRW<[SBWriteResGroup30], (instregex "MUL_FrST0")>; +def: InstRW<[SBWriteResGroup30], (instregex "PCMPGTQrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "PHMINPOSUWrr128")>; +def: InstRW<[SBWriteResGroup30], (instregex "RCPPSr")>; +def: InstRW<[SBWriteResGroup30], (instregex "RCPSSr")>; +def: InstRW<[SBWriteResGroup30], (instregex "RSQRTPSr")>; +def: InstRW<[SBWriteResGroup30], (instregex "RSQRTSSr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VMULPDYrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VMULPDrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VMULPSYrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VMULPSrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VMULSDrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VMULSSrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VPCMPGTQrr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VPHMINPOSUWrr128")>; +def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTPSr")>; +def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTSSr")>; + +def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup31], (instregex "MOV32rm")>; +def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>; +def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm16")>; +def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm8")>; +def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm16")>; +def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm8")>; +def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>; + +def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort1]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>; +def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>; +def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>; +def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>; +def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>; +def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>; +def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>; +def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>; +def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>; +def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>; +def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>; +def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>; +def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>; +def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SIrr")>; + +def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup33], (instregex "MOV64mr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOV8mr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVAPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVAPSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVDQAmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVDQUmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVHPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVHPSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVLPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVLPSmr")>; 
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTDQmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>; +def: InstRW<[SBWriteResGroup33], (instregex "PUSH64r")>; +def: InstRW<[SBWriteResGroup33], (instregex "VEXTRACTF128mr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVPDI2DImr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQI2QImr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQIto64mr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVSDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVSSmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSYmr")>; +def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSmr")>; + +def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup34], (instregex "MPSADBWrri")>; +def: InstRW<[SBWriteResGroup34], (instregex "VMPSADBWrri")>; + +def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup35], (instregex "CLI")>; +def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>; +def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>; +def: 
InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>; +def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSrr")>; + +def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup36], (instregex "CALL64r")>; +def: InstRW<[SBWriteResGroup36], (instregex "EXTRACTPSmr")>; +def: InstRW<[SBWriteResGroup36], (instregex "VEXTRACTPSmr")>; + +def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYrm")>; +def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>; +def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>; + +def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup38], (instregex "SETAEm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETBm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETEm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETGEm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETGm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETLEm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETLm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETNEm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETNOm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETNPm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETNSm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETOm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETPm")>; +def: InstRW<[SBWriteResGroup38], (instregex "SETSm")>; + +def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup39], (instregex "PEXTRBmr")>; +def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRBmr")>; +def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRDmr")>; +def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRWmr")>; + +def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi")>; +def: InstRW<[SBWriteResGroup40], (instregex "STOSB")>; +def: InstRW<[SBWriteResGroup40], (instregex "STOSL")>; +def: InstRW<[SBWriteResGroup40], (instregex "STOSQ")>; +def: InstRW<[SBWriteResGroup40], (instregex "STOSW")>; + +def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SBWriteResGroup41], (instregex "FNINIT")>; + +def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort015]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG32rr")>; +def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG8rr")>; + +def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup43], (instregex "SETAm")>; +def: InstRW<[SBWriteResGroup43], (instregex "SETBEm")>; + +def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> { + let Latency = 5; + let 
NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SBWriteResGroup44], (instregex "LDMXCSR")>; +def: InstRW<[SBWriteResGroup44], (instregex "STMXCSR")>; +def: InstRW<[SBWriteResGroup44], (instregex "VLDMXCSR")>; +def: InstRW<[SBWriteResGroup44], (instregex "VSTMXCSR")>; + +def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SBWriteResGroup45], (instregex "PEXTRDmr")>; +def: InstRW<[SBWriteResGroup45], (instregex "PEXTRQmr")>; +def: InstRW<[SBWriteResGroup45], (instregex "VPEXTRQmr")>; +def: InstRW<[SBWriteResGroup45], (instregex "PUSHF16")>; +def: InstRW<[SBWriteResGroup45], (instregex "PUSHF64")>; + +def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>; + +def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>; + +def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup48], (instregex "LDDQUrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOV64toPQIrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVAPDrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVAPSrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVDDUPrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVDI2PDIrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVDQArm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVDQUrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVNTDQArm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVSHDUPrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVSLDUPrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVUPDrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "MOVUPSrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "POP64r")>; +def: InstRW<[SBWriteResGroup48], (instregex "VBROADCASTSSrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUYrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOV64toPQIrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPDrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPSrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVDDUPrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVDI2PDIrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQArm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQUrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVNTDQArm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVQI2PQIrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVSDrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVSHDUPrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVSLDUPrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVSSrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPDrm")>; +def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPSrm")>; + +def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup49], (instregex 
"JMP64m")>; +def: InstRW<[SBWriteResGroup49], (instregex "MOV64sm")>; + +def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup50], (instregex "BT64mi8")>; + +def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSBrm64")>; +def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSDrm64")>; +def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSWrm64")>; +def: InstRW<[SBWriteResGroup51], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSHUFBrm64")>; +def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNBrm64")>; +def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNDrm64")>; +def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNWrm64")>; + +def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup52], (instregex "ADD64rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "AND64rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "CMP64mi8")>; +def: InstRW<[SBWriteResGroup52], (instregex "CMP64mr")>; +def: InstRW<[SBWriteResGroup52], (instregex "CMP64rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>; +def: InstRW<[SBWriteResGroup52], (instregex "CMP8mr")>; +def: InstRW<[SBWriteResGroup52], (instregex "CMP8rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "LODSL")>; +def: InstRW<[SBWriteResGroup52], (instregex "LODSQ")>; +def: InstRW<[SBWriteResGroup52], (instregex "OR64rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "OR8rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "SUB64rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "SUB8rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "XOR64rm")>; +def: InstRW<[SBWriteResGroup52], (instregex "XOR8rm")>; + +def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup53], (instregex "POP64rmm")>; +def: InstRW<[SBWriteResGroup53], (instregex "PUSH64rmm")>; +def: InstRW<[SBWriteResGroup53], (instregex "ST_F32m")>; +def: InstRW<[SBWriteResGroup53], (instregex "ST_F64m")>; +def: InstRW<[SBWriteResGroup53], (instregex "ST_FP32m")>; +def: InstRW<[SBWriteResGroup53], (instregex "ST_FP64m")>; +def: InstRW<[SBWriteResGroup53], (instregex "ST_FP80m")>; + +def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> { + let Latency = 7; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPDYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPSYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVDDUPYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQAYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQUYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVSHDUPYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVSLDUPYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPDYrm")>; +def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPSYrm")>; + +def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 7; + let 
NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup55], (instregex "CVTPS2PDrm")>; +def: InstRW<[SBWriteResGroup55], (instregex "CVTSS2SDrm")>; +def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDYrm")>; +def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDrm")>; +def: InstRW<[SBWriteResGroup55], (instregex "VCVTSS2SDrm")>; +def: InstRW<[SBWriteResGroup55], (instregex "VTESTPDrm")>; +def: InstRW<[SBWriteResGroup55], (instregex "VTESTPSrm")>; + +def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup56], (instregex "ANDNPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "ANDNPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "ANDPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "ANDPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "INSERTPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "MOVHPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "MOVHPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "MOVLPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "MOVLPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "ORPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "ORPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "SHUFPDrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "SHUFPSrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VANDNPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VANDNPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VANDPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VANDPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128")>; +def: InstRW<[SBWriteResGroup56], (instregex "VINSERTPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VORPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VORPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDri")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSri")>; +def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPDrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPSrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VXORPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VXORPSrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "XORPDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "XORPSrm")>; + +def SBWriteResGroup57 : SchedWriteRes<[SBPort5,SBPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>; +def: InstRW<[SBWriteResGroup57], (instregex "AESDECrr")>; +def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>; +def: 
InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>; +def: InstRW<[SBWriteResGroup57], (instregex "KANDQrr")>; +def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>; +def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>; +def: InstRW<[SBWriteResGroup57], (instregex "VAESENCrr")>; + +def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup58], (instregex "BLENDPDrmi")>; +def: InstRW<[SBWriteResGroup58], (instregex "BLENDPSrmi")>; +def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPDrmi")>; +def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPSrmi")>; +def: InstRW<[SBWriteResGroup58], (instregex "VINSERTF128rm")>; + +def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PABSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PABSDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PABSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PACKSSDWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PACKSSWBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PACKUSDWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PACKUSWBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PADDBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PADDDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PADDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PADDSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PADDSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PADDUSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PADDUSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PADDWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PALIGNRrmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "PAVGBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PAVGWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PBLENDWrmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PINSRBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PINSRDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PINSRQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PINSRWrmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMAXSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMAXSDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMAXSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMAXUBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMAXUDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMAXUWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMINSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMINSDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMINSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMINUBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMINUDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMINUWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex 
"PMOVSXBQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSHUFBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSHUFDmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSHUFHWmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSHUFLWmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSIGNBrm128")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSIGNDrm128")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSIGNWrm128")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSUBBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSUBDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSUBQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSUBSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSUBSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PSUBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHQDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHWDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLQDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLWDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPABSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPABSDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPABSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSDWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSWBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSDWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSWBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPADDBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPADDDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPADDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPADDSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPADDSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPADDWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPALIGNRrmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPAVGBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPAVGWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPBLENDWrmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTDrm")>; +def: 
InstRW<[SBWriteResGroup59], (instregex "VPCMPGTWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPINSRBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPINSRDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPINSRQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPINSRWrmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMINSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMINSDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMINSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMINUBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMINUDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMINUWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFDmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFHWmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFLWmi")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNBrm128")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNDrm128")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNWrm128")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSUBBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSUBDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSUBQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSBrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPSUBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHWDrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLQDQrm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLWDrm")>; + +def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup60], (instregex "PANDNrm")>; +def: InstRW<[SBWriteResGroup60], (instregex "PANDrm")>; +def: InstRW<[SBWriteResGroup60], (instregex "PORrm")>; +def: InstRW<[SBWriteResGroup60], (instregex "PXORrm")>; +def: 
InstRW<[SBWriteResGroup60], (instregex "VPANDNrm")>; +def: InstRW<[SBWriteResGroup60], (instregex "VPANDrm")>; +def: InstRW<[SBWriteResGroup60], (instregex "VPORrm")>; +def: InstRW<[SBWriteResGroup60], (instregex "VPXORrm")>; + +def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort0]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSr")>; +def: InstRW<[SBWriteResGroup61], (instregex "VRSQRTPSYr")>; + +def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup62], (instregex "VERRm")>; +def: InstRW<[SBWriteResGroup62], (instregex "VERWm")>; + +def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup63], (instregex "LODSB")>; +def: InstRW<[SBWriteResGroup63], (instregex "LODSW")>; + +def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup64], (instregex "FARJMP64")>; + +def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup65], (instregex "ADC64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVB64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVE64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVG64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVL64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVO64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVP64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "CMOVS64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "SBB64rm")>; +def: InstRW<[SBWriteResGroup65], (instregex "SBB8rm")>; + +def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup66], (instregex "FNSTSWm")>; + +def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup67], (instregex "SLDT32r")>; +def: InstRW<[SBWriteResGroup67], (instregex "STR32r")>; + +def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup68], (instregex "CALL64m")>; +def: InstRW<[SBWriteResGroup68], (instregex "FNSTCW16m")>; + +def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup69], (instregex "BTC64mi8")>; +def: InstRW<[SBWriteResGroup69], (instregex "BTR64mi8")>; +def: InstRW<[SBWriteResGroup69], (instregex "BTS64mi8")>; +def: InstRW<[SBWriteResGroup69], (instregex "SAR64mi")>; +def: 
InstRW<[SBWriteResGroup69], (instregex "SAR8mi")>; +def: InstRW<[SBWriteResGroup69], (instregex "SHL64m1")>; +def: InstRW<[SBWriteResGroup69], (instregex "SHL64mi")>; +def: InstRW<[SBWriteResGroup69], (instregex "SHL8m1")>; +def: InstRW<[SBWriteResGroup69], (instregex "SHL8mi")>; +def: InstRW<[SBWriteResGroup69], (instregex "SHR64mi")>; +def: InstRW<[SBWriteResGroup69], (instregex "SHR8mi")>; + +def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup70], (instregex "ADD64mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "ADD64mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>; +def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "AND64mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "AND64mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>; +def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "DEC64m")>; +def: InstRW<[SBWriteResGroup70], (instregex "DEC8m")>; +def: InstRW<[SBWriteResGroup70], (instregex "INC64m")>; +def: InstRW<[SBWriteResGroup70], (instregex "INC8m")>; +def: InstRW<[SBWriteResGroup70], (instregex "NEG64m")>; +def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>; +def: InstRW<[SBWriteResGroup70], (instregex "NOT64m")>; +def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>; +def: InstRW<[SBWriteResGroup70], (instregex "OR64mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "OR64mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>; +def: InstRW<[SBWriteResGroup70], (instregex "OR8mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "SUB64mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "SUB64mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>; +def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "TEST64rm")>; +def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>; +def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>; +def: InstRW<[SBWriteResGroup70], (instregex "XOR64mi8")>; +def: InstRW<[SBWriteResGroup70], (instregex "XOR64mr")>; +def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>; +def: InstRW<[SBWriteResGroup70], (instregex "XOR8mr")>; + +def SBWriteResGroup71 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMADDUBSWrm64")>; +def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMULHRSWrm64")>; +def: InstRW<[SBWriteResGroup71], (instregex "VTESTPDYrm")>; +def: InstRW<[SBWriteResGroup71], (instregex "VTESTPSYrm")>; + +def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup72], (instregex "BSF64rm")>; +def: InstRW<[SBWriteResGroup72], (instregex "BSR64rm")>; +def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m16")>; +def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m8")>; +def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>; +def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>; +def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>; +def: InstRW<[SBWriteResGroup72], (instregex "FCOMP64m")>; +def: InstRW<[SBWriteResGroup72], (instregex "MUL8m")>; + +def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: 
InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VANDNPSYrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VANDPDrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VANDPSrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYri")>; +def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDmi")>; +def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYri")>; +def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSmi")>; +def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>; +def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>; +def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VXORPDrm")>; +def: InstRW<[SBWriteResGroup73], (instregex "VXORPSrm")>; + +def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi")>; +def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPSYrmi")>; + +def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPDrm0")>; +def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPSrm0")>; +def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPDrm")>; +def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPSrm")>; +def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm")>; +def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPSrm")>; + +def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrr0")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPBLENDVBrm")>; + +def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup77], (instregex "COMISDrm")>; +def: InstRW<[SBWriteResGroup77], (instregex "COMISSrm")>; +def: InstRW<[SBWriteResGroup77], (instregex "UCOMISDrm")>; +def: InstRW<[SBWriteResGroup77], (instregex "UCOMISSrm")>; +def: InstRW<[SBWriteResGroup77], (instregex "VCOMISDrm")>; +def: InstRW<[SBWriteResGroup77], (instregex "VCOMISSrm")>; +def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISDrm")>; +def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISSrm")>; + +def SBWriteResGroup78 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup78], (instregex "PTESTrm")>; +def: InstRW<[SBWriteResGroup78], (instregex "VPTESTrm")>; + +def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>; +def: 
InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDri")>; +def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQri")>; +def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWri")>; +def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>; +def: InstRW<[SBWriteResGroup79], (instregex "VPSRLWrm")>; + +def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDSWrm64")>; +def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDWrm64")>; +def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDrm64")>; +def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBDrm64")>; +def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBSWrm64")>; +def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBWrm64")>; + +def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG64rm")>; +def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG8rm")>; + +def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup82], (instregex "CMOVA64rm")>; +def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE64rm")>; + +def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [2,3]; +} +def: InstRW<[SBWriteResGroup83], (instregex "CMPSB")>; +def: InstRW<[SBWriteResGroup83], (instregex "CMPSL")>; +def: InstRW<[SBWriteResGroup83], (instregex "CMPSQ")>; +def: InstRW<[SBWriteResGroup83], (instregex "CMPSW")>; + +def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,2]; +} +def: InstRW<[SBWriteResGroup84], (instregex "FLDCW16m")>; + +def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,2]; +} +def: InstRW<[SBWriteResGroup85], (instregex "ROL64mi")>; +def: InstRW<[SBWriteResGroup85], (instregex "ROL8mi")>; +def: InstRW<[SBWriteResGroup85], (instregex "ROR64mi")>; +def: InstRW<[SBWriteResGroup85], (instregex "ROR8mi")>; + +def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,2]; +} +def: InstRW<[SBWriteResGroup86], (instregex "MOVSB")>; +def: InstRW<[SBWriteResGroup86], (instregex "MOVSL")>; +def: InstRW<[SBWriteResGroup86], (instregex "MOVSQ")>; +def: InstRW<[SBWriteResGroup86], (instregex "MOVSW")>; +def: InstRW<[SBWriteResGroup86], (instregex "XADD64rm")>; +def: InstRW<[SBWriteResGroup86], (instregex "XADD8rm")>; + +def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; +} +def: InstRW<[SBWriteResGroup87], (instregex "FARCALL64")>; + +def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: 
InstRW<[SBWriteResGroup88], (instregex "SHLD64mri8")>; +def: InstRW<[SBWriteResGroup88], (instregex "SHRD64mri8")>; + +def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup89], (instregex "MMX_PMULUDQirm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMADDUBSWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMADDWDrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMULDQrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMULHRSWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMULHUWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMULHWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMULLDrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMULLWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PMULUDQrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "PSADBWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMADDUBSWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMADDWDrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMULDQrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMULHRSWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMULHUWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMULHWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMULLDrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMULLWrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPMULUDQrm")>; +def: InstRW<[SBWriteResGroup89], (instregex "VPSADBWrm")>; + +def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup90], (instregex "ADDPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ADDPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ADDSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ADDSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CMPPDrmi")>; +def: InstRW<[SBWriteResGroup90], (instregex "CMPPSrmi")>; +def: InstRW<[SBWriteResGroup90], (instregex "CMPSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTDQ2PSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTPS2DQrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SD64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTTPS2DQrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MAXPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MAXPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MAXSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MAXSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MINPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MINPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MINSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MINSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>; +def: InstRW<[SBWriteResGroup90], (instregex "POPCNT64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPDm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPSm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSDm")>; +def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSSm")>; +def: InstRW<[SBWriteResGroup90], (instregex "SUBPDrm")>; +def: InstRW<[SBWriteResGroup90], 
(instregex "SUBPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "SUBSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "SUBSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VADDPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VADDPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VADDSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VADDSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCMPPDrmi")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCMPPSrmi")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCMPSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCMPSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCVTDQ2PSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCVTPS2DQrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SD64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VCVTTPS2DQrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMAXPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMAXPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMAXSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMAXSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMINPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMINPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMINSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VMINSSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPDm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPSm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSDm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSSm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VSUBPDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VSUBPSrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VSUBSDrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "VSUBSSrm")>; + +def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>; +def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>; +def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDrm")>; +def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSrm")>; + +def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup92], (instregex "DPPDrri")>; +def: InstRW<[SBWriteResGroup92], (instregex "VDPPDrri")>; + +def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm")>; +def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SIrm")>; +def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SI64rm")>; +def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SIrm")>; +def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SI64rm")>; +def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SIrm")>; +def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SI64rm")>; +def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SIrm")>; +def: InstRW<[SBWriteResGroup93], (instregex "MUL64m")>; + +def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup94], (instregex 
"VPTESTYrm")>; + +def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup95], (instregex "LD_F32m")>; +def: InstRW<[SBWriteResGroup95], (instregex "LD_F64m")>; +def: InstRW<[SBWriteResGroup95], (instregex "LD_F80m")>; + +def SBWriteResGroup96 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SBWriteResGroup96], (instregex "PHADDDrm")>; +def: InstRW<[SBWriteResGroup96], (instregex "PHADDSWrm128")>; +def: InstRW<[SBWriteResGroup96], (instregex "PHADDWrm")>; +def: InstRW<[SBWriteResGroup96], (instregex "PHSUBDrm")>; +def: InstRW<[SBWriteResGroup96], (instregex "PHSUBSWrm128")>; +def: InstRW<[SBWriteResGroup96], (instregex "PHSUBWrm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VPHADDDrm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VPHADDSWrm128")>; +def: InstRW<[SBWriteResGroup96], (instregex "VPHADDWrm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBDrm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBSWrm128")>; +def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBWrm")>; + +def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup97], (instregex "IST_F16m")>; +def: InstRW<[SBWriteResGroup97], (instregex "IST_F32m")>; +def: InstRW<[SBWriteResGroup97], (instregex "IST_FP16m")>; +def: InstRW<[SBWriteResGroup97], (instregex "IST_FP32m")>; +def: InstRW<[SBWriteResGroup97], (instregex "IST_FP64m")>; +def: InstRW<[SBWriteResGroup97], (instregex "SHL64mCL")>; +def: InstRW<[SBWriteResGroup97], (instregex "SHL8mCL")>; + +def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { + let Latency = 9; + let NumMicroOps = 6; + let ResourceCycles = [1,2,3]; +} +def: InstRW<[SBWriteResGroup98], (instregex "ADC64mi8")>; +def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>; +def: InstRW<[SBWriteResGroup98], (instregex "SBB64mi8")>; +def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>; + +def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { + let Latency = 9; + let NumMicroOps = 6; + let ResourceCycles = [1,2,2,1]; +} +def: InstRW<[SBWriteResGroup99], (instregex "ADC64mr")>; +def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>; +def: InstRW<[SBWriteResGroup99], (instregex "SBB64mr")>; +def: InstRW<[SBWriteResGroup99], (instregex "SBB8mr")>; + +def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> { + let Latency = 9; + let NumMicroOps = 6; + let ResourceCycles = [1,1,2,1,1]; +} +def: InstRW<[SBWriteResGroup100], (instregex "BT64mr")>; +def: InstRW<[SBWriteResGroup100], (instregex "BTC64mr")>; +def: InstRW<[SBWriteResGroup100], (instregex "BTR64mr")>; +def: InstRW<[SBWriteResGroup100], (instregex "BTS64mr")>; + +def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>; +def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>; +def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>; +def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>; +def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>; +def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>; +def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>; +def: InstRW<[SBWriteResGroup101], 
(instregex "SUB_F32m")>; +def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>; +def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPDYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>; +def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>; +def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMINPDrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMINPSrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPDm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPSm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>; +def: InstRW<[SBWriteResGroup101], (instregex "VSUBPSYrm")>; + +def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rr")>; +def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SIrm")>; + +def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup103], (instregex "CVTDQ2PDrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2DQrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2PSrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "CVTSD2SSrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SS64rm")>; +def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SSrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "CVTTPD2DQrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPD2PIirm")>; +def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm")>; +def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTTPD2PIirm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDYrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2DQrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2PSrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTSD2SSrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SS64rm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>; + +def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup104], (instregex "MULPDrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "MULPSrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "MULSDrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "MULSSrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "PCMPGTQrm")>; +def: 
InstRW<[SBWriteResGroup104], (instregex "PHMINPOSUWrm128")>; +def: InstRW<[SBWriteResGroup104], (instregex "RCPPSm")>; +def: InstRW<[SBWriteResGroup104], (instregex "RCPSSm")>; +def: InstRW<[SBWriteResGroup104], (instregex "RSQRTPSm")>; +def: InstRW<[SBWriteResGroup104], (instregex "RSQRTSSm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VMULPDrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VMULPSrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VMULSDrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VMULSSrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VPCMPGTQrm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VPHMINPOSUWrm128")>; +def: InstRW<[SBWriteResGroup104], (instregex "VRCPPSm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VRCPSSm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTPSm")>; +def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTSSm")>; + +def SBWriteResGroup105 : SchedWriteRes<[SBPort0]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRIrr")>; +def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRM128rr")>; +def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRIrr")>; +def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRM128rr")>; + +def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup106], (instregex "FICOM16m")>; +def: InstRW<[SBWriteResGroup106], (instregex "FICOM32m")>; +def: InstRW<[SBWriteResGroup106], (instregex "FICOMP16m")>; +def: InstRW<[SBWriteResGroup106], (instregex "FICOMP32m")>; + +def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2DQYrm")>; +def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm")>; +def: InstRW<[SBWriteResGroup107], (instregex "VCVTTPD2DQYrm")>; + +def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup108], (instregex "MPSADBWrmi")>; +def: InstRW<[SBWriteResGroup108], (instregex "VMPSADBWrmi")>; + +def SBWriteResGroup109 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup109], (instregex "HADDPDrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "HADDPSrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "HSUBPDrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "HSUBPSrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VHADDPDrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VHADDPSrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPDrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPSrm")>; + +def SBWriteResGroup110 : SchedWriteRes<[SBPort5]> { + let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup110], (instregex "AESIMCrr")>; +def: InstRW<[SBWriteResGroup110], (instregex "VAESIMCrr")>; + +def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup111], (instregex "MUL_F32m")>; +def: InstRW<[SBWriteResGroup111], (instregex "MUL_F64m")>; +def: InstRW<[SBWriteResGroup111], (instregex "VMULPDYrm")>; +def: InstRW<[SBWriteResGroup111], (instregex 
"VMULPSYrm")>; + +def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup112], (instregex "DPPSrri")>; +def: InstRW<[SBWriteResGroup112], (instregex "VDPPSYrri")>; +def: InstRW<[SBWriteResGroup112], (instregex "VDPPSrri")>; + +def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VHADDPSYrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPDYrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPSYrm")>; + +def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI16m")>; +def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI32m")>; +def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI16m")>; +def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI32m")>; +def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI16m")>; +def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI32m")>; + +def SBWriteResGroup115 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup115], (instregex "AESDECLASTrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "AESDECrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "AESENCLASTrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "AESENCrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "VAESDECLASTrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "VAESDECrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "VAESENCLASTrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "VAESENCrm")>; + +def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup116], (instregex "DIVPSrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "DIVSSrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "SQRTPSr")>; +def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VDIVPSrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VDIVSSrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPSr")>; + +def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 14; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>; + +def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSm")>; +def: InstRW<[SBWriteResGroup118], (instregex "VRSQRTPSYm")>; + +def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 15; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI16m")>; +def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI32m")>; + +def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { + let Latency = 15; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SBWriteResGroup120], (instregex "DPPDrmi")>; +def: InstRW<[SBWriteResGroup120], (instregex "VDPPDrmi")>; + +def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 17; + 
let NumMicroOps = 4; + let ResourceCycles = [3,1]; +} +def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRIrm")>; +def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRM128rm")>; +def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRIrm")>; +def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRM128rm")>; + +def SBWriteResGroup122 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 18; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup122], (instregex "AESIMCrm")>; +def: InstRW<[SBWriteResGroup122], (instregex "VAESIMCrm")>; + +def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 20; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup123], (instregex "DIVPSrm")>; +def: InstRW<[SBWriteResGroup123], (instregex "DIVSSrm")>; +def: InstRW<[SBWriteResGroup123], (instregex "SQRTPSm")>; +def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm")>; +def: InstRW<[SBWriteResGroup123], (instregex "VDIVPSrm")>; +def: InstRW<[SBWriteResGroup123], (instregex "VDIVSSrm")>; +def: InstRW<[SBWriteResGroup123], (instregex "VSQRTPSm")>; + +def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> { + let Latency = 21; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>; + +def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 21; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>; + +def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> { + let Latency = 22; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup126], (instregex "DIVPDrr")>; +def: InstRW<[SBWriteResGroup126], (instregex "DIVSDrr")>; +def: InstRW<[SBWriteResGroup126], (instregex "SQRTPDr")>; +def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr")>; +def: InstRW<[SBWriteResGroup126], (instregex "VDIVPDrr")>; +def: InstRW<[SBWriteResGroup126], (instregex "VDIVSDrr")>; +def: InstRW<[SBWriteResGroup126], (instregex "VSQRTPDr")>; + +def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> { + let Latency = 24; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FPrST0")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FST0r")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FrST0")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIV_FPrST0")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIV_FST0r")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIV_FrST0")>; + +def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 28; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup128], (instregex "DIVPDrm")>; +def: InstRW<[SBWriteResGroup128], (instregex "DIVSDrm")>; +def: InstRW<[SBWriteResGroup128], (instregex "SQRTPDm")>; +def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm")>; +def: InstRW<[SBWriteResGroup128], (instregex "VDIVPDrm")>; +def: InstRW<[SBWriteResGroup128], (instregex "VDIVSDrm")>; +def: InstRW<[SBWriteResGroup128], (instregex "VSQRTPDm")>; + +def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort0]> { + let Latency = 29; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>; +def: InstRW<[SBWriteResGroup129], (instregex "VSQRTPSYr")>; + +def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 31; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: 
InstRW<[SBWriteResGroup130], (instregex "DIVR_F32m")>; +def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F64m")>; +def: InstRW<[SBWriteResGroup130], (instregex "DIV_F32m")>; +def: InstRW<[SBWriteResGroup130], (instregex "DIV_F64m")>; + +def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 34; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI16m")>; +def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI32m")>; +def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI16m")>; +def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI32m")>; + +def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 36; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>; +def: InstRW<[SBWriteResGroup132], (instregex "VSQRTPSYm")>; + +def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort0]> { + let Latency = 45; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>; +def: InstRW<[SBWriteResGroup133], (instregex "VSQRTPDYr")>; + +def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 52; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>; +def: InstRW<[SBWriteResGroup134], (instregex "VSQRTPDYm")>; + +def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> { + let Latency = 114; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>; + } // SchedModel diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 6cb2a3694d92..ed53893b779c 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -369,5 +369,82 @@ def : WriteRes { let Latency = 100; } def : WriteRes { let Latency = 100; } def : WriteRes; def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// AVX instructions. 
+////////////////////////////////////////////////////////////////////////////////
+
+def WriteFAddY: SchedWriteRes<[JFPU0]> {
+  let Latency = 3;
+  let ResourceCycles = [2];
+}
+def : InstRW<[WriteFAddY], (instregex "VADD(SUB)?P(S|D)Yrr", "VSUBP(S|D)Yrr")>;
+
+def WriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0]> {
+  let Latency = 8;
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteFAddYLd, ReadAfterLd], (instregex "VADD(SUB)?P(S|D)Yrm", "VSUBP(S|D)Yrm")>;
+
+def WriteFDivY: SchedWriteRes<[JFPU1]> {
+  let Latency = 38;
+  let ResourceCycles = [38];
+}
+def : InstRW<[WriteFDivY], (instregex "VDIVP(D|S)Yrr")>;
+
+def WriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1]> {
+  let Latency = 43;
+  let ResourceCycles = [1, 38];
+}
+def : InstRW<[WriteFDivYLd, ReadAfterLd], (instregex "VDIVP(S|D)Yrm")>;
+
+def WriteVMULYPD: SchedWriteRes<[JFPU1]> {
+  let Latency = 4;
+  let ResourceCycles = [4];
+}
+def : InstRW<[WriteVMULYPD], (instregex "VMULPDYrr")>;
+
+def WriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1]> {
+  let Latency = 9;
+  let ResourceCycles = [1, 4];
+}
+def : InstRW<[WriteVMULYPDLd, ReadAfterLd], (instregex "VMULPDYrm")>;
+
+def WriteVMULYPS: SchedWriteRes<[JFPU1]> {
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+def : InstRW<[WriteVMULYPS], (instregex "VMULPSYrr", "VRCPPSYr", "VRSQRTPSYr")>;
+
+def WriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
+  let Latency = 7;
+  let ResourceCycles = [1, 2];
+}
+def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>;
+
+def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> {
+  let Latency = 54;
+  let ResourceCycles = [54];
+}
+def : InstRW<[WriteVSQRTYPD], (instregex "VSQRTPDYr")>;
+
+def WriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1]> {
+  let Latency = 59;
+  let ResourceCycles = [1, 54];
+}
+def : InstRW<[WriteVSQRTYPDLd], (instregex "VSQRTPDYm")>;
+
+def WriteVSQRTYPS: SchedWriteRes<[JFPU1]> {
+  let Latency = 42;
+  let ResourceCycles = [42];
+}
+def : InstRW<[WriteVSQRTYPS], (instregex "VSQRTPSYr")>;
+
+def WriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> {
+  let Latency = 47;
+  let ResourceCycles = [1, 42];
+}
+def : InstRW<[WriteVSQRTYPSLd], (instregex "VSQRTPSYm")>;
+
 } // SchedModel
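The Jaguar (BtVer2) entries above model a 256-bit AVX operation as occupying one 128-bit FP pipe for roughly twice as many cycles as the corresponding xmm form, and the *Ld variants add the load pipe plus five cycles of latency. The sketch below is only a rough, hedged illustration of how such SchedWriteRes data is commonly read (reciprocal throughput approximated by the busiest resource's cycle count; the MC scheduling model's real computation also accounts for micro-ops and resource groups); the numbers are copied from the defs above.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Illustrative only: treats "reciprocal throughput" as the largest
    // per-resource cycle count of a SchedWriteRes entry.
    struct WriteRes {
      const char *Name;
      int Latency;
      std::vector<int> ResourceCycles;
    };

    static int ApproxRThroughput(const WriteRes &W) {
      return *std::max_element(W.ResourceCycles.begin(), W.ResourceCycles.end());
    }

    int main() {
      const std::vector<WriteRes> Writes = {
          {"WriteFAddY (VADDPSYrr)", 3, {2}},      // ymm add: 2 cycles on JFPU0
          {"WriteFAddYLd (VADDPSYrm)", 8, {1, 2}}, // folded load adds 5 cycles
          {"WriteVMULYPD (VMULPDYrr)", 4, {4}},
          {"WriteFDivY (VDIVPSYrr)", 38, {38}},
      };
      for (const WriteRes &W : Writes)
        std::printf("%-25s latency=%2d rthroughput~%d\n", W.Name, W.Latency,
                    ApproxRThroughput(W));
      return 0;
    }

Read this way, VADDPSYrr keeps the same latency as the xmm form but half the throughput, which matches Jaguar executing a ymm op as two 128-bit halves.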
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 5ba8534d32d3..c9924f264939 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -142,10 +142,15 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::FDIV, MVT::v2f64, 69 }, // divpd
     { ISD::FADD, MVT::v2f64, 2 }, // addpd
     { ISD::FSUB, MVT::v2f64, 2 }, // subpd
-    // v2i64/v4i64 mul is custom lowered as a series of long
-    // multiplies(3), shifts(3) and adds(2).
-    // slm muldq version throughput is 2
-    { ISD::MUL, MVT::v2i64, 11 },
+    // v2i64/v4i64 mul is custom lowered as a series of long:
+    // multiplies(3), shifts(3) and adds(2)
+    // slm muldq version throughput is 2 and addq throughput 4
+    // thus: 3X2 (muldq throughput) + 3X1 (shift throughput) +
+    // 3X4 (addq throughput) = 17
+    { ISD::MUL, MVT::v2i64, 17 },
+    // slm addq\subq throughput is 4
+    { ISD::ADD, MVT::v2i64, 4 },
+    { ISD::SUB, MVT::v2i64, 4 },
   };
 
   if (ST->isSLM()) {
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index d8cf8d3f5da2..53223ab44316 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -124,6 +124,10 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
     } else if (I->use_empty()) {
       // Dead argument (which are always marked as promotable)
       ++NumArgumentsDead;
+
+      // There may be remaining metadata uses of the argument for things like
+      // llvm.dbg.value. Replace them with undef.
+      I->replaceAllUsesWith(UndefValue::get(I->getType()));
     } else {
       // Okay, this is being promoted. This means that the only uses are loads
       // or GEPs which are only used by loads
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 6d34ab8b0d96..233a36d2bc54 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -64,6 +64,12 @@ static cl::opt<float> ImportHotMultiplier(
     "import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"),
     cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
 
+static cl::opt<float> ImportCriticalMultiplier(
+    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
+    cl::value_desc("x"),
+    cl::desc(
+        "Multiply the `import-instr-limit` threshold for critical callsites"));
+
 // FIXME: This multiplier was not really tuned up.
 static cl::opt<float> ImportColdMultiplier(
     "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
@@ -207,6 +213,8 @@ static void computeImportForFunction(
       return ImportHotMultiplier;
     if (Hotness == CalleeInfo::HotnessType::Cold)
       return ImportColdMultiplier;
+    if (Hotness == CalleeInfo::HotnessType::Critical)
+      return ImportCriticalMultiplier;
     return 1.0;
   };
 
@@ -537,8 +545,6 @@ void llvm::thinLTOResolveWeakForLinkerModule(
   };
 
   auto updateLinkage = [&](GlobalValue &GV) {
-    if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
-      return;
     // See if the global summary analysis computed a new resolved linkage.
     const auto &GS = DefinedGlobals.find(GV.getGUID());
     if (GS == DefinedGlobals.end())
@@ -546,6 +552,21 @@ void llvm::thinLTOResolveWeakForLinkerModule(
     auto NewLinkage = GS->second->linkage();
     if (NewLinkage == GV.getLinkage())
       return;
+
+    // Switch the linkage to weakany if asked for, e.g. we do this for
+    // linker redefined symbols (via --wrap or --defsym).
+    // We record that the visibility should be changed here in `addThinLTO`
+    // as we need access to the resolution vectors for each input file in
+    // order to find which symbols have been redefined.
+    // We may consider reorganizing this code and moving the linkage recording
+    // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex.
+    if (NewLinkage == GlobalValue::WeakAnyLinkage) {
+      GV.setLinkage(NewLinkage);
+      return;
+    }
+
+    if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
+      return;
     // Check for a non-prevailing def that has interposable linkage
     // (e.g. non-odr weak or linkonce). In that case we can't simply
     // convert to available_externally, since it would lose the
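The FunctionImport.cpp hunks above add a third hotness tier to the importing heuristic: the per-callsite multiplier now distinguishes hot, cold, and critical callsites, with critical sites scaled very aggressively. A small self-contained sketch of that threshold arithmetic follows; the enum and the instruction limit of 100 are hypothetical stand-ins for the real plumbing and -import-instr-limit, not values taken from this patch.

    #include <cstdio>

    // Hedged sketch of the multiplier selection added to
    // computeImportForFunction(); defaults mirror the hunk above.
    enum class HotnessType { Unknown, Cold, Hot, Critical };

    static double importMultiplier(HotnessType Hotness) {
      if (Hotness == HotnessType::Hot)
        return 3.0;   // -import-hot-multiplier default
      if (Hotness == HotnessType::Cold)
        return 0.0;   // -import-cold-multiplier default
      if (Hotness == HotnessType::Critical)
        return 100.0; // new -import-critical-multiplier default
      return 1.0;
    }

    int main() {
      const unsigned ImportInstrLimit = 100; // hypothetical -import-instr-limit
      const HotnessType Sites[] = {HotnessType::Unknown, HotnessType::Cold,
                                   HotnessType::Hot, HotnessType::Critical};
      for (HotnessType H : Sites)
        std::printf("effective threshold = %.0f instructions\n",
                    ImportInstrLimit * importMultiplier(H));
      return 0;
    }

With the hypothetical limit of 100, a critical callsite would allow importing callees of up to 10000 instructions, while a cold callsite would allow none.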
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index f277a51ae659..3d57acf06e74 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -837,7 +837,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
     if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
       // The global is initialized when the store to it occurs.
       new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
-                    SI->getOrdering(), SI->getSynchScope(), SI);
+                    SI->getOrdering(), SI->getSyncScopeID(), SI);
       SI->eraseFromParent();
       continue;
     }
@@ -854,7 +854,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
       // Replace the cmp X, 0 with a use of the bool value.
       // Sink the load to where the compare was, if atomic rules allow us to.
       Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
-                               LI->getOrdering(), LI->getSynchScope(),
+                               LI->getOrdering(), LI->getSyncScopeID(),
                                LI->isUnordered() ? (Instruction*)ICI : LI);
       InitBoolUsed = true;
       switch (ICI->getPredicate()) {
@@ -1605,7 +1605,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
           assert(LI->getOperand(0) == GV && "Not a copy!");
           // Insert a new load, to preserve the saved value.
           StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
-                                  LI->getOrdering(), LI->getSynchScope(), LI);
+                                  LI->getOrdering(), LI->getSyncScopeID(), LI);
         } else {
           assert((isa(StoredVal) || isa(StoredVal)) &&
                  "This is not a form that we understand!");
@@ -1614,12 +1614,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
         }
       }
       new StoreInst(StoreVal, NewGV, false, 0,
-                    SI->getOrdering(), SI->getSynchScope(), SI);
+                    SI->getOrdering(), SI->getSyncScopeID(), SI);
     } else {
       // Change the load into a load of bool then a select.
       LoadInst *LI = cast<LoadInst>(UI);
       LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
-                                   LI->getOrdering(), LI->getSynchScope(), LI);
+                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
       Value *NSI;
       if (IsOneZero)
         NSI = new ZExtInst(NLI, LI->getType(), "", LI);
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index ad89e40661c6..00ddb93df830 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -989,5 +989,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
     // And delete the actual function from the module.
     M.getFunctionList().erase(DeadF);
   }
-  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  // Even if we change the IR, we update the core CGSCC data structures and so
+  // can preserve the proxy to the function analysis manager.
+  PreservedAnalyses PA;
+  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+  return PA;
 }
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index b406c22c69d7..693df5e7ba92 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -855,15 +855,20 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
     FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
                              Name + ".cfi_jt", &M);
     FDecl->setVisibility(GlobalValue::HiddenVisibility);
-  } else {
-    // Definition.
-    assert(isDefinition);
+  } else if (isDefinition) {
     F->setName(Name + ".cfi");
     F->setLinkage(GlobalValue::ExternalLinkage);
     F->setVisibility(GlobalValue::HiddenVisibility);
     FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
                              Name, &M);
     FDecl->setVisibility(Visibility);
+  } else {
+    // Function definition without type metadata, where some other translation
+    // unit contained a declaration with type metadata. This normally happens
+    // during mixed CFI + non-CFI compilation. We do nothing with the function
+    // so that it is treated the same way as a function defined outside of the
+    // LTO unit.
+    return;
   }
 
   if (F->isWeakForLinker())
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 5b1b58b89c32..0b319f6a488b 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -188,6 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() {
 static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
    PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
 
+/// Check if GlobalExtensions is constructed and not empty.
+/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
+/// the construction of the object.
+static bool GlobalExtensionsNotEmpty() {
+  return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
+}
+
 void PassManagerBuilder::addGlobalExtension(
     PassManagerBuilder::ExtensionPointTy Ty,
     PassManagerBuilder::ExtensionFn Fn) {
@@ -200,9 +207,12 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
 
 void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
                                            legacy::PassManagerBase &PM) const {
-  for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
-    if ((*GlobalExtensions)[i].first == ETy)
-      (*GlobalExtensions)[i].second(*this, PM);
+  if (GlobalExtensionsNotEmpty()) {
+    for (auto &Ext : *GlobalExtensions) {
+      if (Ext.first == ETy)
+        Ext.second(*this, PM);
+    }
+  }
   for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
     if (Extensions[i].first == ETy)
       Extensions[i].second(*this, PM);
@@ -415,7 +425,7 @@ void PassManagerBuilder::populateModulePassManager(
   // builds.
The function merging pass is if (MergeFunctions) MPM.add(createMergeFunctionsPass()); - else if (!GlobalExtensions->empty() || !Extensions.empty()) + else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 8d494fe9cde2..8ef6bb652309 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -271,7 +271,8 @@ void splitAndWriteThinLTOBitcode( if (!ArgT || ArgT->getBitWidth() > 64) return; } - if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) + if (!F->isDeclaration() && + computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone) EligibleVirtualFns.insert(F); }); } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index d5f0dd191415..809471cfd74f 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -164,7 +164,7 @@ namespace { /// class FAddCombine { public: - FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {} + FAddCombine(InstCombiner::BuilderTy &B) : Builder(B), Instr(nullptr) {} Value *simplify(Instruction *FAdd); private: @@ -187,7 +187,7 @@ namespace { Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst, bool NoNumber = false); - InstCombiner::BuilderTy *Builder; + InstCombiner::BuilderTy &Builder; Instruction *Instr; // Debugging stuff are clustered here. @@ -735,7 +735,7 @@ Value *FAddCombine::createNaryFAdd } Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFSub(Opnd0, Opnd1); + Value *V = Builder.CreateFSub(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); return V; @@ -750,21 +750,21 @@ Value *FAddCombine::createFNeg(Value *V) { } Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFAdd(Opnd0, Opnd1); + Value *V = Builder.CreateFAdd(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFMul(Opnd0, Opnd1); + Value *V = Builder.CreateFMul(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); return V; } Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { - Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + Value *V = Builder.CreateFDiv(Opnd0, Opnd1); if (Instruction *I = dyn_cast(V)) createInstPostProc(I); return V; @@ -895,7 +895,7 @@ bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS, // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C)) // XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even static Value *checkForNegativeOperand(BinaryOperator &I, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // This function creates 2 instructions to replace ADD, we need at least one @@ -919,13 +919,13 @@ static Value *checkForNegativeOperand(BinaryOperator &I, // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { - Value *NewAnd = Builder->CreateAnd(Z, *C1); - return Builder->CreateSub(RHS, NewAnd, "sub"); + Value *NewAnd = 
Builder.CreateAnd(Z, *C1); + return Builder.CreateSub(RHS, NewAnd, "sub"); } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) { // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1)) // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1)) - Value *NewOr = Builder->CreateOr(Z, ~(*C1)); - return Builder->CreateSub(RHS, NewOr, "sub"); + Value *NewOr = Builder.CreateOr(Z, ~(*C1)); + return Builder.CreateSub(RHS, NewOr, "sub"); } } } @@ -944,8 +944,8 @@ static Value *checkForNegativeOperand(BinaryOperator &I, if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) if (C1->countTrailingZeros() == 0) if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) { - Value *NewOr = Builder->CreateOr(Z, ~(*C2)); - return Builder->CreateSub(RHS, NewOr, "sub"); + Value *NewOr = Builder.CreateOr(Z, ~(*C2)); + return Builder.CreateSub(RHS, NewOr, "sub"); } return nullptr; } @@ -1027,7 +1027,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Instruction *X = foldAddWithConstant(I, *Builder)) + if (Instruction *X = foldAddWithConstant(I, Builder)) return X; // FIXME: This should be moved into the above helper function to allow these @@ -1060,7 +1060,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ExtendAmt) { Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); - Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); + Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext"); return BinaryOperator::CreateAShr(NewShl, ShAmt); } @@ -1084,7 +1084,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Instruction *NV = foldOpWithConstantIntoOperand(I)) return NV; - if (I.getType()->getScalarType()->isIntegerTy(1)) + if (I.getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateXor(LHS, RHS); // X + X --> X << 1 @@ -1101,7 +1101,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *LHSV = dyn_castNegVal(LHS)) { if (!isa(RHS)) if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); + Value *NewAdd = Builder.CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); } @@ -1148,7 +1148,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (AddRHSHighBits == AddRHSHighBitsAnd) { // Okay, the xform is safe. Insert the new add pronto. - Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); + Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName()); return BinaryOperator::CreateAnd(NewAdd, C2); } } @@ -1191,7 +1191,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = - Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); + Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); return new SExtInst(NewAdd, I.getType()); } } @@ -1208,7 +1208,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { willNotOverflowSignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), + Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new SExtInst(NewAdd, I.getType()); } @@ -1227,7 +1227,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. 
Value *NewAdd = - Builder->CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); + Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); return new ZExtInst(NewAdd, I.getType()); } } @@ -1244,7 +1244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { willNotOverflowUnsignedAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNUWAdd( + Value *NewAdd = Builder.CreateNUWAdd( LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); return new ZExtInst(NewAdd, I.getType()); } @@ -1362,8 +1362,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { ConstantExpr::getSIToFP(CI, I.getType()) == CFP && willNotOverflowSignedAdd(LHSIntVal, CI, I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - CI, "addconv"); + Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -1381,8 +1380,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - RHSIntVal, "addconv"); + Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv"); return new SIToFPInst(NewAdd, I.getType()); } } @@ -1480,14 +1478,14 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // pointer, subtract it from the offset we have. if (GEP2) { Value *Offset = EmitGEPOffset(GEP2); - Result = Builder->CreateSub(Result, Offset); + Result = Builder.CreateSub(Result, Offset); } // If we have p - gep(p, ...) then we have to negate the result. if (Swapped) - Result = Builder->CreateNeg(Result, "diff.neg"); + Result = Builder.CreateNeg(Result, "diff.neg"); - return Builder->CreateIntCast(Result, Ty, true); + return Builder.CreateIntCast(Result, Ty, true); } Instruction *InstCombiner::visitSub(BinaryOperator &I) { @@ -1522,7 +1520,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return Res; } - if (I.getType()->getScalarType()->isIntegerTy(1)) + if (I.getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateXor(Op0, Op1); // Replace (-1 - A) with (~A). @@ -1552,12 +1550,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Fold (sub 0, (zext bool to B)) --> (sext bool to B) if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X)))) - if (X->getType()->getScalarType()->isIntegerTy(1)) + if (X->getType()->isIntOrIntVectorTy(1)) return CastInst::CreateSExtOrBitCast(X, Op1->getType()); // Fold (sub 0, (sext bool to B)) --> (zext bool to B) if (C->isNullValue() && match(Op1, m_SExt(m_Value(X)))) - if (X->getType()->getScalarType()->isIntegerTy(1)) + if (X->getType()->isIntOrIntVectorTy(1)) return CastInst::CreateZExtOrBitCast(X, Op1->getType()); } @@ -1615,7 +1613,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // ((X | Y) - X) --> (~X & Y) if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1))))) return BinaryOperator::CreateAnd( - Y, Builder->CreateNot(Op1, Op1->getName() + ".not")); + Y, Builder.CreateNot(Op1, Op1->getName() + ".not")); } if (Op1->hasOneUse()) { @@ -1625,13 +1623,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // (X - (Y - Z)) --> (X + (Z - Y)). 
if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) return BinaryOperator::CreateAdd(Op0, - Builder->CreateSub(Z, Y, Op1->getName())); + Builder.CreateSub(Z, Y, Op1->getName())); // (X - (X & Y)) --> (X & ~Y) // if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0)))) return BinaryOperator::CreateAnd(Op0, - Builder->CreateNot(Y, Y->getName() + ".not")); + Builder.CreateNot(Y, Y->getName() + ".not")); // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow. if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) && @@ -1648,7 +1646,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // 'nuw' is dropped in favor of the canonical form. if (match(Op1, m_SExt(m_Value(Y))) && Y->getType()->getScalarSizeInBits() == 1) { - Value *Zext = Builder->CreateZExt(Y, I.getType()); + Value *Zext = Builder.CreateZExt(Y, I.getType()); BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext); Add->setHasNoSignedWrap(I.hasNoSignedWrap()); return Add; @@ -1659,13 +1657,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *A, *B; Constant *CI; if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B))))) - return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); + return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B)); // X - A*CI -> X + A*-CI // No need to handle commuted multiply because multiply handling will // ensure constant will be move to the right hand side. if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) { - Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); + Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI)); return BinaryOperator::CreateAdd(Op0, NewMul); } } @@ -1729,14 +1727,14 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { } if (FPTruncInst *FPTI = dyn_cast(Op1)) { if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) { - Value *NewTrunc = Builder->CreateFPTrunc(V, I.getType()); + Value *NewTrunc = Builder.CreateFPTrunc(V, I.getType()); Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc); NewI->copyFastMathFlags(&I); return NewI; } } else if (FPExtInst *FPEI = dyn_cast(Op1)) { if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) { - Value *NewExt = Builder->CreateFPExt(V, I.getType()); + Value *NewExt = Builder.CreateFPExt(V, I.getType()); Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt); NewI->copyFastMathFlags(&I); return NewI; diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index db98be2c98f5..773c86e23707 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -54,17 +54,17 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC) { /// instruction. The sign is passed in to determine which kind of predicate to /// use in the new icmp instruction. static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ICmpInst::Predicate NewPred; if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred)) return NewConstant; - return Builder->CreateICmp(NewPred, LHS, RHS); + return Builder.CreateICmp(NewPred, LHS, RHS); } /// This is the complement of getFCmpCode, which turns an opcode and two /// operands into either a FCmp instruction, or a true/false constant. 
static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { const auto Pred = static_cast(Code); assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE && "Unexpected FCmp predicate!"); @@ -72,53 +72,45 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS, return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); if (Pred == FCmpInst::FCMP_TRUE) return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); - return Builder->CreateFCmp(Pred, LHS, RHS); + return Builder.CreateFCmp(Pred, LHS, RHS); } -/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B)) +/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or +/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B)) /// \param I Binary operator to transform. /// \return Pointer to node that must replace the original binary operator, or /// null pointer if no transformation was made. -Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) { - IntegerType *ITy = dyn_cast(I.getType()); - - // Can't do vectors. - if (I.getType()->isVectorTy()) - return nullptr; - - // Can only do bitwise ops. - if (!I.isBitwiseLogicOp()) - return nullptr; +static Value *SimplifyBSwap(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { + assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying"); Value *OldLHS = I.getOperand(0); Value *OldRHS = I.getOperand(1); - ConstantInt *ConstLHS = dyn_cast(OldLHS); - ConstantInt *ConstRHS = dyn_cast(OldRHS); - IntrinsicInst *IntrLHS = dyn_cast(OldLHS); - IntrinsicInst *IntrRHS = dyn_cast(OldRHS); - bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap); - bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap); - if (!IsBswapLHS && !IsBswapRHS) + Value *NewLHS; + if (!match(OldLHS, m_BSwap(m_Value(NewLHS)))) return nullptr; - if (!IsBswapLHS && !ConstLHS) + Value *NewRHS; + const APInt *C; + + if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) { + // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) + if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse()) + return nullptr; + // NewRHS initialized by the matcher. + } else if (match(OldRHS, m_APInt(C))) { + // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) + if (!OldLHS->hasOneUse()) + return nullptr; + NewRHS = ConstantInt::get(I.getType(), C->byteSwap()); + } else return nullptr; - if (!IsBswapRHS && !ConstRHS) - return nullptr; - - /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) ) - /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) ) - Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) : - Builder->getInt(ConstLHS->getValue().byteSwap()); - - Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) : - Builder->getInt(ConstRHS->getValue().byteSwap()); - - Value *BinOp = Builder->CreateBinOp(I.getOpcode(), NewLHS, NewRHS); - Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy); - return Builder->CreateCall(F, BinOp); + Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS); + Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, + I.getType()); + return Builder.CreateCall(F, BinOp); } /// This handles expressions of the form ((val OP C1) & C2). 
Where @@ -137,7 +129,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, case Instruction::Xor: if (Op->hasOneUse()) { // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) - Value *And = Builder->CreateAnd(X, AndRHS); + Value *And = Builder.CreateAnd(X, AndRHS); And->takeName(Op); return BinaryOperator::CreateXor(And, Together); } @@ -150,7 +142,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, // NOTE: This reduces the number of bits set in the & mask, which // can expose opportunities for store narrowing. Together = ConstantExpr::getXor(AndRHS, Together); - Value *And = Builder->CreateAnd(X, Together); + Value *And = Builder.CreateAnd(X, Together); And->takeName(Op); return BinaryOperator::CreateOr(And, OpRHS); } @@ -182,7 +174,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, return &TheAnd; } else { // Pull the XOR out of the AND. - Value *NewAnd = Builder->CreateAnd(X, AndRHS); + Value *NewAnd = Builder.CreateAnd(X, AndRHS); NewAnd->takeName(Op); return BinaryOperator::CreateXor(NewAnd, AndRHS); } @@ -198,7 +190,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); - ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask); + ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShlMask); if (CI->getValue() == ShlMask) // Masking out bits that the shift already masks. @@ -218,7 +210,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask); + ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShrMask); if (CI->getValue() == ShrMask) // Masking out bits that the shift already masks. @@ -238,12 +230,12 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, uint32_t BitWidth = AndRHS->getType()->getBitWidth(); uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth); APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); - Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask); + Constant *C = Builder.getInt(AndRHS->getValue() & ShrMask); if (C == AndRHS) { // Masking out bits shifted in. // (Val ashr C1) & C2 -> (Val lshr C1) & C2 // Make the argument unsigned. Value *ShVal = Op->getOperand(0); - ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName()); + ShVal = Builder.CreateLShr(ShVal, OpRHS, Op->getName()); return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName()); } } @@ -269,15 +261,15 @@ Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi, ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE; if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) { Pred = isSigned ? 
ICmpInst::getSignedPredicate(Pred) : Pred; - return Builder->CreateICmp(Pred, V, ConstantInt::get(Ty, Hi)); + return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi)); } // V >= Lo && V < Hi --> V - Lo u< Hi - Lo // V < Lo || V >= Hi --> V - Lo u>= Hi - Lo Value *VMinusLo = - Builder->CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off"); + Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off"); Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo); - return Builder->CreateICmp(Pred, VMinusLo, HiMinusLo); + return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo); } /// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns @@ -523,7 +515,7 @@ static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C, /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E) /// into a single (icmp(A & X) ==/!= Y). static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, - llvm::InstCombiner::BuilderTy *Builder) { + llvm::InstCombiner::BuilderTy &Builder) { Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate(); unsigned Mask = @@ -556,27 +548,27 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, if (Mask & Mask_AllZeros) { // (icmp eq (A & B), 0) & (icmp eq (A & D), 0) // -> (icmp eq (A & (B|D)), 0) - Value *NewOr = Builder->CreateOr(B, D); - Value *NewAnd = Builder->CreateAnd(A, NewOr); + Value *NewOr = Builder.CreateOr(B, D); + Value *NewAnd = Builder.CreateAnd(A, NewOr); // We can't use C as zero because we might actually handle // (icmp ne (A & B), B) & (icmp ne (A & D), D) // with B and D, having a single bit set. Value *Zero = Constant::getNullValue(A->getType()); - return Builder->CreateICmp(NewCC, NewAnd, Zero); + return Builder.CreateICmp(NewCC, NewAnd, Zero); } if (Mask & BMask_AllOnes) { // (icmp eq (A & B), B) & (icmp eq (A & D), D) // -> (icmp eq (A & (B|D)), (B|D)) - Value *NewOr = Builder->CreateOr(B, D); - Value *NewAnd = Builder->CreateAnd(A, NewOr); - return Builder->CreateICmp(NewCC, NewAnd, NewOr); + Value *NewOr = Builder.CreateOr(B, D); + Value *NewAnd = Builder.CreateAnd(A, NewOr); + return Builder.CreateICmp(NewCC, NewAnd, NewOr); } if (Mask & AMask_AllOnes) { // (icmp eq (A & B), A) & (icmp eq (A & D), A) // -> (icmp eq (A & (B&D)), A) - Value *NewAnd1 = Builder->CreateAnd(B, D); - Value *NewAnd2 = Builder->CreateAnd(A, NewAnd1); - return Builder->CreateICmp(NewCC, NewAnd2, A); + Value *NewAnd1 = Builder.CreateAnd(B, D); + Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1); + return Builder.CreateICmp(NewCC, NewAnd2, A); } // Remaining cases assume at least that B and D are constant, and depend on @@ -644,10 +636,10 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, (CCst->getValue() ^ ECst->getValue())).getBoolValue()) return ConstantInt::get(LHS->getType(), !IsAnd); - Value *NewOr1 = Builder->CreateOr(B, D); + Value *NewOr1 = Builder.CreateOr(B, D); Value *NewOr2 = ConstantExpr::getOr(CCst, ECst); - Value *NewAnd = Builder->CreateAnd(A, NewOr1); - return Builder->CreateICmp(NewCC, NewAnd, NewOr2); + Value *NewAnd = Builder.CreateAnd(A, NewOr1); + return Builder.CreateICmp(NewCC, NewAnd, NewOr2); } return nullptr; @@ -705,13 +697,13 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1, if (Inverted) NewPred = ICmpInst::getInversePredicate(NewPred); - return Builder->CreateICmp(NewPred, Input, RangeEnd); + return 
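insertRangeTest relies on the classic unsigned range-check trick: a two-sided test against [Lo, Hi) becomes a single unsigned compare after subtracting Lo, because values below Lo wrap around to large unsigned numbers. A short sketch with made-up bounds, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Lo = 10, Hi = 20;
  for (uint32_t v = 0; v < 64; ++v) {
    bool twoCmps = v >= Lo && v < Hi;
    bool oneCmp  = (v - Lo) < (Hi - Lo);  // v < Lo wraps to a huge value, so this stays false
    assert(twoCmps == oneCmp);
  }
  return 0;
}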
Builder.CreateICmp(NewPred, Input, RangeEnd); } static Value * foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, bool JoinedByAnd, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Value *X = LHS->getOperand(0); if (X != RHS->getOperand(0)) return nullptr; @@ -742,8 +734,8 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2 // We choose an 'or' with a Pow2 constant rather than the inverse mask with // 'and' because that may lead to smaller codegen from a smaller constant. - Value *Or = Builder->CreateOr(X, ConstantInt::get(X->getType(), Xor)); - return Builder->CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2)); + Value *Or = Builder.CreateOr(X, ConstantInt::get(X->getType(), Xor)); + return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2)); } // Special case: get the ordering right when the values wrap around zero. @@ -755,9 +747,9 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, // (X == 13 || X == 14) --> X - 13 <=u 1 // (X != 13 && X != 14) --> X - 13 >u 1 // An 'add' is the canonical IR form, so favor that over a 'sub'. - Value *Add = Builder->CreateAdd(X, ConstantInt::get(X->getType(), -(*C1))); + Value *Add = Builder.CreateAdd(X, ConstantInt::get(X->getType(), -(*C1))); auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE; - return Builder->CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1)); + return Builder.CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1)); } return nullptr; @@ -793,10 +785,10 @@ Value *InstCombiner::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, if (A == C && isKnownToBeAPowerOfTwo(B, false, 0, &CxtI) && isKnownToBeAPowerOfTwo(D, false, 0, &CxtI)) { - Value *Mask = Builder->CreateOr(B, D); - Value *Masked = Builder->CreateAnd(A, Mask); + Value *Mask = Builder.CreateOr(B, D); + Value *Masked = Builder.CreateAnd(A, Mask); auto NewPred = JoinedByAnd ? 
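foldAndOrOfEqualityCmpsWithConstants merges two equality tests either by OR-ing in the single bit in which the constants differ, or, for consecutive constants, by the subtract-and-unsigned-compare trick. A small sketch using 8/12 and 13/14 as example constants, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 64; ++x) {
    // 8 ^ 12 == 4 is a power of two, so (X == 8 || X == 12) --> (X | 4) == 12.
    assert(((x == 8) || (x == 12)) == ((x | 4u) == 12u));
    // Consecutive constants: (X == 13 || X == 14) --> (X - 13) u<= 1.
    assert(((x == 13) || (x == 14)) == ((x - 13u) <= 1u));
  }
  return 0;
}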
ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; - return Builder->CreateICmp(NewPred, Masked, Mask); + return Builder.CreateICmp(NewPred, Masked, Mask); } } @@ -855,8 +847,8 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) if ((PredL == ICmpInst::ICMP_ULT && LHSC->getValue().isPowerOf2()) || (PredL == ICmpInst::ICMP_EQ && LHSC->isZero())) { - Value *NewOr = Builder->CreateOr(LHS0, RHS0); - return Builder->CreateICmp(PredL, NewOr, LHSC); + Value *NewOr = Builder.CreateOr(LHS0, RHS0); + return Builder.CreateICmp(PredL, NewOr, LHSC); } } @@ -888,10 +880,10 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); if ((Low & AndC->getValue()).isNullValue() && (Low & BigC->getValue()).isNullValue()) { - Value *NewAnd = Builder->CreateAnd(V, Low | AndC->getValue()); + Value *NewAnd = Builder.CreateAnd(V, Low | AndC->getValue()); APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue(); Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N); - return Builder->CreateICmp(PredL, NewAnd, NewVal); + return Builder.CreateICmp(PredL, NewAnd, NewVal); } } } @@ -943,14 +935,14 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13 - return Builder->CreateICmpULT(LHS0, LHSC); - if (LHSC->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13 + return Builder.CreateICmpULT(LHS0, LHSC); + if (LHSC->isZero()) // (X != 0 & X u< 14) -> X-1 u< 13 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), false, true); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13 - return Builder->CreateICmpSLT(LHS0, LHSC); + return Builder.CreateICmpSLT(LHS0, LHSC); break; // (X != 13 & X s< 15) -> no change case ICmpInst::ICMP_NE: // Potential folds for this case should already be handled. @@ -963,7 +955,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 - return Builder->CreateICmp(PredL, LHS0, RHSC); + return Builder.CreateICmp(PredL, LHS0, RHSC); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) getValue() + 1, RHSC->getValue(), @@ -976,7 +968,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 - return Builder->CreateICmp(PredL, LHS0, RHSC); + return Builder.CreateICmp(PredL, LHS0, RHSC); break; // (X s> 13 & X != 15) -> no change case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true, @@ -1025,15 +1017,15 @@ Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // If either of the constants are nans, then the whole thing returns // false. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return Builder->getFalse(); - return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.getFalse(); + return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. 
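The (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) merge in foldAndOfICmps works because an OR is zero exactly when both operands are zero. A trivial exhaustive check over small values, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 8; ++a)
    for (uint32_t b = 0; b < 8; ++b)
      assert(((a == 0) && (b == 0)) == ((a | b) == 0));
  return 0;
}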
This occurs because the canonical form of // "fcmp ord x,x" is "fcmp ord x, 0". if (isa(LHS->getOperand(1)) && isa(RHS->getOperand(1))) - return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); return nullptr; } @@ -1088,7 +1080,7 @@ bool InstCombiner::shouldOptimizeCast(CastInst *CI) { /// Fold {and,or,xor} (cast X), C. static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Constant *C; if (!match(Logic.getOperand(1), m_Constant(C))) return nullptr; @@ -1107,7 +1099,7 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy); if (ZextTruncC == C) { // LogicOpc (zext X), C --> zext (LogicOpc X, C) - Value *NewOp = Builder->CreateBinOp(LogicOpc, X, TruncC); + Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC); return new ZExtInst(NewOp, DestTy); } } @@ -1150,7 +1142,7 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { // fold logic(cast(A), cast(B)) -> cast(logic(A, B)) if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { - Value *NewOp = Builder->CreateBinOp(LogicOpc, Cast0Src, Cast1Src, + Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src, I.getName()); return CastInst::Create(CastOpcode, NewOp, DestTy); } @@ -1196,15 +1188,14 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) { // Fold (and (sext bool to A), B) --> (select bool, B, 0) Value *X = nullptr; - if (match(Op0, m_SExt(m_Value(X))) && - X->getType()->getScalarType()->isIntegerTy(1)) { + if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { Value *Zero = Constant::getNullValue(Op1->getType()); return SelectInst::Create(X, Op1, Zero); } // Fold (and ~(sext bool to A), B) --> (select bool, 0, B) if (match(Op0, m_Not(m_SExt(m_Value(X)))) && - X->getType()->getScalarType()->isIntegerTy(1)) { + X->getType()->isIntOrIntVectorTy(1)) { Value *Zero = Constant::getNullValue(Op0->getType()); return SelectInst::Create(X, Zero, Op1); } @@ -1283,14 +1274,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return &I; // Do this before using distributive laws to catch simple and/or/not patterns. - if (Instruction *Xor = foldAndToXor(I, *Builder)) + if (Instruction *Xor = foldAndToXor(I, Builder)) return Xor; // (A|B)&(A|C) -> A|(B&C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); if (ConstantInt *AndRHS = dyn_cast(Op1)) { @@ -1310,15 +1301,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { APInt NotAndRHS(~AndRHSMask); if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) { // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); + Value *NewRHS = Builder.CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); } if (!isa(Op0RHS) && MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) { // Not masking anything out for the RHS, move to LHS. 
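foldBoolSextMaskToSelect relies on a sign-extended i1 being either all-ones or all-zeros, so ANDing with it yields either the other operand or zero, which is exactly a select. A scalar sketch of the same idea, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (int b = 0; b <= 1; ++b) {
    int32_t sextB = b ? -1 : 0;          // sign-extended bool: all-ones or all-zeros
    int32_t v = 0x1234;
    assert((sextB & v) == (b ? v : 0));  // (and (sext bool), B) --> select bool, B, 0
  }
  return 0;
}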
- Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); + Value *NewLHS = Builder.CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); } @@ -1337,7 +1328,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (1 >> x) & 1 --> zext(x == 0) if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) { Value *NewICmp = - Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); + Builder.CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); return new ZExtInst(NewICmp, I.getType()); } break; @@ -1360,11 +1351,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType()); Value *BinOp; if (isa(Op0LHS)) - BinOp = Builder->CreateBinOp(Op0I->getOpcode(), X, TruncC1); + BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1); else - BinOp = Builder->CreateBinOp(Op0I->getOpcode(), TruncC1, X); + BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X); auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType()); - auto *And = Builder->CreateAnd(BinOp, TruncC2); + auto *And = Builder.CreateAnd(BinOp, TruncC2); return new ZExtInst(And, I.getType()); } } @@ -1384,7 +1375,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // into : and (trunc X to T), trunc(YC) & C2 // This will fold the two constants together, which may allow // other simplifications. - Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); + Value *NewCast = Builder.CreateTrunc(X, I.getType(), "and.shrunk"); Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); C3 = ConstantExpr::getAnd(C3, AndRHS); return BinaryOperator::CreateAnd(NewCast, C3); @@ -1396,7 +1387,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) return FoldedLogic; - if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) return DeMorgan; { @@ -1422,7 +1413,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // an endless loop. By checking that A is non-constant we ensure that // we will never get to the loop. 
if (A == tmpOp0 && !isa(A)) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); + return BinaryOperator::CreateAnd(A, Builder.CreateNot(B)); } } @@ -1436,13 +1427,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) - return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C)); + return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C)); // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) - return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); + return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) // (A | B) & (B ^ (~A)) -> (A & B) @@ -1474,18 +1465,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast(Y)) if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); } if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast(Y)) if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); } } @@ -1567,14 +1558,14 @@ static Value *getSelectCondition(Value *A, Value *B, InstCombiner::BuilderTy &Builder) { // If these are scalars or vectors of i1, A can be used directly. Type *Ty = A->getType(); - if (match(A, m_Not(m_Specific(B))) && Ty->getScalarType()->isIntegerTy(1)) + if (match(A, m_Not(m_Specific(B))) && Ty->isIntOrIntVectorTy(1)) return A; // If A and B are sign-extended, look through the sexts to find the booleans. Value *Cond; Value *NotB; if (match(A, m_SExt(m_Value(Cond))) && - Cond->getType()->getScalarType()->isIntegerTy(1) && + Cond->getType()->isIntOrIntVectorTy(1) && match(B, m_OneUse(m_Not(m_Value(NotB))))) { NotB = peekThroughBitcast(NotB, true); if (match(NotB, m_SExt(m_Specific(Cond)))) @@ -1596,7 +1587,7 @@ static Value *getSelectCondition(Value *A, Value *B, // operand, see if the constants are inverse bitmasks. 
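The A & (A ^ B) --> A & ~B rewrite above follows from a bit of A ^ B being set, where A is set, only when the corresponding bit of B is clear. A brief exhaustive check over 4-bit values, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 16; ++a)
    for (uint32_t b = 0; b < 16; ++b)
      assert((a & (a ^ b)) == (a & ~b));
  return 0;
}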
if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) && match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) && - Cond->getType()->getScalarType()->isIntegerTy(1) && + Cond->getType()->isIntOrIntVectorTy(1) && areInverseVectorBitmasks(AC, BC)) { AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty)); return Builder.CreateXor(Cond, AC); @@ -1687,9 +1678,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, RangeDiff.ugt(LHSC->getValue())) { Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC); - Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskC); - Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddC); - return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSC)); + Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC); + Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC); + return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC); } } } @@ -1736,9 +1727,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, A = LHS->getOperand(1); } if (A && B) - return Builder->CreateICmp( + return Builder.CreateICmp( ICmpInst::ICMP_UGE, - Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); + Builder.CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); } // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n @@ -1759,8 +1750,8 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (LHSC == RHSC && PredL == PredR) { // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) if (PredL == ICmpInst::ICMP_NE && LHSC->isZero()) { - Value *NewOr = Builder->CreateOr(LHS0, RHS0); - return Builder->CreateICmp(PredL, NewOr, LHSC); + Value *NewOr = Builder.CreateOr(LHS0, RHS0); + return Builder.CreateICmp(PredL, NewOr, LHSC); } } @@ -1770,7 +1761,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, ConstantInt *AddC; if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC)))) if (RHSC->getValue() + AddC->getValue() == LHSC->getValue()) - return Builder->CreateICmpULE(LHS0, LHSC); + return Builder.CreateICmpULE(LHS0, LHSC); } // From here on, we only handle: @@ -1886,18 +1877,18 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // If either of the constants are nans, then the whole thing returns // true. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return Builder->getTrue(); + return Builder.getTrue(); // Otherwise, no need to compare the two constants, compare the // rest. - return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp uno x,x" is "fcmp uno x, 0". if (isa(LHS->getOperand(1)) && isa(RHS->getOperand(1))) - return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); return nullptr; } @@ -1916,7 +1907,7 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { /// when the XOR of the two constants is "all ones" (-1). static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ConstantInt *CI1 = dyn_cast(C); if (!CI1) return nullptr; @@ -1928,7 +1919,7 @@ static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, if (!Xor.isAllOnesValue()) return nullptr; if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd((V1 == A) ? 
B : A, CI1); + Value *NewOp = Builder.CreateAnd((V1 == A) ? B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } @@ -1946,7 +1937,7 @@ static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, /// when the XOR of the two constants is "all ones" (-1). static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ConstantInt *CI1 = dyn_cast(C); if (!CI1) return nullptr; @@ -1961,7 +1952,7 @@ static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, return nullptr; if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd(V1 == A ? B : A, CI1); + Value *NewOp = Builder.CreateAnd(V1 == A ? B : A, CI1); return BinaryOperator::CreateXor(NewOp, V1); } @@ -1987,14 +1978,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return &I; // Do this before using distributive laws to catch simple and/or/not patterns. - if (Instruction *Xor = foldOrToXor(I, *Builder)) + if (Instruction *Xor = foldOrToXor(I, Builder)) return Xor; // (A&B)|(A&C) -> A&(B|C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); if (isa(Op1)) @@ -2011,7 +2002,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (X^C)|Y -> (X|Y)^C iff Y&C == 0 if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) && MaskedValueIsZero(Op1, *C, 0, &I)) { - Value *NOr = Builder->CreateOr(A, Op1); + Value *NOr = Builder.CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, ConstantInt::get(NOr->getType(), *C)); @@ -2020,7 +2011,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // Y|(X^C) -> (X|Y)^C iff Y&C == 0 if (match(Op1, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) && MaskedValueIsZero(Op0, *C, 0, &I)) { - Value *NOr = Builder->CreateOr(A, Op0); + Value *NOr = Builder.CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, ConstantInt::get(NOr->getType(), *C)); @@ -2058,7 +2049,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (V2 == B && MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V) return BinaryOperator::CreateAnd(A, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); // Or commutes, try both ways. if (match(B, m_Or(m_Value(V1), m_Value(V2))) && ((V1 == A && @@ -2066,7 +2057,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (V2 == A && MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V) return BinaryOperator::CreateAnd(B, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. @@ -2075,9 +2066,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (C3->getValue() & ~C1->getValue()).isNullValue() && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && (C4->getValue() & ~C2->getValue()).isNullValue()) { - V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); + V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); return BinaryOperator::CreateAnd(V2, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); } } } @@ -2087,21 +2078,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // 'or' that it is replacing. 
if (Op0->hasOneUse() || Op1->hasOneUse()) { // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants. - if (Value *V = matchSelectFromAndOr(A, C, B, D, *Builder)) + if (Value *V = matchSelectFromAndOr(A, C, B, D, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(A, C, D, B, *Builder)) + if (Value *V = matchSelectFromAndOr(A, C, D, B, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(C, A, B, D, *Builder)) + if (Value *V = matchSelectFromAndOr(C, A, B, D, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(C, A, D, B, *Builder)) + if (Value *V = matchSelectFromAndOr(C, A, D, B, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(B, D, A, C, *Builder)) + if (Value *V = matchSelectFromAndOr(B, D, A, C, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(B, D, C, A, *Builder)) + if (Value *V = matchSelectFromAndOr(B, D, C, A, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(D, B, A, C, *Builder)) + if (Value *V = matchSelectFromAndOr(D, B, A, C, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(D, B, C, A, *Builder)) + if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder)) return replaceInstUsesWith(I, V); } @@ -2139,9 +2130,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // ((B | C) & A) | B -> B | (A & C) if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) - return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C)); + return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C)); - if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) return DeMorgan; // Canonicalize xor to the RHS. @@ -2163,11 +2154,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { - Value *Not = Builder->CreateNot(B, B->getName()+".not"); + Value *Not = Builder.CreateNot(B, B->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { - Value *Not = Builder->CreateNot(A, A->getName()+".not"); + Value *Not = Builder.CreateNot(A, A->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } } @@ -2181,7 +2172,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { B->getOpcode() == Instruction::Xor)) { Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) : B->getOperand(0); - Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + Value *Not = Builder.CreateNot(NotOp, NotOp->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } @@ -2194,7 +2185,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // xor was canonicalized to Op1 above. 
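Every matchSelectFromAndOr variant above targets the same masking idiom: when Cond is the sign-extension of a bool, (Cond & C) | (~Cond & D) yields exactly one of C or D, i.e. a select. A minimal scalar sketch, not from the patch:

#include <cassert>
#include <cstdint>

int main() {
  int32_t c = 0x1111, d = 0x2222;
  for (int b = 0; b <= 1; ++b) {
    int32_t cond = b ? -1 : 0;   // sext of an i1: all-ones or all-zeros
    assert(((cond & c) | (~cond & d)) == (b ? c : d));
  }
  return 0;
}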
if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && match(Op0, m_c_And(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(Builder->CreateNot(A), B); + return BinaryOperator::CreateXor(Builder.CreateNot(A), B); if (SwappedForXor) std::swap(Op0, Op1); @@ -2212,18 +2203,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast(Y)) if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); } if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast(X)) if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast(Y)) if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); } } @@ -2238,10 +2229,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or . if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) && - A->getType()->getScalarType()->isIntegerTy(1)) + A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) && - A->getType()->getScalarType()->isIntegerTy(1)) + A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); // Note: If we've gotten to the point of visiting the outer OR, then the @@ -2252,7 +2243,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ConstantInt *C1; if (Op0->hasOneUse() && !isa(Op1) && match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { - Value *Inner = Builder->CreateOr(A, Op1); + Value *Inner = Builder.CreateOr(A, Op1); Inner->takeName(Op0); return BinaryOperator::CreateOr(Inner, C1); } @@ -2265,8 +2256,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Op0->hasOneUse() && Op1->hasOneUse() && match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { - Value *orTrue = Builder->CreateOr(A, C); - Value *orFalse = Builder->CreateOr(B, D); + Value *orTrue = Builder.CreateOr(A, C); + Value *orFalse = Builder.CreateOr(B, D); return SelectInst::Create(X, orTrue, orFalse); } } @@ -2276,7 +2267,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { /// A ^ B can be specified using other logic ops in a variety of patterns. We /// can fold these early and efficiently by morphing an existing instruction. -static Instruction *foldXorToXor(BinaryOperator &I) { +static Instruction *foldXorToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { assert(I.getOpcode() == Instruction::Xor); Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); @@ -2323,6 +2315,21 @@ static Instruction *foldXorToXor(BinaryOperator &I) { return &I; } + // For the remaining cases we need to get rid of one of the operands. 
+ if (!Op0->hasOneUse() && !Op1->hasOneUse()) + return nullptr; + + // (A | B) ^ ~(A & B) -> ~(A ^ B) + // (A | B) ^ ~(B & A) -> ~(A ^ B) + // (A & B) ^ ~(A | B) -> ~(A ^ B) + // (A & B) ^ ~(B | A) -> ~(A ^ B) + // Complexity sorting ensures the not will be on the right side. + if ((match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) || + (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + return nullptr; } @@ -2355,12 +2362,12 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) { // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS RHS->setPredicate(RHS->getInversePredicate()); - return Builder->CreateAnd(LHS, RHS); + return Builder.CreateAnd(LHS, RHS); } if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) { // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS LHS->setPredicate(LHS->getInversePredicate()); - return Builder->CreateAnd(LHS, RHS); + return Builder.CreateAnd(LHS, RHS); } } } @@ -2381,7 +2388,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - if (Instruction *NewXor = foldXorToXor(I)) + if (Instruction *NewXor = foldXorToXor(I, Builder)) return NewXor; // (A&B)^(A&C) -> A&(B^C) etc @@ -2393,7 +2400,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. 
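The cases added to foldXorToXor above rest on a two-variable Boolean identity: (A | B) ^ ~(A & B), and its and/or-swapped sibling, both equal ~(A ^ B). A quick exhaustive check over 4-bit values, illustrative only:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a = 0; a < 16; ++a)
    for (uint32_t b = 0; b < 16; ++b) {
      uint32_t rhs = ~(a ^ b) & 0xFu;                 // ~(A ^ B)
      assert((((a | b) ^ ~(a & b)) & 0xFu) == rhs);   // (A | B) ^ ~(A & B)
      assert((((a & b) ^ ~(a | b)) & 0xFu) == rhs);   // (A & B) ^ ~(A | B)
    }
  return 0;
}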
@@ -2404,13 +2411,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // ~(~X & Y) --> (X | ~Y) // ~(Y & ~X) --> (X | ~Y) if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { - Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); return BinaryOperator::CreateOr(X, NotY); } // ~(~X | Y) --> (X & ~Y) // ~(Y | ~X) --> (X & ~Y) if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { - Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); return BinaryOperator::CreateAnd(X, NotY); } @@ -2426,8 +2433,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { NotVal->getOperand(0)->hasOneUse()) && IsFreeToInvert(NotVal->getOperand(1), NotVal->getOperand(1)->hasOneUse())) { - Value *NotX = Builder->CreateNot(NotVal->getOperand(0), "notlhs"); - Value *NotY = Builder->CreateNot(NotVal->getOperand(1), "notrhs"); + Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs"); + Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs"); if (NotVal->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(NotX, NotY); return BinaryOperator::CreateAnd(NotX, NotY); @@ -2457,7 +2464,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } // not (cmp A, B) = !cmp A, B - ICmpInst::Predicate Pred; + CmpInst::Predicate Pred; if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) { cast(Op0)->setPredicate(CmpInst::getInversePredicate(Pred)); return replaceInstUsesWith(I, Op0); @@ -2470,8 +2477,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CI->hasOneUse() && Op0C->hasOneUse()) { Instruction::CastOps Opcode = Op0C->getOpcode(); if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && - (RHSC == ConstantExpr::getCast(Opcode, Builder->getTrue(), - Op0C->getDestTy()))) { + (RHSC == ConstantExpr::getCast(Opcode, Builder.getTrue(), + Op0C->getDestTy()))) { CI->setPredicate(CI->getInversePredicate()); return CastInst::Create(Opcode, CI, Op0C->getType()); } @@ -2481,7 +2488,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (BinaryOperator *Op0I = dyn_cast(Op0)) { // ~(c-X) == X-c-1 == X+(-c-1) - if (Op0I->getOpcode() == Instruction::Sub && RHSC->isAllOnesValue()) + if (Op0I->getOpcode() == Instruction::Sub && RHSC->isMinusOne()) if (Constant *Op0I0C = dyn_cast(Op0I->getOperand(0))) { Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); return BinaryOperator::CreateAdd(Op0I->getOperand(1), @@ -2491,13 +2498,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (ConstantInt *Op0CI = dyn_cast(Op0I->getOperand(1))) { if (Op0I->getOpcode() == Instruction::Add) { // ~(X-c) --> (-c-1)-X - if (RHSC->isAllOnesValue()) { + if (RHSC->isMinusOne()) { Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); } else if (RHSC->getValue().isSignMask()) { // (X + C) ^ signmask -> (X + C + signmask) - Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); + Constant *C = Builder.getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); } @@ -2530,7 +2537,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { APInt FoldConst = C1->getValue().lshr(C2->getValue()); FoldConst ^= C3->getValue(); // Prepare the two operands. 
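Two of the visitXor folds above are small integer identities: De Morgan with one operand already inverted, ~(~X & Y) --> X | ~Y, and the two's-complement rule ~v == -v - 1, which turns ~(c - X) into X + (-c - 1). A short sketch with a made-up constant, not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t c = 7;
  for (uint32_t x = 0; x < 16; ++x) {
    for (uint32_t y = 0; y < 16; ++y)
      assert((~(~x & y)) == (x | ~y));   // ~(~X & Y) --> X | ~Y
    assert(~(c - x) == x + ~c);          // ~(c - X) --> X + (-c - 1), since ~c == -c - 1
  }
  return 0;
}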
- Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2); + Value *Opnd0 = Builder.CreateLShr(E1->getOperand(0), C2); Opnd0->takeName(Op0I); cast(Opnd0)->setDebugLoc(I.getDebugLoc()); Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst); @@ -2575,14 +2582,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (A == Op1) // (B|A)^B == (A|B)^B std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); + return BinaryOperator::CreateAnd(A, Builder.CreateNot(Op1)); } else if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B))))) { if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); const APInt *C; if (B == Op1 && // (B&A)^A == ~B & A !match(Op1, m_APInt(C))) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1); + return BinaryOperator::CreateAnd(Builder.CreateNot(A), Op1); } } } @@ -2594,20 +2601,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { match(Op1, m_Or(m_Value(A), m_Value(B)))) { if (D == A) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(A), B), C); + Builder.CreateAnd(Builder.CreateNot(A), B), C); if (D == B) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(B), A), C); + Builder.CreateAnd(Builder.CreateNot(B), A), C); } // (A | B)^(A ^ C) -> ((~A) & B) ^ C if (match(Op0, m_Or(m_Value(A), m_Value(B))) && match(Op1, m_Xor(m_Value(D), m_Value(C)))) { if (D == A) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(A), B), C); + Builder.CreateAnd(Builder.CreateNot(A), B), C); if (D == B) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(B), A), C); + Builder.CreateAnd(Builder.CreateNot(B), A), C); } // (A & B) ^ (A ^ B) -> (A | B) if (match(Op0, m_And(m_Value(A), m_Value(B))) && @@ -2624,7 +2631,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Value *A, *B; if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Not(m_Specific(A)))) - return BinaryOperator::CreateNot(Builder->CreateAnd(A, B)); + return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); if (auto *LHS = dyn_cast(I.getOperand(0))) if (auto *RHS = dyn_cast(I.getOperand(1))) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3770021de100..391c430dab75 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -128,23 +128,23 @@ Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits, Src->getType()->getPointerAddressSpace()); - Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType, - "memcpy_unfold.src_casted"); - Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType, - "memcpy_unfold.dst_casted"); + Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType, + "memcpy_unfold.src_casted"); + Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType, + "memcpy_unfold.dst_casted"); for (uint64_t i = 0; i < NumElements; ++i) { // Get current element addresses ConstantInt *ElementIdxCI = ConstantInt::get(AMI->getContext(), APInt(64, i)); Value *SrcElementAddr = - Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); + Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); Value *DstElementAddr = - Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); + Builder.CreateGEP(DstCasted, 
ElementIdxCI, "memcpy_unfold.dst_addr"); // Load from the source. Transfer alignment information and mark load as // unordered atomic. - LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val"); + LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val"); Load->setOrdering(AtomicOrdering::Unordered); // We know alignment of the first element. It is also guaranteed by the // verifier that element size is less or equal than first element @@ -157,7 +157,7 @@ Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( Load->setDebugLoc(AMI->getDebugLoc()); // Store loaded value via unordered atomic store. - StoreInst *Store = Builder->CreateStore(Load, DstElementAddr); + StoreInst *Store = Builder.CreateStore(Load, DstElementAddr); Store->setOrdering(AtomicOrdering::Unordered); Store->setAlignment(i == 0 ? AMI->getParamAlignment(0) : ElementSizeInBytes); @@ -213,7 +213,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { if (M->getNumOperands() == 3 && M->getOperand(0) && mdconst::hasa(M->getOperand(0)) && - mdconst::extract(M->getOperand(0))->isNullValue() && + mdconst::extract(M->getOperand(0))->isZero() && M->getOperand(1) && mdconst::hasa(M->getOperand(1)) && mdconst::extract(M->getOperand(1))->getValue() == @@ -227,9 +227,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); - LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); + Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); + Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); + LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile()); L->setAlignment(SrcAlign); if (CopyMD) L->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -238,7 +238,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (LoopMemParallelMD) L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); - StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile()); + StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile()); S->setAlignment(DstAlign); if (CopyMD) S->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -274,15 +274,15 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Value *Dest = MI->getDest(); unsigned DstAddrSp = cast(Dest->getType())->getAddressSpace(); Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); - Dest = Builder->CreateBitCast(Dest, NewDstPtrTy); + Dest = Builder.CreateBitCast(Dest, NewDstPtrTy); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, - MI->isVolatile()); + StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest, + MI->isVolatile()); S->setAlignment(Alignment); // Set the size of the copy to 0, it will be deleted on the next iteration. 
@@ -600,8 +600,7 @@ static Value *simplifyX86muldq(const IntrinsicInst &II, return Builder.CreateMul(LHS, RHS); } -static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC, - InstCombiner::BuilderTy &Builder, bool IsSigned) { +static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) { Value *Arg0 = II.getArgOperand(0); Value *Arg1 = II.getArgOperand(1); Type *ResTy = II.getType(); @@ -676,8 +675,7 @@ static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC, return ConstantVector::get(Vals); } -static Value *simplifyX86movmsk(const IntrinsicInst &II, - InstCombiner::BuilderTy &Builder) { +static Value *simplifyX86movmsk(const IntrinsicInst &II) { Value *Arg = II.getArgOperand(0); Type *ResTy = II.getType(); Type *ArgTy = Arg->getType(); @@ -860,7 +858,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, } // Constant Fold - extraction from zero is always {zero, undef}. - if (CI0 && CI0->equalsInt(0)) + if (CI0 && CI0->isZero()) return LowConstantHighUndef(0); return nullptr; @@ -1404,7 +1402,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, &IC.getDominatorTree())) { if (!match(II.getArgOperand(1), m_One())) { - II.setOperand(1, IC.Builder->getTrue()); + II.setOperand(1, IC.Builder.getTrue()); return &II; } } @@ -1477,7 +1475,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) { // the LLVM intrinsic definition for the pointer argument. unsigned AddrSpace = cast(Ptr->getType())->getAddressSpace(); PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace); - Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec"); + Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec"); // Second, convert the x86 XMM integer vector mask to a vector of bools based // on each element's most significant bit (the sign bit). @@ -1485,7 +1483,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) { // The pass-through vector for an x86 masked load is a zero vector. CallInst *NewMaskedLoad = - IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); + IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); return IC.replaceInstUsesWith(II, NewMaskedLoad); } @@ -1520,13 +1518,13 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) { // the LLVM intrinsic definition for the pointer argument. unsigned AddrSpace = cast(Ptr->getType())->getAddressSpace(); PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace); - Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec"); + Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec"); // Second, convert the x86 XMM integer vector mask to a vector of bools based // on each element's most significant bit (the sign bit). Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask); - IC.Builder->CreateMaskedStore(Vec, PtrCast, 1, BoolMask); + IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask); // 'Replace uses' doesn't work for stores. Erase the original masked store. IC.eraseInstFromFunction(II); @@ -1764,7 +1762,7 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but // need special handling. // - // We seem to be mising intrinsics for rcp.approx.{ftz.}f32, which is just + // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just // as well. 
case Intrinsic::nvvm_rcp_rn_d: return {SPC_Reciprocal, FTZ_Any}; @@ -1965,16 +1963,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + // TODO should this be in InstSimplify? // bswap(bswap(x)) -> x if (match(IIOperand, m_BSwap(m_Value(X)))) - return replaceInstUsesWith(CI, X); + return replaceInstUsesWith(CI, X); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getPrimitiveSizeInBits() - IIOperand->getType()->getPrimitiveSizeInBits(); Value *CV = ConstantInt::get(X->getType(), C); - Value *V = Builder->CreateLShr(X, CV); + Value *V = Builder.CreateLShr(X, CV); return new TruncInst(V, IIOperand->getType()); } break; @@ -1984,6 +1983,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + // TODO should this be in InstSimplify? // bitreverse(bitreverse(x)) -> x if (match(IIOperand, m_BitReverse(m_Value(X)))) return replaceInstUsesWith(CI, X); @@ -1991,7 +1991,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::masked_load: - if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder)) + if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder)) return replaceInstUsesWith(CI, SimplifiedMaskedOp); break; case Intrinsic::masked_store: @@ -2010,7 +2010,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Power->isOne()) return replaceInstUsesWith(CI, II->getArgOperand(0)); // powi(x, -1) -> 1/x - if (Power->isAllOnesValue()) + if (Power->isMinusOne()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), II->getArgOperand(0)); } @@ -2073,11 +2073,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::fmuladd: { // Canonicalize fast fmuladd to the separate fmul + fadd. 
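The bswap folds above are easiest to check at a concrete width: swapping twice is the identity, and byte-swapping a truncated byte-swap equals shifting the original value right by the dropped width and truncating. A minimal sketch, not part of the patch (__builtin_bswap16/32 are GCC/Clang builtins):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x11223344u;
  // bswap(bswap(x)) -> x
  assert(__builtin_bswap32(__builtin_bswap32(x)) == x);
  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)), here c = 32 - 16 = 16
  uint16_t lhs = __builtin_bswap16(static_cast<uint16_t>(__builtin_bswap32(x)));
  uint16_t rhs = static_cast<uint16_t>(x >> 16);
  assert(lhs == rhs);   // both are 0x1122
  return 0;
}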
if (II->hasUnsafeAlgebra()) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(II->getFastMathFlags()); - Value *Mul = Builder->CreateFMul(II->getArgOperand(0), - II->getArgOperand(1)); - Value *Add = Builder->CreateFAdd(Mul, II->getArgOperand(2)); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(II->getFastMathFlags()); + Value *Mul = Builder.CreateFMul(II->getArgOperand(0), + II->getArgOperand(1)); + Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2)); Add->takeName(II); return replaceInstUsesWith(*II, Add); } @@ -2128,8 +2128,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Constant *LHS, *RHS; if (match(II->getArgOperand(0), m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) { - CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS}); - CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS}); + CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS}); + CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS}); return SelectInst::Create(Cond, Call0, Call1); } @@ -2147,7 +2147,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // fabs (fpext x) -> fpext (fabs x) Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(), { ExtSrc->getType() }); - CallInst *NewFabs = Builder->CreateCall(F, ExtSrc); + CallInst *NewFabs = Builder.CreateCall(F, ExtSrc); NewFabs->copyFastMathFlags(II); NewFabs->takeName(II); return new FPExtInst(NewFabs, II->getType()); @@ -2174,7 +2174,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -2182,8 +2182,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: { // Turn PPC VSX loads into normal loads. - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), - PointerType::getUnqual(II->getType())); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), + PointerType::getUnqual(II->getType())); return new LoadInst(Ptr, Twine(""), false, 1); } case Intrinsic::ppc_altivec_stvx: @@ -2193,7 +2193,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { &DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); } break; @@ -2201,18 +2201,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_vsx_stxvd2x: { // Turn PPC VSX stores into normal stores. Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr, false, 1); } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. 
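The fmuladd canonicalization above is guarded by hasUnsafeAlgebra() because splitting a fused multiply-add into separate fmul and fadd reintroduces an intermediate rounding; only under fast-math are the two treated as interchangeable. A scalar illustration of that difference, not part of the patch and assuming the compiler does not itself contract a*b + c into an fma:

#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  const double eps = std::numeric_limits<double>::epsilon();  // 2^-52
  double a = 1.0 + eps, b = 1.0 - eps, c = -1.0;
  double fused = std::fma(a, b, c);  // one rounding: a*b + c computed exactly, then rounded
  double split = a * b + c;          // two roundings: a*b rounds back to 1.0 first
  // With floating-point contraction disabled these differ: about -4.9e-32 vs 0.
  std::printf("fused = %g, split = %g\n", fused, split);
  return 0;
}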
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { - Type *VTy = VectorType::get(Builder->getFloatTy(), + Type *VTy = VectorType::get(Builder.getFloatTy(), II->getType()->getVectorNumElements()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(VTy)); - Value *Load = Builder->CreateLoad(Ptr); + Value *Load = Builder.CreateLoad(Ptr); return new FPExtInst(Load, II->getType()); } break; @@ -2220,7 +2220,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC QPX qvlfd -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC, &DT) >= 32) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -2229,11 +2229,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC QPX qvstfs -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, &DT) >= 16) { - Type *VTy = VectorType::get(Builder->getFloatTy(), + Type *VTy = VectorType::get(Builder.getFloatTy(), II->getArgOperand(0)->getType()->getVectorNumElements()); - Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy); + Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy); Type *OpPtrTy = PointerType::getUnqual(VTy); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(TOp, Ptr); } break; @@ -2243,7 +2243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { &DT) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); } break; @@ -2272,15 +2272,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { SmallVector SubVecMask; for (unsigned i = 0; i != RetWidth; ++i) SubVecMask.push_back((int)i); - VectorHalfAsShorts = Builder->CreateShuffleVector( + VectorHalfAsShorts = Builder.CreateShuffleVector( Arg, UndefValue::get(ArgType), SubVecMask); } auto VectorHalfType = VectorType::get(Type::getHalfTy(II->getContext()), RetWidth); auto VectorHalfs = - Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType); - auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType); + Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType); + auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType); return replaceInstUsesWith(*II, VectorFloats); } @@ -2334,7 +2334,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_movmsk_pd_256: case Intrinsic::x86_avx_movmsk_ps_256: case Intrinsic::x86_avx2_pmovmskb: { - if (Value *V = simplifyX86movmsk(*II, *Builder)) + if (Value *V = simplifyX86movmsk(*II)) return replaceInstUsesWith(*II, V); break; } @@ -2437,25 +2437,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_avx512_mask_add_ps_512: case Intrinsic::x86_avx512_mask_add_pd_512: - V = Builder->CreateFAdd(Arg0, Arg1); + V = Builder.CreateFAdd(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_sub_ps_512: case Intrinsic::x86_avx512_mask_sub_pd_512: - V = Builder->CreateFSub(Arg0, Arg1); + V = 
Builder.CreateFSub(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_mul_ps_512: case Intrinsic::x86_avx512_mask_mul_pd_512: - V = Builder->CreateFMul(Arg0, Arg1); + V = Builder.CreateFMul(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_div_ps_512: case Intrinsic::x86_avx512_mask_div_pd_512: - V = Builder->CreateFDiv(Arg0, Arg1); + V = Builder.CreateFDiv(Arg0, Arg1); break; } // Create a select for the masking. V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), - *Builder); + Builder); return replaceInstUsesWith(*II, V); } } @@ -2476,27 +2476,27 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Extract the element as scalars. Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); - Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0); - Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0); + Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0); + Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0); Value *V; switch (II->getIntrinsicID()) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_avx512_mask_add_ss_round: case Intrinsic::x86_avx512_mask_add_sd_round: - V = Builder->CreateFAdd(LHS, RHS); + V = Builder.CreateFAdd(LHS, RHS); break; case Intrinsic::x86_avx512_mask_sub_ss_round: case Intrinsic::x86_avx512_mask_sub_sd_round: - V = Builder->CreateFSub(LHS, RHS); + V = Builder.CreateFSub(LHS, RHS); break; case Intrinsic::x86_avx512_mask_mul_ss_round: case Intrinsic::x86_avx512_mask_mul_sd_round: - V = Builder->CreateFMul(LHS, RHS); + V = Builder.CreateFMul(LHS, RHS); break; case Intrinsic::x86_avx512_mask_div_ss_round: case Intrinsic::x86_avx512_mask_div_sd_round: - V = Builder->CreateFDiv(LHS, RHS); + V = Builder.CreateFDiv(LHS, RHS); break; } @@ -2506,18 +2506,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // We don't need a select if we know the mask bit is a 1. if (!C || !C->getValue()[0]) { // Cast the mask to an i1 vector and then extract the lowest element. - auto *MaskTy = VectorType::get(Builder->getInt1Ty(), + auto *MaskTy = VectorType::get(Builder.getInt1Ty(), cast(Mask->getType())->getBitWidth()); - Mask = Builder->CreateBitCast(Mask, MaskTy); - Mask = Builder->CreateExtractElement(Mask, (uint64_t)0); + Mask = Builder.CreateBitCast(Mask, MaskTy); + Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); // Extract the lowest element from the passthru operand. - Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2), + Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2), (uint64_t)0); - V = Builder->CreateSelect(Mask, V, Passthru); + V = Builder.CreateSelect(Mask, V, Passthru); } // Insert the result back into the original argument 0. 
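For the masked scalar AVX-512 arithmetic handled above, only lane 0 needs modeling: perform the scalar op, use mask bit 0 to choose between that result and lane 0 of the passthru, and keep the upper lanes of the first source. A plain-C++ sketch of that semantics, illustrative only (a 2-element array stands in for a 2 x double vector):

#include <cassert>

int main() {
  double arg0[2] = {10.0, 99.0};   // first source; its upper lane passes through
  double arg1[2] = {3.0, 7.0};
  double passthru[2] = {-1.0, -2.0};
  for (unsigned mask = 0; mask <= 1; ++mask) {
    double lane0 = arg0[0] + arg1[0];                 // the scalar add
    double res0  = (mask & 1) ? lane0 : passthru[0];  // select on mask bit 0
    double result[2] = {res0, arg0[1]};               // upper lane copied from arg0
    assert(result[1] == 99.0 && result[0] == ((mask & 1) ? 13.0 : -1.0));
  }
  return 0;
}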
- V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0); + V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0); return replaceInstUsesWith(*II, V); } @@ -2598,7 +2598,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_pslli_d_512: case Intrinsic::x86_avx512_pslli_q_512: case Intrinsic::x86_avx512_pslli_w_512: - if (Value *V = simplifyX86immShift(*II, *Builder)) + if (Value *V = simplifyX86immShift(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2629,7 +2629,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_psll_d_512: case Intrinsic::x86_avx512_psll_q_512: case Intrinsic::x86_avx512_psll_w_512: { - if (Value *V = simplifyX86immShift(*II, *Builder)) + if (Value *V = simplifyX86immShift(*II, Builder)) return replaceInstUsesWith(*II, V); // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector @@ -2673,7 +2673,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_psrlv_w_128: case Intrinsic::x86_avx512_psrlv_w_256: case Intrinsic::x86_avx512_psrlv_w_512: - if (Value *V = simplifyX86varShift(*II, *Builder)) + if (Value *V = simplifyX86varShift(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2683,7 +2683,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_pmulu_dq: case Intrinsic::x86_avx512_pmul_dq_512: case Intrinsic::x86_avx512_pmulu_dq_512: { - if (Value *V = simplifyX86muldq(*II, *Builder)) + if (Value *V = simplifyX86muldq(*II, Builder)) return replaceInstUsesWith(*II, V); unsigned VWidth = II->getType()->getVectorNumElements(); @@ -2703,7 +2703,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_packsswb: case Intrinsic::x86_avx512_packssdw_512: case Intrinsic::x86_avx512_packsswb_512: - if (Value *V = simplifyX86pack(*II, *this, *Builder, true)) + if (Value *V = simplifyX86pack(*II, true)) return replaceInstUsesWith(*II, V); break; @@ -2713,7 +2713,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_packuswb: case Intrinsic::x86_avx512_packusdw_512: case Intrinsic::x86_avx512_packuswb_512: - if (Value *V = simplifyX86pack(*II, *this, *Builder, false)) + if (Value *V = simplifyX86pack(*II, false)) return replaceInstUsesWith(*II, V); break; @@ -2756,7 +2756,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::x86_sse41_insertps: - if (Value *V = simplifyX86insertps(*II, *Builder)) + if (Value *V = simplifyX86insertps(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2779,7 +2779,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { : nullptr; // Attempt to simplify to a constant, shuffle vector or EXTRQI call. - if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder)) return replaceInstUsesWith(*II, V); // EXTRQ only uses the lowest 64-bits of the first 128-bit vector @@ -2811,7 +2811,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { ConstantInt *CIIndex = dyn_cast(II->getArgOperand(2)); // Attempt to simplify to a constant or shuffle vector. 
- if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder)) return replaceInstUsesWith(*II, V); // EXTRQI only uses the lowest 64-bits of the first 128-bit vector @@ -2843,7 +2843,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { const APInt &V11 = CI11->getValue(); APInt Len = V11.zextOrTrunc(6); APInt Idx = V11.lshr(8).zextOrTrunc(6); - if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder)) return replaceInstUsesWith(*II, V); } @@ -2876,7 +2876,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (CILength && CIIndex) { APInt Len = CILength->getValue().zextOrTrunc(6); APInt Idx = CIIndex->getValue().zextOrTrunc(6); - if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder)) return replaceInstUsesWith(*II, V); } @@ -2930,7 +2930,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: case Intrinsic::x86_avx512_pshuf_b_512: - if (Value *V = simplifyX86pshufb(*II, *Builder)) + if (Value *V = simplifyX86pshufb(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2940,13 +2940,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_vpermilvar_pd: case Intrinsic::x86_avx_vpermilvar_pd_256: case Intrinsic::x86_avx512_vpermilvar_pd_512: - if (Value *V = simplifyX86vpermilvar(*II, *Builder)) + if (Value *V = simplifyX86vpermilvar(*II, Builder)) return replaceInstUsesWith(*II, V); break; case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: - if (Value *V = simplifyX86vpermv(*II, *Builder)) + if (Value *V = simplifyX86vpermv(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2964,10 +2964,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_mask_permvar_sf_512: case Intrinsic::x86_avx512_mask_permvar_si_256: case Intrinsic::x86_avx512_mask_permvar_si_512: - if (Value *V = simplifyX86vpermv(*II, *Builder)) { + if (Value *V = simplifyX86vpermv(*II, Builder)) { // We simplified the permuting, now create a select for the masking. 
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), - *Builder); + Builder); return replaceInstUsesWith(*II, V); } break; @@ -2976,7 +2976,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_si_256: case Intrinsic::x86_avx2_vperm2i128: - if (Value *V = simplifyX86vperm2(*II, *Builder)) + if (Value *V = simplifyX86vperm2(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -3009,7 +3009,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_xop_vpcomd: case Intrinsic::x86_xop_vpcomq: case Intrinsic::x86_xop_vpcomw: - if (Value *V = simplifyX86vpcom(*II, *Builder, true)) + if (Value *V = simplifyX86vpcom(*II, Builder, true)) return replaceInstUsesWith(*II, V); break; @@ -3017,7 +3017,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_xop_vpcomud: case Intrinsic::x86_xop_vpcomuq: case Intrinsic::x86_xop_vpcomuw: - if (Value *V = simplifyX86vpcom(*II, *Builder, false)) + if (Value *V = simplifyX86vpcom(*II, Builder, false)) return replaceInstUsesWith(*II, V); break; @@ -3044,10 +3044,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), - Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), - Mask->getType()); + Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0), + Mask->getType()); + Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1), + Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. @@ -3067,13 +3067,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0; Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, - Builder->getInt32(Idx&15)); + Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, + Builder.getInt32(Idx&15)); } // Insert this value into the result vector. - Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - Builder->getInt32(i)); + Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx], + Builder.getInt32(i)); } return CastInst::Create(Instruction::BitCast, Result, CI.getType()); } @@ -3238,7 +3238,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Mask == (S_NAN | Q_NAN)) { // Equivalent of isnan. Replace with standard fcmp. - Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0); + Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0); FCmp->takeName(II); return replaceInstUsesWith(*II, FCmp); } @@ -3250,7 +3250,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Clamp mask to used bits if ((Mask & FullMask) != Mask) { - CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(), { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) } ); @@ -3343,13 +3343,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: Also emit sub if only width is constant. 
if (!CWidth && COffset && Offset == 0) { Constant *KSize = ConstantInt::get(COffset->getType(), IntSize); - Value *ShiftVal = Builder->CreateSub(KSize, II->getArgOperand(2)); - ShiftVal = Builder->CreateZExt(ShiftVal, II->getType()); + Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2)); + ShiftVal = Builder.CreateZExt(ShiftVal, II->getType()); - Value *Shl = Builder->CreateShl(Src, ShiftVal); - Value *RightShift = Signed ? - Builder->CreateAShr(Shl, ShiftVal) : - Builder->CreateLShr(Shl, ShiftVal); + Value *Shl = Builder.CreateShl(Src, ShiftVal); + Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal) + : Builder.CreateLShr(Shl, ShiftVal); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); } @@ -3360,17 +3359,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: This allows folding to undef when the hardware has specific // behavior? if (Offset + Width < IntSize) { - Value *Shl = Builder->CreateShl(Src, IntSize - Offset - Width); - Value *RightShift = Signed ? - Builder->CreateAShr(Shl, IntSize - Width) : - Builder->CreateLShr(Shl, IntSize - Width); + Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width); + Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width) + : Builder.CreateLShr(Shl, IntSize - Width); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); } - Value *RightShift = Signed ? - Builder->CreateAShr(Src, Offset) : - Builder->CreateLShr(Src, Offset); + Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset) + : Builder.CreateLShr(Src, Offset); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); @@ -3439,7 +3436,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } if (match(Src2, m_NaN()) || isa(Src2)) { - CallInst *NewCall = Builder->CreateMinNum(Src0, Src1); + CallInst *NewCall = Builder.CreateMinNum(Src0, Src1); NewCall->copyFastMathFlags(II); NewCall->takeName(II); return replaceInstUsesWith(*II, NewCall); @@ -3451,7 +3448,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), C2->getValueAPF()); return replaceInstUsesWith(*II, - ConstantFP::get(Builder->getContext(), Result)); + ConstantFP::get(Builder.getContext(), Result)); } } } @@ -3494,7 +3491,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; MDNode *MD = MDNode::get(II->getContext(), MDArgs); Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; - CallInst *NewCall = Builder->CreateCall(NewF, Args); + CallInst *NewCall = Builder.CreateCall(NewF, Args); NewCall->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent); NewCall->takeName(II); @@ -3556,7 +3553,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { SrcLHS->getType()); Value *Args[] = { SrcLHS, SrcRHS, ConstantInt::get(CC->getType(), SrcPred) }; - CallInst *NewCall = Builder->CreateCall(NewF, Args); + CallInst *NewCall = Builder.CreateCall(NewF, Args); NewCall->takeName(II); return replaceInstUsesWith(*II, NewCall); } @@ -3633,16 +3630,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // the InstCombineIRInserter object. 
 Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
 if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
-   Builder->CreateCall(AssumeIntrinsic, A, II->getName());
-   Builder->CreateCall(AssumeIntrinsic, B, II->getName());
+   Builder.CreateCall(AssumeIntrinsic, A, II->getName());
+   Builder.CreateCall(AssumeIntrinsic, B, II->getName());
 return eraseInstFromFunction(*II);
 }
 // assume(!(a || b)) -> assume(!a); assume(!b);
 if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
-   Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A),
-                       II->getName());
-   Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B),
-                       II->getName());
+   Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
+   Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
 return eraseInstFromFunction(*II);
 }
@@ -3726,7 +3721,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
 return eraseInstFromFunction(*NextInst);
 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
-   II->setArgOperand(0, Builder->CreateAnd(CurrCond, NextCond));
+   II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
 return eraseInstFromFunction(*NextInst);
 }
 break;
@@ -4163,7 +4158,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 Value *NewArg = *AI;
 if ((*AI)->getType() != ParamTy)
-     NewArg = Builder->CreateBitOrPointerCast(*AI, ParamTy);
+     NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
 Args.push_back(NewArg);
 // Add any parameter attributes.
@@ -4189,7 +4184,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 // Must promote to pass through va_arg area!
 Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, PTy, false);
-     NewArg = Builder->CreateCast(opcode, *AI, PTy);
+     NewArg = Builder.CreateCast(opcode, *AI, PTy);
 }
 Args.push_back(NewArg);
@@ -4215,10 +4210,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 CallSite NewCS;
 if (InvokeInst *II = dyn_cast(Caller)) {
-   NewCS = Builder->CreateInvoke(Callee, II->getNormalDest(),
-                                 II->getUnwindDest(), Args, OpBundles);
+   NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
+                                II->getUnwindDest(), Args, OpBundles);
 } else {
-   NewCS = Builder->CreateCall(Callee, Args, OpBundles);
+   NewCS = Builder.CreateCall(Callee, Args, OpBundles);
 cast(NewCS.getInstruction())
 ->setTailCallKind(cast(Caller)->getTailCallKind());
 }
@@ -4328,7 +4323,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
 // Add the chain argument and attributes.
 Value *NestVal = Tramp->getArgOperand(2);
 if (NestVal->getType() != NestTy)
-   NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
+   NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
 NewArgs.push_back(NestVal);
 NewArgAttrs.push_back(NestAttr);
 }
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index d3049389dfb9..dfdfd3e9da84 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -84,7 +84,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
 AllocaInst &AI) {
 PointerType *PTy = cast(CI.getType());
-  BuilderTy AllocaBuilder(*Builder);
+  BuilderTy AllocaBuilder(Builder);
 AllocaBuilder.SetInsertPoint(&AI);
 // Get the type really allocated and the type casted to.
@@ -406,8 +406,7 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 /// ---> /// extractelement <4 x i32> %X, 1 -static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, - const DataLayout &DL) { +static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) { Value *TruncOp = Trunc.getOperand(0); Type *DestType = Trunc.getType(); if (!TruncOp->hasOneUse() || !isa(DestType)) @@ -434,14 +433,14 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, unsigned NumVecElts = VecWidth / DestWidth; if (VecType->getElementType() != DestType) { VecType = VectorType::get(DestType, NumVecElts); - VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc"); + VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc"); } unsigned Elt = ShiftAmount / DestWidth; - if (DL.isBigEndian()) + if (IC.getDataLayout().isBigEndian()) Elt = NumVecElts - 1 - Elt; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt)); } /// Try to narrow the width of bitwise logic instructions with constants. @@ -460,7 +459,7 @@ Instruction *InstCombiner::shrinkBitwiseLogic(TruncInst &Trunc) { // trunc (logic X, C) --> logic (trunc X, C') Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy); - Value *NarrowOp0 = Builder->CreateTrunc(LogicOp->getOperand(0), DestTy); + Value *NarrowOp0 = Builder.CreateTrunc(LogicOp->getOperand(0), DestTy); return BinaryOperator::Create(LogicOp->getOpcode(), NarrowOp0, NarrowC); } @@ -554,7 +553,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector. if (DestTy->getScalarSizeInBits() == 1) { Constant *One = ConstantInt::get(SrcTy, 1); - Src = Builder->CreateAnd(Src, One); + Src = Builder.CreateAnd(Src, One); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } @@ -580,7 +579,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Since we're doing an lshr and a zero extend, and know that the shift // amount is smaller than ASize, it is always safe to do the shift in A's // type, then zero extend or truncate to the result. - Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Value *Shift = Builder.CreateLShr(A, Cst->getZExtValue()); Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, DestTy, false); } @@ -610,7 +609,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(), std::min(ShiftAmt, ASize - 1))); if (SExt->hasOneUse()) { - Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1)); + Value *Shift = Builder.CreateAShr(A, std::min(ShiftAmt, ASize - 1)); Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), true); } @@ -620,10 +619,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *I = shrinkBitwiseLogic(CI)) return I; - if (Instruction *I = shrinkSplatShuffle(CI, *Builder)) + if (Instruction *I = shrinkSplatShuffle(CI, Builder)) return I; - if (Instruction *I = shrinkInsertElt(CI, *Builder)) + if (Instruction *I = shrinkInsertElt(CI, Builder)) return I; if (Src->hasOneUse() && isa(SrcTy) && @@ -636,7 +635,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // FoldShiftByConstant and is the extend in reg pattern. 
const unsigned DestSize = DestTy->getScalarSizeInBits(); if (Cst->getValue().ult(DestSize)) { - Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr"); + Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr"); return BinaryOperator::Create( Instruction::Shl, NewTrunc, @@ -645,7 +644,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { } } - if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL)) + if (Instruction *I = foldVecTruncToExtElt(CI, *this)) return I; return nullptr; @@ -668,13 +667,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits() - 1); - In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit"); + In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); + In = Builder.CreateIntCast(In, CI.getType(), false /*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, In->getName() + ".not"); + In = Builder.CreateXor(In, One, In->getName() + ".not"); } return replaceInstUsesWith(CI, In); @@ -713,19 +712,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (ShAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), - In->getName() + ".lobit"); + In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), + In->getName() + ".lobit"); } if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One); + In = Builder.CreateXor(In, One); } if (CI.getType() == In->getType()) return replaceInstUsesWith(CI, In); - Value *IntCast = Builder->CreateIntCast(In, CI.getType(), false); + Value *IntCast = Builder.CreateIntCast(In, CI.getType(), false); return replaceInstUsesWith(CI, IntCast); } } @@ -748,19 +747,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (UnknownBit.countPopulation() == 1) { if (!DoTransform) return ICI; - Value *Result = Builder->CreateXor(LHS, RHS); + Value *Result = Builder.CreateXor(LHS, RHS); // Mask off any bits that are set and won't be shifted away. if (KnownLHS.One.uge(UnknownBit)) - Result = Builder->CreateAnd(Result, + Result = Builder.CreateAnd(Result, ConstantInt::get(ITy, UnknownBit)); // Shift the bit we're testing down to the lsb. 
- Result = Builder->CreateLShr( + Result = Builder.CreateLShr( Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1)); Result->takeName(ICI); return replaceInstUsesWith(CI, Result); } @@ -960,7 +959,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); Constant *AndConst = ConstantInt::get(A->getType(), AndValue); - Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); + Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask"); return new ZExtInst(And, CI.getType()); } @@ -970,7 +969,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { AndValue)); } if (SrcSize > DstSize) { - Value *Trunc = Builder->CreateTrunc(A, CI.getType()); + Value *Trunc = Builder.CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), @@ -992,8 +991,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) - Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); - Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); + Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName()); BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast); // Perform the elimination. @@ -1020,7 +1019,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) && X->getType() == CI.getType()) { Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC); + return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC); } return nullptr; @@ -1043,12 +1042,12 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Value *Sh = ConstantInt::get(Op0->getType(), Op0->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); + In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/); if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In, In->getName()+".not"); + In = Builder.CreateNot(In, In->getName() + ".not"); return replaceInstUsesWith(CI, In); } } @@ -1079,26 +1078,26 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); // Perform a right shift to place the desired bit in the LSB. if (ShiftAmt) - In = Builder->CreateLShr(In, - ConstantInt::get(In->getType(), ShiftAmt)); + In = Builder.CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); // At this point "In" is either 1 or 0. Subtract 1 to turn // {1, 0} -> {0, -1}. 
- In = Builder->CreateAdd(In, - ConstantInt::getAllOnesValue(In->getType()), - "sext"); + In = Builder.CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); } else { // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); // Perform a left shift to place the desired bit in the MSB. if (ShiftAmt) - In = Builder->CreateShl(In, - ConstantInt::get(In->getType(), ShiftAmt)); + In = Builder.CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); // Distribute the bit over the whole bit width. - In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), - KnownZeroMask.getBitWidth() - 1), "sext"); + In = Builder.CreateAShr(In, ConstantInt::get(In->getType(), + KnownZeroMask.getBitWidth() - 1), "sext"); } if (CI.getType() == In->getType()) @@ -1191,7 +1190,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // instead. KnownBits Known = computeKnownBits(Src, 0, &CI); if (Known.isNonNegative()) { - Value *ZExt = Builder->CreateZExt(Src, DestTy); + Value *ZExt = Builder.CreateZExt(Src, DestTy); return replaceInstUsesWith(CI, ZExt); } @@ -1217,7 +1216,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), + return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"), ShAmt); } @@ -1229,7 +1228,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(X, ShAmt), ShAmt); + return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt); } if (ICmpInst *ICI = dyn_cast(Src)) @@ -1258,7 +1257,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); - A = Builder->CreateShl(A, ShAmtV, CI.getName()); + A = Builder.CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } @@ -1347,9 +1346,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // case of interest here is (float)((double)float + float)). if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1364,9 +1363,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // in the destination format if it can represent both sources. 
if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::CreateFMul(LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1382,9 +1381,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // TODO: Tighten bound via rigorous analysis of the unbalanced case. if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::CreateFDiv(LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1399,11 +1398,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (SrcWidth == OpWidth) break; if (LHSWidth < SrcWidth) - LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, RHSOrig->getType()); else if (RHSWidth <= SrcWidth) - RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, LHSOrig->getType()); if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) { - Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig); + Value *ExactResult = Builder.CreateFRem(LHSOrig, RHSOrig); if (Instruction *RI = dyn_cast(ExactResult)) RI->copyFastMathFlags(OpI); return CastInst::CreateFPCast(ExactResult, CI.getType()); @@ -1412,8 +1411,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // (fptrunc (fneg x)) -> (fneg (fptrunc x)) if (BinaryOperator::isFNeg(OpI)) { - Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), - CI.getType()); + Value *InnerTrunc = Builder.CreateFPTrunc(OpI->getOperand(1), + CI.getType()); Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc); RI->copyFastMathFlags(OpI); return RI; @@ -1432,10 +1431,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { (isa(SI->getOperand(1)) || isa(SI->getOperand(2))) && matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) { - Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), - CI.getType()); - Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), - CI.getType()); + Value *LHSTrunc = Builder.CreateFPTrunc(SI->getOperand(1), CI.getType()); + Value *RHSTrunc = Builder.CreateFPTrunc(SI->getOperand(2), CI.getType()); return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc); } @@ -1465,7 +1462,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // Do unary FP operation on smaller type. 
// (fptrunc (fabs x)) -> (fabs (fptrunc x)) - Value *InnerTrunc = Builder->CreateFPTrunc(Src, CI.getType()); + Value *InnerTrunc = Builder.CreateFPTrunc(Src, CI.getType()); Type *IntrinsicType[] = { CI.getType() }; Function *Overload = Intrinsic::getDeclaration( CI.getModule(), II->getIntrinsicID(), IntrinsicType); @@ -1482,7 +1479,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - if (Instruction *I = shrinkInsertElt(CI, *Builder)) + if (Instruction *I = shrinkInsertElt(CI, Builder)) return I; return nullptr; @@ -1577,7 +1574,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (CI.getType()->isVectorTy()) // Handle vectors of pointers. Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty); return new IntToPtrInst(P, CI.getType()); } @@ -1627,7 +1624,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { if (Ty->isVectorTy()) // Handle vectors of pointers. PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy); + Value *P = Builder.CreatePtrToInt(CI.getOperand(0), PtrTy); return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } @@ -1653,7 +1650,7 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, return nullptr; SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); - InVal = IC.Builder->CreateBitCast(InVal, SrcTy); + InVal = IC.Builder.CreateBitCast(InVal, SrcTy); } // Now that the element types match, get the shuffle mask and RHS of the @@ -1833,8 +1830,8 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (!Elements[i]) continue; // Unset element. - Result = IC.Builder->CreateInsertElement(Result, Elements[i], - IC.Builder->getInt32(i)); + Result = IC.Builder.CreateInsertElement(Result, Elements[i], + IC.Builder.getInt32(i)); } return Result; @@ -1845,8 +1842,7 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, /// vectors better than bitcasts of scalars because vector registers are /// usually not type-specific like scalar integer or scalar floating-point. static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, - InstCombiner &IC, - const DataLayout &DL) { + InstCombiner &IC) { // TODO: Create and use a pattern matcher for ExtractElementInst. 
auto *ExtElt = dyn_cast(BitCast.getOperand(0)); if (!ExtElt || !ExtElt->hasOneUse()) @@ -1860,8 +1856,8 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements(); auto *NewVecType = VectorType::get(DestType, NumElts); - auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(), - NewVecType, "bc"); + auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(), + NewVecType, "bc"); return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); } @@ -1870,7 +1866,7 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder) { Type *DestTy = BitCast.getType(); BinaryOperator *BO; - if (!DestTy->getScalarType()->isIntegerTy() || + if (!DestTy->isIntOrIntVectorTy() || !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) || !BO->isBitwiseLogicOp()) return nullptr; @@ -2033,8 +2029,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { // For each old PHI node, create a corresponding new PHI node with a type A. SmallDenseMap NewPNodes; for (auto *OldPN : OldPhiNodes) { - Builder->SetInsertPoint(OldPN); - PHINode *NewPN = Builder->CreatePHI(DestTy, OldPN->getNumOperands()); + Builder.SetInsertPoint(OldPN); + PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands()); NewPNodes[OldPN] = NewPN; } @@ -2047,8 +2043,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { if (auto *C = dyn_cast(V)) { NewV = ConstantExpr::getBitCast(C, DestTy); } else if (auto *LI = dyn_cast(V)) { - Builder->SetInsertPoint(LI->getNextNode()); - NewV = Builder->CreateBitCast(LI, DestTy); + Builder.SetInsertPoint(LI->getNextNode()); + NewV = Builder.CreateBitCast(LI, DestTy); Worklist.Add(LI); } else if (auto *BCI = dyn_cast(V)) { NewV = BCI->getOperand(0); @@ -2064,9 +2060,9 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { for (User *U : PN->users()) { auto *SI = dyn_cast(U); if (SI && SI->isSimple() && SI->getOperand(0) == PN) { - Builder->SetInsertPoint(SI); + Builder.SetInsertPoint(SI); auto *NewBC = - cast(Builder->CreateBitCast(NewPNodes[PN], SrcTy)); + cast(Builder.CreateBitCast(NewPNodes[PN], SrcTy)); SI->setOperand(0, NewBC); Worklist.Add(SI); assert(hasStoreUsersOnly(*NewBC)); @@ -2121,14 +2117,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { - SmallVector Idxs(NumZeros + 1, Builder->getInt32(0)); + SmallVector Idxs(NumZeros + 1, Builder.getInt32(0)); return GetElementPtrInst::CreateInBounds(Src, Idxs); } } if (VectorType *DestVTy = dyn_cast(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { - Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); + Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) @@ -2161,7 +2157,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // scalar-scalar cast. 
 if (!DestTy->isVectorTy()) {
 Value *Elem =
-     Builder->CreateExtractElement(Src,
+     Builder.CreateExtractElement(Src,
 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
 }
@@ -2190,8 +2186,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
 Tmp->getOperand(0)->getType() == DestTy) ||
 ((Tmp = dyn_cast(SVI->getOperand(1))) &&
 Tmp->getOperand(0)->getType() == DestTy)) {
-   Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy);
-   Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);
+   Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy);
+   Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy);
 // Return a new shuffle vector. Use the same element ID's, as we
 // know the vector types match #elts.
 return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2));
@@ -2204,13 +2200,13 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
 return I;
- if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+ if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
 return I;
- if (Instruction *I = foldBitCastBitwiseLogic(CI, *Builder))
+ if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
 return I;
- if (Instruction *I = foldBitCastSelect(CI, *Builder))
+ if (Instruction *I = foldBitCastSelect(CI, Builder))
 return I;
 if (SrcTy->isPointerTy())
@@ -2234,7 +2230,7 @@ Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
 MidTy = VectorType::get(MidTy, VT->getNumElements());
 }
- Value *NewBitCast = Builder->CreateBitCast(Src, MidTy);
+ Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
 return new AddrSpaceCastInst(NewBitCast, CI.getType());
 }
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 58b8b2f52629..60d1cde971dd 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -392,7 +392,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
 Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
 unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
 if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
-     Idx = Builder->CreateTrunc(Idx, IntPtrTy);
+     Idx = Builder.CreateTrunc(Idx, IntPtrTy);
 }
 // If the comparison is only true for one or two elements, emit direct
@@ -400,7 +400,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
 if (SecondTrueElement != Overdefined) {
 // None true -> false.
 if (FirstTrueElement == Undefined)
-     return replaceInstUsesWith(ICI, Builder->getFalse());
+     return replaceInstUsesWith(ICI, Builder.getFalse());
 Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);
@@ -409,9 +409,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
 return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);
 // True for two elements -> 'i == 47 | i == 72'.
-   Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx);
+   Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
 Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
-   Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx);
+   Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
 return BinaryOperator::CreateOr(C1, C2);
 }
@@ -420,7 +420,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
 if (SecondFalseElement != Overdefined) {
 // None false -> true.
if (FirstFalseElement == Undefined) - return replaceInstUsesWith(ICI, Builder->getTrue()); + return replaceInstUsesWith(ICI, Builder.getTrue()); Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); @@ -429,9 +429,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); // False for two elements -> 'i != 47 & i != 72'. - Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); + Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); - Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); + Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } @@ -443,7 +443,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Generate (i-FirstTrue) getType(), -FirstTrueElement); - Idx = Builder->CreateAdd(Idx, Offs); + Idx = Builder.CreateAdd(Idx, Offs); } Value *End = ConstantInt::get(Idx->getType(), @@ -457,7 +457,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). if (FirstFalseElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); - Idx = Builder->CreateAdd(Idx, Offs); + Idx = Builder.CreateAdd(Idx, Offs); } Value *End = ConstantInt::get(Idx->getType(), @@ -481,9 +481,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); if (Ty) { - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + Value *V = Builder.CreateIntCast(Idx, Ty, false); + V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } } @@ -566,7 +566,7 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); + VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; } @@ -588,10 +588,10 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, // Okay, we can do this evaluation. Start by converting the index to intptr. 
if (VariableIdx->getType() != IntPtrTy) - VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, + VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); + return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset"); } /// Returns true if we can rewrite Start as a GEP with pointer Base @@ -981,13 +981,13 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (LHSIndexTy != RHSIndexTy) { if (LHSIndexTy->getPrimitiveSizeInBits() < RHSIndexTy->getPrimitiveSizeInBits()) { - ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy); + ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy); } else - LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy); + LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy); } - Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), - LOffset, ROffset); + Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond), + LOffset, ROffset); return replaceInstUsesWith(I, Cmp); } @@ -1026,7 +1026,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return replaceInstUsesWith(I, // No comparison is needed here. - Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond))); + Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond))); else if (NumDifferences == 1 && GEPsInBounds) { Value *LHSV = GEPLHS->getOperand(DiffOperand); @@ -1174,7 +1174,7 @@ Instruction *InstCombiner::foldICmpAddOpConst(Instruction &ICI, // (X+ -1) >s X --> X X == -128 assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); - Constant *C = Builder->getInt(CI->getValue()-1); + Constant *C = Builder.getInt(CI->getValue() - 1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); } @@ -1347,17 +1347,17 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, Value *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::sadd_with_overflow, NewType); - InstCombiner::BuilderTy *Builder = IC.Builder; + InstCombiner::BuilderTy &Builder = IC.Builder; // Put the new code above the original add, in case there are any uses of the // add between the add and the compare. - Builder->SetInsertPoint(OrigAdd); + Builder.SetInsertPoint(OrigAdd); - Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName() + ".trunc"); - Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName() + ".trunc"); - CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd"); - Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); - Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc"); + Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc"); + CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd"); + Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType()); // The inner add was the result of the narrow add, zero extended to the // wider type. Replace it with the result computed by the intrinsic. 
@@ -1434,9 +1434,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { ConstantRange Intersection = DominatingCR.intersectWith(CR); ConstantRange Difference = DominatingCR.difference(CR); if (Intersection.isEmptySet()) - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (Difference.isEmptySet()) - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); // If this is a normal comparison, it demands all bits. If it is a sign // bit comparison, it only demands the sign bit. @@ -1452,9 +1452,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { return nullptr; if (auto *AI = Intersection.getSingleElement()) - return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI)); + return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI)); if (auto *AD = Difference.getSingleElement()) - return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD)); + return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD)); } return nullptr; @@ -1628,11 +1628,11 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, !Shift->isArithmeticShift() && !isa(Shift->getOperand(0))) { // Compute C2 << Y. Value *NewShift = - IsShl ? Builder->CreateLShr(And->getOperand(1), Shift->getOperand(1)) - : Builder->CreateShl(And->getOperand(1), Shift->getOperand(1)); + IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1)) + : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1)); // Compute X & (C2 << Y). - Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NewShift); + Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift); Cmp.setOperand(0, NewAnd); return &Cmp; } @@ -1670,7 +1670,7 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, unsigned WideScalarBits = WideType->getScalarSizeInBits(); Constant *ZextC1 = ConstantInt::get(WideType, C1->zext(WideScalarBits)); Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits)); - Value *NewAnd = Builder->CreateAnd(W, ZextC2, And->getName()); + Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName()); return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1); } } @@ -1704,12 +1704,12 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One); } else { if (UsesRemoved >= 3) - NewOr = Builder->CreateOr(Builder->CreateShl(One, B, LShr->getName(), - /*HasNUW=*/true), - One, Or->getName()); + NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(), + /*HasNUW=*/true), + One, Or->getName()); } if (NewOr) { - Value *NewAnd = Builder->CreateAnd(A, NewOr, And->getName()); + Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName()); Cmp.setOperand(0, NewAnd); return &Cmp; } @@ -1772,7 +1772,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); if (And->getType()->isVectorTy()) NTy = VectorType::get(NTy, And->getType()->getVectorNumElements()); - Value *Trunc = Builder->CreateTrunc(X, NTy); + Value *Trunc = Builder.CreateTrunc(X, NTy); auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? 
CmpInst::ICMP_SGE : CmpInst::ICMP_SLT; return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy)); @@ -1811,9 +1811,9 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 // -> and (icmp eq P, null), (icmp eq Q, null). Value *CmpP = - Builder->CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); + Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); Value *CmpQ = - Builder->CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); + Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); auto LogicOpc = Pred == ICmpInst::Predicate::ICMP_EQ ? Instruction::And : Instruction::Or; return BinaryOperator::Create(LogicOpc, CmpP, CmpQ); @@ -1993,7 +1993,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, Constant *Mask = ConstantInt::get( ShType, APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue())); - Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); Constant *LShrC = ConstantInt::get(ShType, C->lshr(*ShiftAmt)); return new ICmpInst(Pred, And, LShrC); } @@ -2005,7 +2005,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, Constant *Mask = ConstantInt::get( ShType, APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1)); - Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(ShType)); } @@ -2024,7 +2024,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements()); Constant *NewC = ConstantInt::get(TruncTy, C->ashr(*ShiftAmt).trunc(TypeBits - Amt)); - return new ICmpInst(Pred, Builder->CreateTrunc(X, TruncTy), NewC); + return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC); } return nullptr; @@ -2076,8 +2076,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, Constant *DivCst = ConstantInt::get( Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); - Value *Tmp = IsAShr ? Builder->CreateSDiv(X, DivCst, "", Shr->isExact()) - : Builder->CreateUDiv(X, DivCst, "", Shr->isExact()); + Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact()) + : Builder.CreateUDiv(X, DivCst, "", Shr->isExact()); Cmp.setOperand(0, Tmp); @@ -2115,7 +2115,7 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // Otherwise strength reduce the shift into an 'and'. APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); Constant *Mask = ConstantInt::get(Shr->getType(), Val); - Value *And = Builder->CreateAnd(X, Mask, Shr->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); return new ICmpInst(Pred, And, ShiftedCmpRHS); } @@ -2279,7 +2279,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, default: llvm_unreachable("Unhandled icmp opcode!"); case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow) return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); @@ -2291,7 +2291,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiBound->getUniqueInteger(), DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); @@ -2305,16 +2305,16 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (LoOverflow == -1) // Low bound is less than input range. - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); return new ICmpInst(Pred, X, LoBound); case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow == -1) // High bound less than input range. - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); @@ -2361,12 +2361,12 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, // iff (C2 & (C - 1)) == C - 1 and C is a power of 2 if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == (*C - 1)) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateOr(Y, *C - 1), X); + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, *C - 1), X); // C2 - Y >u C -> (Y | C) != C2 // iff C2 & C == C and C + 1 is a power of 2 if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == *C) - return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateOr(Y, *C), X); + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, *C), X); return nullptr; } @@ -2422,14 +2422,14 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, // iff C & (C2-1) == 0 // C2 is a power of 2 if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == 0) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateAnd(X, -(*C)), + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -(*C)), ConstantExpr::getNeg(cast(Y))); // X+C >u C2 -> (X & ~C2) != C // iff C & C2 == 0 // C2+1 is a power of 2 if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == 0) - return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateAnd(X, ~(*C)), + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~(*C)), ConstantExpr::getNeg(cast(Y))); return nullptr; @@ -2493,13 +2493,13 @@ Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp, // When none of the three constants satisfy the predicate for the RHS (C), // the entire original Cmp can be simplified to a false. 
- Value *Cond = Builder->getFalse(); + Value *Cond = Builder.getFalse(); if (TrueWhenLessThan) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS)); if (TrueWhenEqual) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS)); if (TrueWhenGreaterThan) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS)); return replaceInstUsesWith(Cmp, Cond); } @@ -2615,7 +2615,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, if (C->isNullValue() && BO->hasOneUse()) { const APInt *BOC; if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { - Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName()); + Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName()); return new ICmpInst(Pred, NewRem, Constant::getNullValue(BO->getType())); } @@ -2637,7 +2637,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(Pred, NegVal, BOp1); if (BO->hasOneUse()) { - Value *Neg = Builder->CreateNeg(BOp1); + Value *Neg = Builder.CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(Pred, BOp0, Neg); } @@ -2676,7 +2676,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // Replace (X | C) == -1 with (X & ~C) == ~C. // This removes the -1 constant. Constant *NotBOC = ConstantExpr::getNot(cast(BOp1)); - Value *And = Builder->CreateAnd(BOp0, NotBOC); + Value *And = Builder.CreateAnd(BOp0, NotBOC); return new ICmpInst(Pred, And, NotBOC); } break; @@ -2740,23 +2740,26 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, if (!II || !Cmp.isEquality()) return nullptr; - // Handle icmp {eq|ne} , intcst. + // Handle icmp {eq|ne} , Constant. + Type *Ty = II->getType(); switch (II->getIntrinsicID()) { case Intrinsic::bswap: Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, Builder->getInt(C->byteSwap())); + Cmp.setOperand(1, ConstantInt::get(Ty, C->byteSwap())); return &Cmp; + case Intrinsic::ctlz: case Intrinsic::cttz: // ctz(A) == bitwidth(A) -> A == 0 and likewise for != if (*C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, ConstantInt::getNullValue(II->getType())); + Cmp.setOperand(1, ConstantInt::getNullValue(Ty)); return &Cmp; } break; + case Intrinsic::ctpop: { // popcount(A) == 0 -> A == 0 and likewise for != // popcount(A) == bitwidth(A) -> A == -1 and likewise for != @@ -2764,8 +2767,8 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, if (IsZero || *C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - auto *NewOp = IsZero ? Constant::getNullValue(II->getType()) - : Constant::getAllOnesValue(II->getType()); + auto *NewOp = + IsZero ? 
Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty); Cmp.setOperand(1, NewOp); return &Cmp; } @@ -2774,6 +2777,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, default: break; } + return nullptr; } @@ -2841,11 +2845,11 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) { } if (Transform) { if (!Op1) - Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, - I.getName()); + Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, + I.getName()); if (!Op2) - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, - I.getName()); + Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, + I.getName()); return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); } break; @@ -3029,12 +3033,12 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { APInt AP1Abs = C1->getValue().abs(); APInt AP2Abs = C2->getValue().abs(); if (AP1Abs.uge(AP2Abs)) { - ConstantInt *C3 = Builder->getInt(AP1 - AP2); - Value *NewAdd = Builder->CreateNSWAdd(A, C3); + ConstantInt *C3 = Builder.getInt(AP1 - AP2); + Value *NewAdd = Builder.CreateNSWAdd(A, C3); return new ICmpInst(Pred, NewAdd, C); } else { - ConstantInt *C3 = Builder->getInt(AP2 - AP1); - Value *NewAdd = Builder->CreateNSWAdd(C, C3); + ConstantInt *C3 = Builder.getInt(AP2 - AP1); + Value *NewAdd = Builder.CreateNSWAdd(C, C3); return new ICmpInst(Pred, A, NewAdd); } } @@ -3157,8 +3161,8 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { Constant *Mask = ConstantInt::get( BO0->getType(), APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); - Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } // If there are no trailing zeros in the multiplier, just eliminate @@ -3315,8 +3319,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { ConstantInt *C1, *C2; if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { - Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC); + Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue()); + Value *Xor = Builder.CreateXor(C, NC); return new ICmpInst(Pred, A, Xor); } @@ -3362,8 +3366,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { } if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y); - Op1 = Builder->CreateAnd(Op1, Z); + Op1 = Builder.CreateXor(X, Y); + Op1 = Builder.CreateAnd(Op1, Z); I.setOperand(0, Op1); I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; @@ -3380,7 +3384,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { APInt Pow2 = Cst1->getValue() + 1; if (Pow2.isPowerOf2() && isa(A->getType()) && Pow2.logBase2() == cast(A->getType())->getBitWidth()) - return new ICmpInst(Pred, A, Builder->CreateTrunc(B, A->getType())); + return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType())); } // (A >> C) == (B >> C) --> (A^B) u< (1 << C) @@ -3394,9 +3398,9 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { if (ShAmt < TypeBits && ShAmt != 0) { ICmpInst::Predicate NewPred = Pred == ICmpInst::ICMP_NE ? 
ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; - Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); - return new ICmpInst(NewPred, Xor, Builder->getInt(CmpVal)); + return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal)); } } @@ -3406,9 +3410,9 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { unsigned TypeBits = Cst1->getBitWidth(); unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); if (ShAmt < TypeBits && ShAmt != 0) { - Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt); - Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal), + Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal), I.getName() + ".mask"); return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType())); } @@ -3433,11 +3437,20 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { APInt CmpV = Cst1->getValue().zext(ASize); CmpV <<= ShAmt; - Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); - return new ICmpInst(Pred, Mask, Builder->getInt(CmpV)); + Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV)); + return new ICmpInst(Pred, Mask, Builder.getInt(CmpV)); } } + // If both operands are byte-swapped or bit-reversed, just compare the + // original values. + // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant() + // and handle more intrinsics. + if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) || + (match(Op0, m_BitReverse(m_Value(A))) && + match(Op1, m_BitReverse(m_Value(B))))) + return new ICmpInst(Pred, A, B); + return nullptr; } @@ -3462,7 +3475,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { RHSOp = RHSC->getOperand(0); // If the pointer types don't match, insert a bitcast. if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); + RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType()); } } else if (auto *RHSC = dyn_cast(ICmp.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); @@ -3546,7 +3559,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { // We're performing an unsigned comp with a sign extended value. // This is true if the input is >= 0. [aka >s -1] Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); + Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); // Finally, return the value computed. if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) @@ -3574,7 +3587,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, // may be pointing to the compare. We want to insert the new instructions // before the add in case there are uses of the add between the add and the // compare. 
- Builder->SetInsertPoint(&OrigI); + Builder.SetInsertPoint(&OrigI); switch (OCF) { case OCF_INVALID: @@ -3583,11 +3596,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_UNSIGNED_ADD: { OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI); if (OR == OverflowResult::NeverOverflows) - return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(), true); if (OR == OverflowResult::AlwaysOverflows) - return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true); + return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true); // Fall through uadd into sadd LLVM_FALLTHROUGH; @@ -3595,13 +3608,13 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_SIGNED_ADD: { // X + 0 -> {X, false} if (match(RHS, m_Zero())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); // We can strength reduce this signed add into a regular add if we can prove // that it will never overflow. if (OCF == OCF_SIGNED_ADD) if (willNotOverflowSignedAdd(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(), true); break; } @@ -3610,15 +3623,15 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_SIGNED_SUB: { // X - 0 -> {X, false} if (match(RHS, m_Zero())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); if (OCF == OCF_SIGNED_SUB) { if (willNotOverflowSignedSub(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(), true); } else { if (willNotOverflowUnsignedSub(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(), true); } break; @@ -3627,28 +3640,28 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_UNSIGNED_MUL: { OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI); if (OR == OverflowResult::NeverOverflows) - return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(), true); if (OR == OverflowResult::AlwaysOverflows) - return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true); + return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true); LLVM_FALLTHROUGH; } case OCF_SIGNED_MUL: // X * undef -> undef if (isa(RHS)) - return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false); + return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false); // X * 0 -> {0, false} if (match(RHS, m_Zero())) - return SetResult(RHS, Builder->getFalse(), false); + return SetResult(RHS, Builder.getFalse(), false); // X * 1 -> {X, false} if (match(RHS, m_One())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); if (OCF == OCF_SIGNED_MUL) if (willNotOverflowSignedMul(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(), true); break; } @@ -3813,25 +3826,25 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, return nullptr; } - 
InstCombiner::BuilderTy *Builder = IC.Builder; - Builder->SetInsertPoint(MulInstr); + InstCombiner::BuilderTy &Builder = IC.Builder; + Builder.SetInsertPoint(MulInstr); // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) Value *MulA = A, *MulB = B; if (WidthA < MulWidth) - MulA = Builder->CreateZExt(A, MulType); + MulA = Builder.CreateZExt(A, MulType); if (WidthB < MulWidth) - MulB = Builder->CreateZExt(B, MulType); + MulB = Builder.CreateZExt(B, MulType); Value *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::umul_with_overflow, MulType); - CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul"); + CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul"); IC.Worklist.Add(MulInstr); // If there are uses of mul result other than the comparison, we know that // they are truncation or binary AND. Change them to use result of // mul.with.overflow and adjust properly mask/size. if (MulVal->hasNUsesOrMore(2)) { - Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); + Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value"); for (User *U : MulVal->users()) { if (U == &I || U == OtherVal) continue; @@ -3843,17 +3856,18 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, } else if (BinaryOperator *BO = dyn_cast(U)) { assert(BO->getOpcode() == Instruction::And); // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) - ConstantInt *CI = cast(BO->getOperand(1)); - APInt ShortMask = CI->getValue().trunc(MulWidth); - Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); - Instruction *Zext = - cast(Builder->CreateZExt(ShortAnd, BO->getType())); - IC.Worklist.Add(Zext); + Value *ShortMask = + Builder.CreateTrunc(BO->getOperand(1), Builder.getIntNTy(MulWidth)); + Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask); + Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType()); + if (auto *ZextI = dyn_cast(Zext)) + IC.Worklist.Add(ZextI); IC.replaceInstUsesWith(*BO, Zext); } else { llvm_unreachable("Unexpected Binary operation"); } - IC.Worklist.Add(cast(U)); + if (auto *UI = dyn_cast(U)) + IC.Worklist.Add(UI); } } if (isa(OtherVal)) @@ -3884,7 +3898,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, llvm_unreachable("Unexpected predicate"); } if (Inverse) { - Value *Res = Builder->CreateExtractValue(Call, 1); + Value *Res = Builder.CreateExtractValue(Call, 1); return BinaryOperator::CreateNot(Res); } @@ -4239,7 +4253,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { if (ConstantInt *CI = dyn_cast(Op1)) { if (Op1Max == Op0Min + 1) // A A == C-1 if min(A)+1 == C return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - Builder->getInt(CI->getValue() - 1)); + Builder.getInt(CI->getValue() - 1)); } break; case ICmpInst::ICMP_SGT: @@ -4253,7 +4267,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { if (ConstantInt *CI = dyn_cast(Op1)) { if (Op1Min == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - Builder->getInt(CI->getValue() + 1)); + Builder.getInt(CI->getValue() + 1)); } break; case ICmpInst::ICMP_SGE: @@ -4358,7 +4372,7 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { static Instruction *canonicalizeICmpBool(ICmpInst &I, InstCombiner::BuilderTy &Builder) { Value *A = I.getOperand(0), *B = I.getOperand(1); - assert(A->getType()->getScalarType()->isIntegerTy(1) && "Bools only"); + assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only"); // A boolean compared to true/false can be simplified to Op0/true/false in // 14 out 
of the 20 (10 predicates * 2 constants) possible combinations. @@ -4465,8 +4479,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } - if (Op0->getType()->getScalarType()->isIntegerTy(1)) - if (Instruction *Res = canonicalizeICmpBool(I, *Builder)) + if (Op0->getType()->isIntOrIntVectorTy(1)) + if (Instruction *Res = canonicalizeICmpBool(I, Builder)) return Res; if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I)) @@ -4559,7 +4573,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); } else { // Otherwise, cast the RHS right before the icmp - Op1 = Builder->CreateBitCast(Op1, Op0->getType()); + Op1 = Builder.CreateBitCast(Op1, Op0->getType()); } } return new ICmpInst(I.getPredicate(), Op0, Op1); @@ -4592,8 +4606,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality()) - return new ICmpInst(I.getInversePredicate(), - Builder->CreateAnd(A, B), + return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B), Op1); // ~X < ~Y --> Y < X @@ -4693,10 +4706,10 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven); if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) { if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE); - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); } } @@ -4762,9 +4775,9 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, Pred = ICmpInst::ICMP_NE; break; case FCmpInst::FCMP_ORD: - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); case FCmpInst::FCMP_UNO: - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); } // Now we know that the APFloat is a normal number, zero or inf. @@ -4782,8 +4795,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } else { // If the RHS value is > UnsignedMax, fold the comparison. 
This handles @@ -4794,8 +4807,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } @@ -4807,8 +4820,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } else { // See if the RHS value is < UnsignedMin. @@ -4818,8 +4831,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } @@ -4841,14 +4854,14 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, switch (Pred) { default: llvm_unreachable("Unexpected integer comparison!"); case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); case ICmpInst::ICMP_ULE: // (float)int <= 4.4 --> int <= 4 // (float)int <= -4.4 --> false if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); break; case ICmpInst::ICMP_SLE: // (float)int <= 4.4 --> int <= 4 @@ -4860,7 +4873,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int < -4.4 --> false // (float)int < 4.4 --> int <= 4 if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); Pred = ICmpInst::ICMP_ULE; break; case ICmpInst::ICMP_SLT: @@ -4873,7 +4886,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int > 4.4 --> int > 4 // (float)int > -4.4 --> true if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); break; case ICmpInst::ICMP_SGT: // (float)int > 4.4 --> int > 4 @@ -4885,7 +4898,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); Pred = ICmpInst::ICMP_UGT; break; case ICmpInst::ICMP_SGE: diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 87f11467b95e..c38a4981bf1d 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h 
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -21,8 +21,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -212,7 +210,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner /// \brief An IRBuilder that automatically inserts new instructions into the /// worklist. typedef IRBuilder BuilderTy; - BuilderTy *Builder; + BuilderTy &Builder; private: // Mode in which we are running the combiner. @@ -235,7 +233,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner bool MadeIRChange; public: - InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, + InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) @@ -598,9 +596,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). - Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &, - Instruction::BinaryOps, Value *, Value *, Value *, - Value *); + Value *tryFactorization(BinaryOperator &, Instruction::BinaryOps, Value *, + Value *, Value *, Value *); /// Match a select chain which produces one of three values based on whether /// the LHS is less than, equal to, or greater than RHS respectively. @@ -639,7 +636,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner APInt &UndefElts, unsigned Depth = 0); Value *SimplifyVectorOp(BinaryOperator &Inst); - Value *SimplifyBSwap(BinaryOperator &Inst); /// Given a binary operator, cast instruction, or select which has a PHI node diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 26bee204e5a4..c59e1ce69ac2 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -189,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { return nullptr; // Canonicalize it. - Value *V = IC.Builder->getInt32(1); + Value *V = IC.Builder.getInt32(1); AI.setOperand(0, V); return &AI; } @@ -197,7 +197,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName()); + AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -229,7 +229,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // any casting is exposed early. 
Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); + Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false); AI.setOperand(0, V); return &AI; } @@ -458,10 +458,10 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT SmallVector, 8> MD; LI.getAllMetadata(MD); - LoadInst *NewLoad = IC.Builder->CreateAlignedLoad( - IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)), + LoadInst *NewLoad = IC.Builder.CreateAlignedLoad( + IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)), LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix); - NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); MDBuilder MDB(NewLoad->getContext()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; @@ -518,10 +518,10 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value SmallVector, 8> MD; SI.getAllMetadata(MD); - StoreInst *NewStore = IC.Builder->CreateAlignedStore( - V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), + StoreInst *NewStore = IC.Builder.CreateAlignedStore( + V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), SI.getAlignment(), SI.isVolatile()); - NewStore->setAtomic(SI.getOrdering(), SI.getSynchScope()); + NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; MDNode *N = MDPair.second; @@ -613,7 +613,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast(*UI++); - IC.Builder->SetInsertPoint(SI); + IC.Builder.SetInsertPoint(SI); combineStoreToNewValue(IC, *SI, NewLoad); IC.eraseInstFromFunction(*SI); } @@ -664,7 +664,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { AAMDNodes AAMD; LI.getAAMetadata(AAMD); NewLoad->setAAMetadata(AAMD); - return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue( + return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue( UndefValue::get(T), NewLoad, 0, Name)); } @@ -689,15 +689,15 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), - Name + ".elt"); + auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), + Name + ".elt"); auto EltAlign = MinAlign(Align, SL->getElementOffset(i)); - auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack"); + auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack"); // Propagate AA metadata. It'll still be valid on the narrowed load. 
AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); - V = IC.Builder->CreateInsertValue(V, L, i); + V = IC.Builder.CreateInsertValue(V, L, i); } V->setName(Name); @@ -712,7 +712,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { AAMDNodes AAMD; LI.getAAMetadata(AAMD); NewLoad->setAAMetadata(AAMD); - return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue( + return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue( UndefValue::get(T), NewLoad, 0, Name)); } @@ -740,14 +740,14 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices), - Name + ".elt"); - auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset), - Name + ".unpack"); + auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices), + Name + ".elt"); + auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset), + Name + ".unpack"); AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); - V = IC.Builder->CreateInsertValue(V, L, i); + V = IC.Builder.CreateInsertValue(V, L, i); Offset += EltSize; } @@ -982,8 +982,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { combineMetadataForCSE(cast(AvailableVal), &LI); return replaceInstUsesWith( - LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), - LI.getName() + ".cast")); + LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(), + LI.getName() + ".cast")); } // None of the following transforms are legal for volatile/ordered atomic @@ -1019,15 +1019,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { unsigned Align = LI.getAlignment(); if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) && isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) { - LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"); - LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"); + LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); assert(LI.isUnordered() && "implied by above"); V1->setAlignment(Align); - V1->setAtomic(LI.getOrdering(), LI.getSynchScope()); + V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); V2->setAlignment(Align); - V2->setAtomic(LI.getOrdering(), LI.getSynchScope()); + V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); return SelectInst::Create(SI->getCondition(), V1, V2); } @@ -1172,7 +1172,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { // If the struct only have one element, we unpack. 
unsigned Count = ST->getNumElements(); if (Count == 1) { - V = IC.Builder->CreateExtractValue(V, 0); + V = IC.Builder.CreateExtractValue(V, 0); combineStoreToNewValue(IC, SI, V); return true; } @@ -1201,12 +1201,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), - AddrName); - auto *Val = IC.Builder->CreateExtractValue(V, i, EltName); + auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), + AddrName); + auto *Val = IC.Builder.CreateExtractValue(V, i, EltName); auto EltAlign = MinAlign(Align, SL->getElementOffset(i)); - llvm::Instruction *NS = - IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign); + llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign); AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); @@ -1219,7 +1218,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { // If the array only have one element, we unpack. auto NumElements = AT->getNumElements(); if (NumElements == 1) { - V = IC.Builder->CreateExtractValue(V, 0); + V = IC.Builder.CreateExtractValue(V, 0); combineStoreToNewValue(IC, SI, V); return true; } @@ -1252,11 +1251,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices), - AddrName); - auto *Val = IC.Builder->CreateExtractValue(V, i, EltName); + auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices), + AddrName); + auto *Val = IC.Builder.CreateExtractValue(V, i, EltName); auto EltAlign = MinAlign(Align, Offset); - Instruction *NS = IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign); + Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign); AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); @@ -1541,7 +1540,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.isVolatile(), SI.getAlignment(), SI.getOrdering(), - SI.getSynchScope()); + SI.getSyncScopeID()); InsertNewInstBefore(NewSI, *BBI); // The debug locations of the original instructions might differ; merge them. 
NewSI->setDebugLoc(DILocation::getMergedLocation(SI.getDebugLoc(), diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 579639a6194e..e3a50220f94e 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -39,8 +39,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, Value *A = nullptr, *B = nullptr, *One = nullptr; if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) && match(One, m_One())) { - A = IC.Builder->CreateSub(A, B); - return IC.Builder->CreateShl(One, A); + A = IC.Builder.CreateSub(A, B); + return IC.Builder.CreateShl(One, A); } // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it @@ -250,9 +250,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { ConstantInt *C1; Value *Sub = nullptr; if (match(Op0, m_Sub(m_Value(Y), m_Value(X)))) - Sub = Builder->CreateSub(X, Y, "suba"); + Sub = Builder.CreateSub(X, Y, "suba"); else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1)))) - Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc"); + Sub = Builder.CreateSub(Builder.CreateNeg(C1), Y, "subc"); if (Sub) return BinaryOperator::CreateMul(Sub, @@ -272,11 +272,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *X; Constant *C1; if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) { - Value *Mul = Builder->CreateMul(C1, Op1); + Value *Mul = Builder.CreateMul(C1, Op1); // Only go forward with the transform if C1*CI simplifies to a tidier // constant. if (!match(Mul, m_Mul(m_Value(), m_Value()))) - return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul); + return BinaryOperator::CreateAdd(Builder.CreateMul(X, Op1), Mul); } } } @@ -318,7 +318,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem : Instruction::SRem; - Value *Rem = Builder->CreateBinOp(RemOpc, X, DivOp1); + Value *Rem = Builder.CreateBinOp(RemOpc, X, DivOp1); if (DivOp1 == Y) return BinaryOperator::CreateSub(X, Rem); return BinaryOperator::CreateSub(Rem, X); @@ -326,7 +326,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } /// i1 mul -> i1 and. - if (I.getType()->getScalarType()->isIntegerTy(1)) + if (I.getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y @@ -368,7 +368,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } if (BoolCast) { - Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), + Value *V = Builder.CreateSub(Constant::getNullValue(I.getType()), BoolCast); return BinaryOperator::CreateAnd(V, OtherOp); } @@ -386,7 +386,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) { // Insert the new, smaller mul. Value *NewMul = - Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv"); + Builder.CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv"); return new SExtInst(NewMul, I.getType()); } } @@ -403,7 +403,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { willNotOverflowSignedMul(Op0Conv->getOperand(0), Op1Conv->getOperand(0), I)) { // Insert the new integer mul. 
- Value *NewMul = Builder->CreateNSWMul( + Value *NewMul = Builder.CreateNSWMul( Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv"); return new SExtInst(NewMul, I.getType()); } @@ -422,7 +422,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) { // Insert the new, smaller mul. Value *NewMul = - Builder->CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv"); + Builder.CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv"); return new ZExtInst(NewMul, I.getType()); } } @@ -439,7 +439,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { willNotOverflowUnsignedMul(Op0Conv->getOperand(0), Op1Conv->getOperand(0), I)) { // Insert the new integer mul. - Value *NewMul = Builder->CreateNUWMul( + Value *NewMul = Builder.CreateNUWMul( Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv"); return new ZExtInst(NewMul, I.getType()); } @@ -698,11 +698,11 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } // if pattern detected emit alternate sequence if (OpX && OpY) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(Log2->getFastMathFlags()); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(Log2->getFastMathFlags()); Log2->setArgOperand(0, OpY); - Value *FMulVal = Builder->CreateFMul(OpX, Log2); - Value *FSub = Builder->CreateFSub(FMulVal, OpX); + Value *FMulVal = Builder.CreateFMul(OpX, Log2); + Value *FSub = Builder.CreateFSub(FMulVal, OpX); FSub->takeName(&I); return replaceInstUsesWith(I, FSub); } @@ -714,23 +714,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { for (int i = 0; i < 2; i++) { bool IgnoreZeroSign = I.hasNoSignedZeros(); if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(I.getFastMathFlags()); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); // -X * -Y => X*Y if (N1) { - Value *FMul = Builder->CreateFMul(N0, N1); + Value *FMul = Builder.CreateFMul(N0, N1); FMul->takeName(&I); return replaceInstUsesWith(I, FMul); } if (Opnd0->hasOneUse()) { // -X * Y => -(X*Y) (Promote negation as high as possible) - Value *T = Builder->CreateFMul(N0, Opnd1); - Value *Neg = Builder->CreateFNeg(T); + Value *T = Builder.CreateFMul(N0, Opnd1); + Value *Neg = Builder.CreateFNeg(T); Neg->takeName(&I); return replaceInstUsesWith(I, Neg); } @@ -755,10 +755,10 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Y = Opnd0_0; if (Y) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(I.getFastMathFlags()); - Value *T = Builder->CreateFMul(Opnd1, Opnd1); - Value *R = Builder->CreateFMul(T, Y); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + Value *T = Builder.CreateFMul(Opnd1, Opnd1); + Value *R = Builder.CreateFMul(T, Y); R->takeName(&I); return replaceInstUsesWith(I, R); } @@ -824,7 +824,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { *I = SI->getOperand(NonNullOperand); Worklist.Add(&*BBI); } else if (*I == SelectCond) { - *I = Builder->getInt1(NonNullOperand == 1); + *I = Builder.getInt1(NonNullOperand == 1); Worklist.Add(&*BBI); } } @@ -938,20 +938,18 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } if (match(Op0, m_One())) { - assert(!I.getType()->getScalarType()->isIntegerTy(1) && - "i1 divide 
not removed?"); + assert(!I.getType()->isIntOrIntVectorTy(1) && "i1 divide not removed?"); if (I.getOpcode() == Instruction::SDiv) { // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the // result is one, if Op1 is -1 then the result is minus one, otherwise // it's zero. - Value *Inc = Builder->CreateAdd(Op1, Op0); - Value *Cmp = Builder->CreateICmpULT( - Inc, ConstantInt::get(I.getType(), 3)); + Value *Inc = Builder.CreateAdd(Op1, Op0); + Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(I.getType(), 3)); return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); } else { // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the // result is one, otherwise it's zero. - return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType()); + return new ZExtInst(Builder.CreateICmpEQ(Op1, Op0), I.getType()); } } @@ -1026,7 +1024,7 @@ static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1, // X udiv C, where C >= signbit static Instruction *foldUDivNegCst(Value *Op0, Value *Op1, const BinaryOperator &I, InstCombiner &IC) { - Value *ICI = IC.Builder->CreateICmpULT(Op0, cast(Op1)); + Value *ICI = IC.Builder.CreateICmpULT(Op0, cast(Op1)); return SelectInst::Create(ICI, Constant::getNullValue(I.getType()), ConstantInt::get(I.getType(), 1)); @@ -1045,10 +1043,9 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I, if (!match(ShiftLeft, m_Shl(m_APInt(CI), m_Value(N)))) llvm_unreachable("match should never fail here!"); if (*CI != 1) - N = IC.Builder->CreateAdd(N, - ConstantInt::get(N->getType(), CI->logBase2())); + N = IC.Builder.CreateAdd(N, ConstantInt::get(N->getType(), CI->logBase2())); if (Op1 != ShiftLeft) - N = IC.Builder->CreateZExt(N, Op1->getType()); + N = IC.Builder.CreateZExt(N, Op1->getType()); BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N); if (I.isExact()) LShr->setIsExact(); @@ -1134,7 +1131,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (ZExtInst *ZOp0 = dyn_cast(Op0)) if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) return new ZExtInst( - Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()), + Builder.CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()), I.getType()); // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...)))) @@ -1209,7 +1206,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Constant *NarrowDivisor = ConstantExpr::getTrunc(cast(Op1), Op0Src->getType()); - Value *NarrowOp = Builder->CreateSDiv(Op0Src, NarrowDivisor); + Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor); return new SExtInst(NarrowOp, Op0->getType()); } } @@ -1217,7 +1214,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Constant *RHS = dyn_cast(Op1)) { // X/INT_MIN -> X == INT_MIN if (RHS->isMinSignedValue()) - return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType()); + return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType()); // -X/C --> X/-C provided the negation doesn't overflow. 
Value *X; @@ -1380,7 +1377,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { // (X/Y) / Z => X / (Y*Z) // if (!isa(Y) || !isa(Op1)) { - NewInst = Builder->CreateFMul(Y, Op1); + NewInst = Builder.CreateFMul(Y, Op1); if (Instruction *RI = dyn_cast(NewInst)) { FastMathFlags Flags = I.getFastMathFlags(); Flags &= cast(Op0)->getFastMathFlags(); @@ -1392,7 +1389,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { // Z / (X/Y) => Z*Y / X // if (!isa(Y) || !isa(Op0)) { - NewInst = Builder->CreateFMul(Op0, Y); + NewInst = Builder.CreateFMul(Op0, Y); if (Instruction *RI = dyn_cast(NewInst)) { FastMathFlags Flags = I.getFastMathFlags(); Flags &= cast(Op1)->getFastMathFlags(); @@ -1483,28 +1480,28 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // (zext A) urem (zext B) --> zext (A urem B) if (ZExtInst *ZOp0 = dyn_cast(Op0)) if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) - return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1), + return new ZExtInst(Builder.CreateURem(ZOp0->getOperand(0), ZOp1), I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(Op1, N1); + Value *Add = Builder.CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); } // 1 urem X -> zext(X != 1) if (match(Op0, m_One())) { - Value *Cmp = Builder->CreateICmpNE(Op1, Op0); - Value *Ext = Builder->CreateZExt(Cmp, I.getType()); + Value *Cmp = Builder.CreateICmpNE(Op1, Op0); + Value *Ext = Builder.CreateZExt(Cmp, I.getType()); return replaceInstUsesWith(I, Ext); } // X urem C -> X < C ? X : X - C, where C >= signbit. const APInt *DivisorC; if (match(Op1, m_APInt(DivisorC)) && DivisorC->isNegative()) { - Value *Cmp = Builder->CreateICmpULT(Op0, Op1); - Value *Sub = Builder->CreateSub(Op0, Op1); + Value *Cmp = Builder.CreateICmpULT(Op0, Op1); + Value *Sub = Builder.CreateSub(Op0, Op1); return SelectInst::Create(Cmp, Op0, Sub); } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 5dbf1e85b05b..0011412c2bf4 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -636,10 +636,10 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, /// Return an existing non-zero constant if this phi node has one, otherwise /// return constant 1. static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) { - assert(isa(PN.getType()) && "Expect only intger type phi"); + assert(isa(PN.getType()) && "Expect only integer type phi"); for (Value *V : PN.operands()) if (auto *ConstVA = dyn_cast(V)) - if (!ConstVA->isZeroValue()) + if (!ConstVA->isZero()) return ConstVA; return ConstantInt::get(cast(PN.getType()), 1); } @@ -836,12 +836,12 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { } // Otherwise, do an extract in the predecessor. 
- Builder->SetInsertPoint(Pred->getTerminator()); + Builder.SetInsertPoint(Pred->getTerminator()); Value *Res = InVal; if (Offset) - Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), + Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(), Offset), "extract"); - Res = Builder->CreateTrunc(Res, Ty, "extract.t"); + Res = Builder.CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 80c6595904e1..4eebe8255998 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -61,12 +61,12 @@ static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF, } } -static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder, +static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy &Builder, SelectPatternFlavor SPF, Value *A, Value *B) { CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF); assert(CmpInst::isIntPredicate(Pred)); - return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B); + return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); } /// We want to turn code that looks like this: @@ -167,8 +167,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, // Fold this by inserting a select from the input values. Value *NewSI = - Builder->CreateSelect(SI.getCondition(), TI->getOperand(0), - FI->getOperand(0), SI.getName() + ".v", &SI); + Builder.CreateSelect(SI.getCondition(), TI->getOperand(0), + FI->getOperand(0), SI.getName() + ".v", &SI); return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); } @@ -211,8 +211,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, } // If we reach here, they do have operations in common. - Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, OtherOpF, - SI.getName() + ".v", &SI); + Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF, + SI.getName() + ".v", &SI); Value *Op0 = MatchIsOpZero ? MatchOp : NewSI; Value *Op1 = MatchIsOpZero ? NewSI : MatchOp; return BinaryOperator::Create(BO->getOpcode(), Op0, Op1); @@ -227,8 +227,8 @@ static bool isSelect01(Constant *C1, Constant *C2) { return false; if (!C1I->isZero() && !C2I->isZero()) // One side must be zero. return false; - return C1I->isOne() || C1I->isAllOnesValue() || - C2I->isOne() || C2I->isAllOnesValue(); + return C1I->isOne() || C1I->isMinusOne() || + C2I->isOne() || C2I->isMinusOne(); } /// Try to fold the select into one of the operands to allow further @@ -254,7 +254,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. if (!isa(OOp) || isSelect01(C, cast(OOp))) { - Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C); + Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); BinaryOperator *TVI_BO = cast(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), @@ -284,7 +284,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. 
if (!isa(OOp) || isSelect01(C, cast(OOp))) { - Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp); + Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); BinaryOperator *FVI_BO = cast(FVI); BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), @@ -315,7 +315,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// 3. The magnitude of C2 and C1 are flipped static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, Value *FalseVal, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { const ICmpInst *IC = dyn_cast(SI.getCondition()); if (!IC || !SI.getType()->isIntegerTy()) return nullptr; @@ -383,22 +383,22 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, if (NeedAnd) { // Insert the AND instruction on the input to the truncate. APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log); - V = Builder->CreateAnd(V, ConstantInt::get(V->getType(), C1)); + V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1)); } if (C2Log > C1Log) { - V = Builder->CreateZExtOrTrunc(V, Y->getType()); - V = Builder->CreateShl(V, C2Log - C1Log); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateShl(V, C2Log - C1Log); } else if (C1Log > C2Log) { - V = Builder->CreateLShr(V, C1Log - C2Log); - V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateLShr(V, C1Log - C2Log); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); } else - V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); if (NeedXor) - V = Builder->CreateXor(V, *C2); + V = Builder.CreateXor(V, *C2); - return Builder->CreateOr(V, Y); + return Builder.CreateOr(V, Y); } /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single @@ -414,7 +414,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, /// into: /// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ICmpInst::Predicate Pred = ICI->getPredicate(); Value *CmpLHS = ICI->getOperand(0); Value *CmpRHS = ICI->getOperand(1); @@ -449,8 +449,8 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, IntrinsicInst *NewI = cast(II->clone()); Type *Ty = NewI->getArgOperand(1)->getType(); NewI->setArgOperand(1, Constant::getNullValue(Ty)); - Builder->Insert(NewI); - return Builder->CreateZExtOrTrunc(NewI, ValueOnZero->getType()); + Builder.Insert(NewI); + return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType()); } return nullptr; @@ -597,7 +597,7 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp, /// Visit a SelectInst that has an ICmpInst as its first operand. 
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *Builder)) + if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder)) return NewSel; bool Changed = adjustMinMax(SI, *ICI); @@ -617,23 +617,23 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast(CmpRHS)) { ConstantInt *C1 = nullptr, *C2 = nullptr; - if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { + if (Pred == ICmpInst::ICMP_SGT && Cmp->isMinusOne()) { C1 = dyn_cast(TrueVal); C2 = dyn_cast(FalseVal); - } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) { + } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isZero()) { C1 = dyn_cast(FalseVal); C2 = dyn_cast(TrueVal); } if (C1 && C2) { // This shift results in either -1 or 0. - Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1); + Value *AShr = Builder.CreateAShr(CmpLHS, Ty->getBitWidth() - 1); // Check if we can express the operation with a single or. - if (C2->isAllOnesValue()) - return replaceInstUsesWith(SI, Builder->CreateOr(AShr, C1)); + if (C2->isMinusOne()) + return replaceInstUsesWith(SI, Builder.CreateOr(AShr, C1)); - Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue()); - return replaceInstUsesWith(SI, Builder->CreateAdd(And, C1)); + Value *And = Builder.CreateAnd(AShr, C2->getValue() - C1->getValue()); + return replaceInstUsesWith(SI, Builder.CreateAdd(And, C1)); } } } @@ -684,19 +684,19 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, // (X & Y) == 0 ? X : X ^ Y --> X & ~Y if (TrueWhenUnset && TrueVal == X && match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateAnd(X, ~(*Y)); + V = Builder.CreateAnd(X, ~(*Y)); // (X & Y) != 0 ? X ^ Y : X --> X & ~Y else if (!TrueWhenUnset && FalseVal == X && match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateAnd(X, ~(*Y)); + V = Builder.CreateAnd(X, ~(*Y)); // (X & Y) == 0 ? X ^ Y : X --> X | Y else if (TrueWhenUnset && FalseVal == X && match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateOr(X, *Y); + V = Builder.CreateOr(X, *Y); // (X & Y) != 0 ? 
X : X ^ Y --> X | Y else if (!TrueWhenUnset && TrueVal == X && match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateOr(X, *Y); + V = Builder.CreateOr(X, *Y); if (V) return replaceInstUsesWith(SI, V); @@ -809,8 +809,8 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { SelectInst *SI = cast(Inner); Value *NewSI = - Builder->CreateSelect(SI->getCondition(), SI->getFalseValue(), - SI->getTrueValue(), SI->getName(), SI); + Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(), + SI->getTrueValue(), SI->getName(), SI); return replaceInstUsesWith(Outer, NewSI); } @@ -848,15 +848,15 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, IsFreeOrProfitableToInvert(B, NotB, ElidesXor) && IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) { if (!NotA) - NotA = Builder->CreateNot(A); + NotA = Builder.CreateNot(A); if (!NotB) - NotB = Builder->CreateNot(B); + NotB = Builder.CreateNot(B); if (!NotC) - NotC = Builder->CreateNot(C); + NotC = Builder.CreateNot(C); Value *NewInner = generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB); - Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern( + Value *NewOuter = Builder.CreateNot(generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC)); return replaceInstUsesWith(Outer, NewOuter); } @@ -868,9 +868,9 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, /// icmp instruction with zero, and we have an 'and' with the non-constant value /// and a power of two we can turn the select into a shift on the result of the /// 'and'. -static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, - ConstantInt *FalseVal, - InstCombiner::BuilderTy *Builder) { +static Value *foldSelectICmpAnd(const SelectInst &SI, APInt TrueVal, + APInt FalseVal, + InstCombiner::BuilderTy &Builder) { const ICmpInst *IC = dyn_cast(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) return nullptr; @@ -886,56 +886,53 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. - ConstantInt *Offset = nullptr; - if (!TrueVal->isZero() && !FalseVal->isZero()) { - if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) + APInt Offset(TrueVal.getBitWidth(), 0); + if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) { + if ((TrueVal - FalseVal).isPowerOf2()) Offset = FalseVal; - else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) + else if ((FalseVal - TrueVal).isPowerOf2()) Offset = TrueVal; else return nullptr; // Adjust TrueVal and FalseVal to the offset. - TrueVal = ConstantInt::get(Builder->getContext(), - TrueVal->getValue() - Offset->getValue()); - FalseVal = ConstantInt::get(Builder->getContext(), - FalseVal->getValue() - Offset->getValue()); + TrueVal -= Offset; + FalseVal -= Offset; } // Make sure the mask in the 'and' and one of the select arms is a power of 2. if (!AndRHS->getValue().isPowerOf2() || - (!TrueVal->getValue().isPowerOf2() && - !FalseVal->getValue().isPowerOf2())) + (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2())) return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. - ConstantInt *ValC = !TrueVal->isZero() ? 
TrueVal : FalseVal; - unsigned ValZeros = ValC->getValue().logBase2(); + const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal; + unsigned ValZeros = ValC.logBase2(); unsigned AndZeros = AndRHS->getValue().logBase2(); // If types don't match we can still convert the select by introducing a zext // or a trunc of the 'and'. The trunc case requires that all of the truncated // bits are zero, we can figure that out by looking at the 'and' mask. - if (AndZeros >= ValC->getBitWidth()) + if (AndZeros >= ValC.getBitWidth()) return nullptr; - Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType()); + Value *V = Builder.CreateZExtOrTrunc(LHS, SI.getType()); if (ValZeros > AndZeros) - V = Builder->CreateShl(V, ValZeros - AndZeros); + V = Builder.CreateShl(V, ValZeros - AndZeros); else if (ValZeros < AndZeros) - V = Builder->CreateLShr(V, AndZeros - ValZeros); + V = Builder.CreateLShr(V, AndZeros - ValZeros); // Okay, now we know that everything is set up, we just don't know whether we // have a icmp_ne or icmp_eq and whether the true or false val is the zero. - bool ShouldNotVal = !TrueVal->isZero(); + bool ShouldNotVal = !TrueVal.isNullValue(); ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; if (ShouldNotVal) - V = Builder->CreateXor(V, ValC); + V = Builder.CreateXor(V, ValC); // Apply an offset if needed. - if (Offset) - V = Builder->CreateAdd(V, Offset); + if (!Offset.isNullValue()) + V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset)); return V; } @@ -1024,7 +1021,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { // TODO: Handle larger types? That requires adjusting FoldOpIntoSelect too. Value *X = ExtInst->getOperand(0); Type *SmallType = X->getType(); - if (!SmallType->getScalarType()->isIntegerTy(1)) + if (!SmallType->isIntOrIntVectorTy(1)) return nullptr; Constant *C; @@ -1045,7 +1042,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { // select Cond, (ext X), C --> ext(select Cond, X, C') // select Cond, C, (ext X) --> ext(select Cond, C', X) - Value *NewSel = Builder->CreateSelect(Cond, X, TruncCVal, "narrow", &Sel); + Value *NewSel = Builder.CreateSelect(Cond, X, TruncCVal, "narrow", &Sel); return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType); } @@ -1184,7 +1181,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (SelType->getScalarType()->isIntegerTy(1) && + if (SelType->isIntOrIntVectorTy(1) && TrueVal->getType() == CondVal->getType()) { if (match(TrueVal, m_One())) { // Change: A = select B, true, C --> A = or B, C @@ -1192,7 +1189,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (match(TrueVal, m_Zero())) { // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); } if (match(FalseVal, m_Zero())) { @@ -1201,7 +1198,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (match(FalseVal, m_One())) { // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." 
+ CondVal->getName()); return BinaryOperator::CreateOr(NotCond, TrueVal); } @@ -1226,7 +1223,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0> // because that may need 3 instructions to splat the condition value: // extend, insertelement, shufflevector. - if (CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { + if (SelType->isIntOrIntVectorTy() && + CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { // select C, 1, 0 -> zext C to int if (match(TrueVal, m_One()) && match(FalseVal, m_Zero())) return new ZExtInst(CondVal, SelType); @@ -1237,20 +1235,21 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select C, 0, 1 -> zext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) { - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new ZExtInst(NotCond, SelType); } // select C, 0, -1 -> sext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) { - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new SExtInst(NotCond, SelType); } } if (ConstantInt *TrueValC = dyn_cast(TrueVal)) if (ConstantInt *FalseValC = dyn_cast(FalseVal)) - if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder)) + if (Value *V = foldSelectICmpAnd(SI, TrueValC->getValue(), + FalseValC->getValue(), Builder)) return replaceInstUsesWith(SI, V); // See if we are selecting two values based on a comparison of the two values. @@ -1288,10 +1287,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? Y : X if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->setFastMathFlags(FCI->getFastMathFlags()); - Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal, - FCI->getName() + ".inv"); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(FCI->getFastMathFlags()); + Value *NewCond = Builder.CreateFCmp(InvPred, TrueVal, FalseVal, + FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); @@ -1331,10 +1330,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? 
X : Y if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->setFastMathFlags(FCI->getFastMathFlags()); - Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal, - FCI->getName() + ".inv"); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(FCI->getFastMathFlags()); + Value *NewCond = Builder.CreateFCmp(InvPred, FalseVal, TrueVal, + FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); @@ -1350,7 +1349,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *Result = foldSelectInstWithICmp(SI, ICI)) return Result; - if (Instruction *Add = foldAddSubSelect(SI, *Builder)) + if (Instruction *Add = foldAddSubSelect(SI, Builder)) return Add; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) @@ -1381,16 +1380,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *Cmp; if (CmpInst::isIntPredicate(Pred)) { - Cmp = Builder->CreateICmp(Pred, LHS, RHS); + Cmp = Builder.CreateICmp(Pred, LHS, RHS); } else { - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); auto FMF = cast(SI.getCondition())->getFastMathFlags(); - Builder->setFastMathFlags(FMF); - Cmp = Builder->CreateFCmp(Pred, LHS, RHS); + Builder.setFastMathFlags(FMF); + Cmp = Builder.CreateFCmp(Pred, LHS, RHS); } - Value *NewSI = Builder->CreateCast( - CastOp, Builder->CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI), + Value *NewSI = Builder.CreateCast( + CastOp, Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI), SelType); return replaceInstUsesWith(SI, NewSI); } @@ -1425,13 +1424,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value()))); if (NumberOfNots >= 2) { - Value *NewLHS = Builder->CreateNot(LHS); - Value *NewRHS = Builder->CreateNot(RHS); - Value *NewCmp = SPF == SPF_SMAX - ? Builder->CreateICmpSLT(NewLHS, NewRHS) - : Builder->CreateICmpULT(NewLHS, NewRHS); + Value *NewLHS = Builder.CreateNot(LHS); + Value *NewRHS = Builder.CreateNot(RHS); + Value *NewCmp = SPF == SPF_SMAX ? Builder.CreateICmpSLT(NewLHS, NewRHS) + : Builder.CreateICmpULT(NewLHS, NewRHS); Value *NewSI = - Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS)); + Builder.CreateNot(Builder.CreateSelect(NewCmp, NewLHS, NewRHS)); return replaceInstUsesWith(SI, NewSI); } } @@ -1461,7 +1459,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // We choose this as normal form to enable folding on the And and shortening // paths for the values (this helps GetUnderlyingObjects() for example). 
if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { - Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition()); + Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition()); SI.setOperand(0, And); SI.setOperand(1, TrueSI->getTrueValue()); return &SI; @@ -1479,7 +1477,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b) if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) { - Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition()); + Value *Or = Builder.CreateOr(CondVal, FalseSI->getCondition()); SI.setOperand(0, Or); SI.setOperand(2, FalseSI->getFalseValue()); return &SI; @@ -1541,7 +1539,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return replaceInstUsesWith(SI, FalseVal); } - if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, *Builder)) + if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, Builder)) return BitCastSel; return nullptr; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 1bb1a85367d1..7ed141c7fd79 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -47,7 +47,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) && isKnownNonNegative(C, DL, 0, &AC, &I, &DT)) return BinaryOperator::Create( - I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A); + I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A); // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. // Because shifts by negative values (which could occur if A were negative) @@ -56,8 +56,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) { // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't // demand the sign bit (and many others) here?? - Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1), - Op1->getName()); + Value *Rem = Builder.CreateAnd(A, ConstantInt::get(I.getType(), *B - 1), + Op1->getName()); I.setOperand(1, Rem); return &I; } @@ -260,9 +260,9 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // We can always evaluate constants shifted. if (Constant *C = dyn_cast(V)) { if (isLeftShift) - V = IC.Builder->CreateShl(C, NumBits); + V = IC.Builder.CreateShl(C, NumBits); else - V = IC.Builder->CreateLShr(C, NumBits); + V = IC.Builder.CreateLShr(C, NumBits); // If we got a constantexpr back, try to simplify it with TD info. if (auto *C = dyn_cast(V)) if (auto *FoldedC = @@ -289,7 +289,7 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::Shl: case Instruction::LShr: return foldShiftedShift(cast(I), NumBits, isLeftShift, - *(IC.Builder)); + IC.Builder); case Instruction::Select: I->setOperand( @@ -353,7 +353,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, Constant *ShAmt = ConstantExpr::getZExt(cast(Op1), TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) - Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); + Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName()); // For logical shifts, the truncation has the effect of making the high // part of the register be zeros. 
Emulate this by inserting an AND to @@ -375,9 +375,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, } // shift1 & 0x00FF - Value *And = Builder->CreateAnd(NSh, - ConstantInt::get(I.getContext(), MaskV), - TI->getName()); + Value *And = Builder.CreateAnd(NSh, + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); // Return the value truncated to the interesting size. return new TruncInst(And, I.getType()); @@ -401,10 +401,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, - Op0BO->getOperand(1)->getName()); + Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); unsigned Op1Val = Op1C->getLimitedValue(TypeBits); APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); @@ -421,11 +421,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), m_ConstantInt(CC)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1, - Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); + Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } LLVM_FALLTHROUGH; @@ -437,10 +436,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, - Op0BO->getOperand(0)->getName()); + Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); unsigned Op1Val = Op1C->getLimitedValue(TypeBits); APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); @@ -456,10 +455,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), m_ConstantInt(CC))) && V2 == Op1) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); + Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -502,7 +501,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); Value *NewShift = - Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); NewShift->takeName(Op0BO); return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, @@ -541,7 +540,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { unsigned SrcWidth = X->getType()->getScalarSizeInBits(); if (ShAmt < SrcWidth && MaskedValueIsZero(X, 
APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I)) - return new ZExtInst(Builder->CreateShl(X, ShAmt), Ty); + return new ZExtInst(Builder.CreateShl(X, ShAmt), Ty); } // (X >>u C) << C --> X & (-1 << C) @@ -641,7 +640,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { // ctpop.i32(x)>>5 --> zext(x == -1) bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop; Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0); - Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); + Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS); return new ZExtInst(Cmp, Ty); } @@ -658,7 +657,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return NewLShr; } // (X << C1) >>u C2 --> (X >>u (C2 - C1)) & (-1 >> C2) - Value *NewLShr = Builder->CreateLShr(X, ShiftDiff, "", I.isExact()); + Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact()); APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt)); return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask)); } @@ -671,7 +670,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return NewShl; } // (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2) - Value *NewShl = Builder->CreateShl(X, ShiftDiff); + Value *NewShl = Builder.CreateShl(X, ShiftDiff); APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt)); return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask)); } @@ -692,7 +691,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN if (Op0->hasOneUse()) { - Value *NewLShr = Builder->CreateLShr(X, SrcTyBitWidth - 1); + Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1); return new ZExtInst(NewLShr, Ty); } } @@ -701,7 +700,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) { // The new shift amount can't be more than the narrow source type. unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1); - Value *AShr = Builder->CreateAShr(X, NewShAmt); + Value *AShr = Builder.CreateAShr(X, NewShAmt); return new ZExtInst(AShr, Ty); } } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 03841164b58d..5689c0604239 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -548,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce // INT_MIN % -1 (== undef) by accident. 
- if (Rem->isAllOnesValue()) + if (Rem->isMinusOne()) break; APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { @@ -1627,10 +1627,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) Args.push_back(II->getArgOperand(I)); - IRBuilderBase::InsertPointGuard Guard(*Builder); - Builder->SetInsertPoint(II); + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(II); - CallInst *NewCall = Builder->CreateCall(NewIntrin, Args); + CallInst *NewCall = Builder.CreateCall(NewIntrin, Args); NewCall->takeName(II); NewCall->copyMetadata(*II); @@ -1657,15 +1657,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (NewNumElts == 1) { - return Builder->CreateInsertElement(UndefValue::get(V->getType()), - NewCall, static_cast(0)); + return Builder.CreateInsertElement(UndefValue::get(V->getType()), + NewCall, static_cast(0)); } SmallVector EltMask; for (unsigned I = 0; I < VWidth; ++I) EltMask.push_back(I); - Value *Shuffle = Builder->CreateShuffleVector( + Value *Shuffle = Builder.CreateShuffleVector( NewCall, UndefValue::get(NewTy), EltMask); MadeChange = true; diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 926e46655eb8..dd71a31b644b 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -204,11 +204,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (I->hasOneUse() && cheapToScalarize(BO, isa(EI.getOperand(1)))) { Value *newEI0 = - Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), - EI.getName()+".lhs"); + Builder.CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); Value *newEI1 = - Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), - EI.getName()+".rhs"); + Builder.CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), newEI0, newEI1, BO); } @@ -250,8 +250,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // Bitcasts can change the number of vector elements, and they cost // nothing. 
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { - Value *EE = Builder->CreateExtractElement(CI->getOperand(0), - EI.getIndexOperand()); + Value *EE = Builder.CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } @@ -269,20 +269,20 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Value *Cond = SI->getCondition(); if (Cond->getType()->isVectorTy()) { - Cond = Builder->CreateExtractElement(Cond, - EI.getIndexOperand(), - Cond->getName() + ".elt"); + Cond = Builder.CreateExtractElement(Cond, + EI.getIndexOperand(), + Cond->getName() + ".elt"); } Value *V1Elem - = Builder->CreateExtractElement(TrueVal, - EI.getIndexOperand(), - TrueVal->getName() + ".elt"); + = Builder.CreateExtractElement(TrueVal, + EI.getIndexOperand(), + TrueVal->getName() + ".elt"); Value *V2Elem - = Builder->CreateExtractElement(FalseVal, - EI.getIndexOperand(), - FalseVal->getName() + ".elt"); + = Builder.CreateExtractElement(FalseVal, + EI.getIndexOperand(), + FalseVal->getName() + ".elt"); return SelectInst::Create(Cond, V1Elem, V2Elem, @@ -837,7 +837,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) return Shuf; - if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder)) + if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder)) return NewInsElt; // Turn a sequence of inserts that broadcasts a scalar into a single @@ -1020,9 +1020,9 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { SmallVector MaskValues; for (int i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] == -1) - MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); + MaskValues.push_back(UndefValue::get(Builder.getInt32Ty())); else - MaskValues.push_back(Builder->getInt32(Mask[i])); + MaskValues.push_back(Builder.getInt32(Mask[i])); } return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), ConstantVector::get(MaskValues)); @@ -1095,7 +1095,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); return InsertElementInst::Create(V, I->getOperand(1), - Builder->getInt32(Index), "", I); + Builder.getInt32(Index), "", I); } } llvm_unreachable("failed to reorder elements of vector instruction!"); @@ -1275,9 +1275,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { UndefValue::get(Int32Ty)); for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); - V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(ShuffleMask), - SVI.getName() + ".extract"); + V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(ShuffleMask), + SVI.getName() + ".extract"); BegIdx = 0; } unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; @@ -1287,10 +1287,10 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { auto *NewBC = BCAlreadyExists ? 
NewBCs[CastSrcTy] - : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); + : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); if (!BCAlreadyExists) NewBCs[CastSrcTy] = NewBC; - auto *Ext = Builder->CreateExtractElement( + auto *Ext = Builder.CreateExtractElement( NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); // The shufflevector isn't being replaced: the bitcast that used it // is. InstCombine will visit the newly-created instructions. diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 723414635d6f..90e232399155 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -88,7 +88,7 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine")); Value *InstCombiner::EmitGEPOffset(User *GEP) { - return llvm::EmitGEPOffset(Builder, DL, GEP); + return llvm::EmitGEPOffset(&Builder, DL, GEP); } /// Return true if it is desirable to convert an integer computation from a @@ -498,8 +498,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, - BinaryOperator &I, +Value *InstCombiner::tryFactorization(BinaryOperator &I, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D) { assert(A && B && C && D && "All values must be provided"); @@ -525,9 +524,9 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); + V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); if (V) { - SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V); + SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V); } } @@ -545,9 +544,9 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); + V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); if (V) { - SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B); + SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B); } } @@ -610,7 +609,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { // The instruction has the form "(A op' B) op (C op' D)". Try to factorize // a common term. if (Op0 && Op1 && LHSOpcode == RHSOpcode) - if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D)) + if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D)) return V; // The instruction has the form "(A op' B) op (C)". Try to factorize common @@ -618,7 +617,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) if (Value *V = - tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident)) + tryFactorization(I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". 
Try to factorize common @@ -626,7 +625,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) if (Value *V = - tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D)) + tryFactorization(I, RHSOpcode, LHS, Ident, C, D)) return V; } @@ -644,7 +643,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) { // They do! Return "L op' R". ++NumExpand; - C = Builder->CreateBinOp(InnerOpcode, L, R); + C = Builder.CreateBinOp(InnerOpcode, L, R); C->takeName(&I); return C; } @@ -663,7 +662,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) { // They do! Return "L op' R". ++NumExpand; - A = Builder->CreateBinOp(InnerOpcode, L, R); + A = Builder.CreateBinOp(InnerOpcode, L, R); A->takeName(&I); return A; } @@ -678,18 +677,18 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), SI1->getFalseValue(), SQ.getWithInstruction(&I))) - SI = Builder->CreateSelect(SI0->getCondition(), - Builder->CreateBinOp(TopLevelOpcode, - SI0->getTrueValue(), - SI1->getTrueValue()), - V); + SI = Builder.CreateSelect(SI0->getCondition(), + Builder.CreateBinOp(TopLevelOpcode, + SI0->getTrueValue(), + SI1->getTrueValue()), + V); if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), SI1->getTrueValue(), SQ.getWithInstruction(&I))) - SI = Builder->CreateSelect( + SI = Builder.CreateSelect( SI0->getCondition(), V, - Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue())); + Builder.CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue())); if (SI) { SI->takeName(&I); return SI; @@ -751,9 +750,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { } static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, - InstCombiner *IC) { + InstCombiner::BuilderTy &Builder) { if (auto *Cast = dyn_cast(&I)) - return IC->Builder->CreateCast(Cast->getOpcode(), SO, I.getType()); + return Builder.CreateCast(Cast->getOpcode(), SO, I.getType()); assert(I.isBinaryOp() && "Unexpected opcode for select folding"); @@ -772,8 +771,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, std::swap(Op0, Op1); auto *BO = cast(&I); - Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, - SO->getName() + ".op"); + Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName() + ".op"); auto *FPInst = dyn_cast(RI); if (FPInst && isa(FPInst)) FPInst->copyFastMathFlags(BO); @@ -791,7 +790,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return nullptr; // Bool selects with constant operands can be folded to logical ops. 
- if (SI->getType()->getScalarType()->isIntegerTy(1)) + if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of @@ -825,13 +824,13 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { } } - Value *NewTV = foldOperationIntoSelectOperand(Op, TV, this); - Value *NewFV = foldOperationIntoSelectOperand(Op, FV, this); + Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); + Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, - InstCombiner *IC) { + InstCombiner::BuilderTy &Builder) { bool ConstIsRHS = isa(I->getOperand(1)); Constant *C = cast(I->getOperand(ConstIsRHS)); @@ -845,7 +844,7 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, if (!ConstIsRHS) std::swap(Op0, Op1); - Value *RI = IC->Builder->CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); + Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); auto *FPInst = dyn_cast(RI); if (FPInst && isa(FPInst)) FPInst->copyFastMathFlags(I); @@ -916,7 +915,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // If we are going to have to insert a new computation, do so right before the // predecessor's terminator. if (NonConstBB) - Builder->SetInsertPoint(NonConstBB->getTerminator()); + Builder.SetInsertPoint(NonConstBB->getTerminator()); // Next, add all of the operands to the PHI. if (SelectInst *SI = dyn_cast(&I)) { @@ -948,9 +947,9 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For // non-vector phis, this transformation was always profitable because // the select would be generated exactly once in the NonConstBB. 
- Builder->SetInsertPoint(ThisBB->getTerminator()); - InV = Builder->CreateSelect(PN->getIncomingValue(i), - TrueVInPred, FalseVInPred, "phitmp"); + Builder.SetInsertPoint(ThisBB->getTerminator()); + InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred, + FalseVInPred, "phitmp"); } NewPN->addIncoming(InV, ThisBB); } @@ -961,16 +960,17 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa(CI)) - InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); else - InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else if (auto *BO = dyn_cast(&I)) { for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), this); + Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), + Builder); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { @@ -981,8 +981,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (Constant *InC = dyn_cast(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); else - InV = Builder->CreateCast(CI->getOpcode(), - PN->getIncomingValue(i), I.getType(), "phitmp"); + InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i), + I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } @@ -1328,8 +1328,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { /// \brief Creates node of binary operation with the same attributes as the /// specified one but with other operands. static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *B) { - Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); + InstCombiner::BuilderTy &B) { + Value *BO = B.CreateBinOp(Inst.getOpcode(), LHS, RHS); // If LHS and RHS are constant, BO won't be a binary operator. if (BinaryOperator *NewBO = dyn_cast(BO)) NewBO->copyIRFlags(&Inst); @@ -1365,7 +1365,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) { Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), RShuf->getOperand(0), Builder); - return Builder->CreateShuffleVector( + return Builder.CreateShuffleVector( NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask()); } @@ -1404,7 +1404,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Value *NewLHS = isa(LHS) ? C2 : Shuffle->getOperand(0); Value *NewRHS = isa(LHS) ? Shuffle->getOperand(0) : C2; Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); - return Builder->CreateShuffleVector(NewBO, + return Builder.CreateShuffleVector(NewBO, UndefValue::get(Inst.getType()), Shuffle->getMask()); } } @@ -1452,7 +1452,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. 
- *I = Builder->CreateIntCast(*I, NewIndexType, true); + *I = Builder.CreateIntCast(*I, NewIndexType, true); MadeChange = true; } } @@ -1546,10 +1546,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // set that index. PHINode *NewPN; { - IRBuilderBase::InsertPointGuard Guard(*Builder); - Builder->SetInsertPoint(PN); - NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), - PN->getNumOperands()); + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(PN); + NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); } for (auto &I : PN->operands()) @@ -1669,8 +1669,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // pointer arithmetic. if (match(V, m_Neg(m_PtrToInt(m_Value())))) { Operator *Index = cast(V); - Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); - Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1)); return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); } // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) @@ -1723,7 +1723,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // -> // %0 = GEP i8 addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* - return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType()); + return new AddrSpaceCastInst(Builder.Insert(Res), GEP.getType()); } if (ArrayType *XATy = @@ -1751,10 +1751,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector Idx(GEP.idx_begin(), GEP.idx_end()); Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP( + ? Builder.CreateInBoundsGEP( nullptr, StrippedPtr, Idx, GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, Idx, - GEP.getName()); + : Builder.CreateGEP(nullptr, StrippedPtr, Idx, + GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1772,9 +1772,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx, - GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); + ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx, + GEP.getName()) + : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1807,10 +1807,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP may not be "inbounds". Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, - GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx, - GEP.getName()); + ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()) + : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1849,10 +1849,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP( + ? 
Builder.CreateInBoundsGEP( SrcElTy, StrippedPtr, Off, GEP.getName()) - : Builder->CreateGEP(SrcElTy, StrippedPtr, Off, - GEP.getName()); + : Builder.CreateGEP(SrcElTy, StrippedPtr, Off, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1916,8 +1916,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices) - : Builder->CreateGEP(nullptr, Operand, NewIndices); + ? Builder.CreateInBoundsGEP(nullptr, Operand, NewIndices) + : Builder.CreateGEP(nullptr, Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return replaceInstUsesWith(GEP, NGEP); @@ -2166,8 +2166,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { // free undef -> unreachable. if (isa(Op)) { // Insert a new store to null because we cannot modify the CFG here. - Builder->CreateStore(ConstantInt::getTrue(FI.getContext()), - UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); + Builder.CreateStore(ConstantInt::getTrue(FI.getContext()), + UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); return eraseInstFromFunction(FI); } @@ -2281,8 +2281,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { // the backend should extend back to a legal type for the target. if (NewWidth > 0 && NewWidth < Known.getBitWidth()) { IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); - Builder->SetInsertPoint(&SI); - Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc"); + Builder.SetInsertPoint(&SI); + Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc"); SI.setCondition(NewCond); for (auto Case : SI.cases()) { @@ -2339,8 +2339,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). - Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.getIndices()); + Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(), + EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), makeArrayRef(insi, inse)); } @@ -2415,17 +2415,17 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector Indices; // Prefix an i32 0 since we need the first element. - Indices.push_back(Builder->getInt32(0)); + Indices.push_back(Builder.getInt32(0)); for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); I != E; ++I) - Indices.push_back(Builder->getInt32(*I)); + Indices.push_back(Builder.getInt32(*I)); // We need to insert these at the location of the old load, not at that of // the extractvalue. - Builder->SetInsertPoint(L); - Value *GEP = Builder->CreateInBoundsGEP(L->getType(), - L->getPointerOperand(), Indices); - Instruction *NL = Builder->CreateLoad(GEP); + Builder.SetInsertPoint(L); + Value *GEP = Builder.CreateInBoundsGEP(L->getType(), + L->getPointerOperand(), Indices); + Instruction *NL = Builder.CreateLoad(GEP); // Whatever aliasing information we had for the orignal load must also // hold for the smaller load, so propagate the annotations. 
AAMDNodes Nodes; @@ -2922,8 +2922,8 @@ bool InstCombiner::run() { } // Now that we have an instruction, try combining it to simplify it. - Builder->SetInsertPoint(I); - Builder->SetCurrentDebugLocation(I->getDebugLoc()); + Builder.SetInsertPoint(I); + Builder.SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG std::string OrigI; @@ -3160,7 +3160,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); - InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines, + InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA, AC, TLI, DT, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 7eea44d6aca0..184940b7ea58 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1230,7 +1230,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, if (auto *Vector = dyn_cast(Mask)) { // dyn_cast as we might get UndefValue if (auto *Masked = dyn_cast(Vector->getOperand(Idx))) { - if (Masked->isNullValue()) + if (Masked->isZero()) // Mask is constant false, so no instrumentation needed. continue; // If we have a true or undef value, fall through to doInstrumentAddress diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h index 3802f9fbf7db..16e2e6b4e730 100644 --- a/lib/Transforms/Instrumentation/CFGMST.h +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -24,10 +27,10 @@ #include #include -namespace llvm { - #define DEBUG_TYPE "cfgmst" +namespace llvm { + /// \brief An union-find based Minimum Spanning Tree for CFG /// /// Implements a Union-find algorithm to compute Minimum Spanning Tree @@ -220,5 +223,8 @@ template class CFGMST { } }; -#undef DEBUG_TYPE // "cfgmst" } // end namespace llvm + +#undef DEBUG_TYPE // "cfgmst" + +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 9c14b0149fdc..db8fa8977947 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -112,7 +112,7 @@ cl::opt DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore, cl::desc("Do counter register promotion"), cl::init(false)); cl::opt MaxNumOfPromotionsPerLoop( - cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(10), + cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20), cl::desc("Max number counter promotions per loop to avoid" " increasing register pressure too much")); @@ -121,10 +121,21 @@ cl::opt MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1), cl::desc("Max number of allowed counter promotions")); -cl::opt SpeculativeCounterPromotion( - cl::ZeroOrMore, "speculative-counter-promotion", cl::init(false), - cl::desc("Allow counter promotion for loops with multiple exiting blocks " - " or top-tested loops. 
")); +cl::opt SpeculativeCounterPromotionMaxExiting( + cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3), + cl::desc("The max number of exiting blocks of a loop to allow " + " speculative counter promotion")); + +cl::opt SpeculativeCounterPromotionToLoop( + cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false), + cl::desc("When the option is false, if the target block is in a loop, " + "the promotion will be disallowed unless the promoted counter " + " update can be further/iteratively promoted into an acyclic " + " region.")); + +cl::opt IterativeCounterPromotion( + cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true), + cl::desc("Allow counter promotion across the whole loop nest.")); class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -150,6 +161,7 @@ class InstrProfilingLegacyPass : public ModulePass { } }; +/// /// A helper class to promote one counter RMW operation in the loop /// into register update. /// @@ -158,16 +170,19 @@ class InstrProfilingLegacyPass : public ModulePass { /// class PGOCounterPromoterHelper : public LoadAndStorePromoter { public: - PGOCounterPromoterHelper(Instruction *L, Instruction *S, SSAUpdater &SSA, - Value *Init, BasicBlock *PH, - ArrayRef ExitBlocks, - ArrayRef InsertPts) + PGOCounterPromoterHelper( + Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init, + BasicBlock *PH, ArrayRef ExitBlocks, + ArrayRef InsertPts, + DenseMap> &LoopToCands, + LoopInfo &LI) : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), - InsertPts(InsertPts) { + InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) { assert(isa(L)); assert(isa(S)); SSA.AddAvailableValue(PH, Init); } + void doExtraRewritesBeforeFinalDeletion() const override { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; @@ -179,12 +194,21 @@ class PGOCounterPromoterHelper : public LoadAndStorePromoter { Value *Addr = cast(Store)->getPointerOperand(); IRBuilder<> Builder(InsertPos); if (AtomicCounterUpdatePromoted) + // automic update currently can only be promoted across the current + // loop, not the whole loop nest. 
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, AtomicOrdering::SequentiallyConsistent); else { LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted"); auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); - Builder.CreateStore(NewVal, Addr); + auto *NewStore = Builder.CreateStore(NewVal, Addr); + + // Now update the parent loop's candidate list: + if (IterativeCounterPromotion) { + auto *TargetLoop = LI.getLoopFor(ExitBlock); + if (TargetLoop) + LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore); + } } } } @@ -193,6 +217,8 @@ class PGOCounterPromoterHelper : public LoadAndStorePromoter { Instruction *Store; ArrayRef ExitBlocks; ArrayRef InsertPts; + DenseMap> &LoopToCandidates; + LoopInfo &LI; }; /// A helper class to do register promotion for all profile counter @@ -200,12 +226,15 @@ class PGOCounterPromoterHelper : public LoadAndStorePromoter { /// class PGOCounterPromoter { public: - PGOCounterPromoter(ArrayRef Cands, Loop &Loop) - : Candidates(Cands), ExitBlocks(), InsertPts(), ParentLoop(Loop) { + PGOCounterPromoter( + DenseMap> &LoopToCands, + Loop &CurLoop, LoopInfo &LI) + : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), + LI(LI) { SmallVector LoopExitBlocks; SmallPtrSet BlockSet; - ParentLoop.getExitBlocks(LoopExitBlocks); + L.getExitBlocks(LoopExitBlocks); for (BasicBlock *ExitBlock : LoopExitBlocks) { if (BlockSet.insert(ExitBlock).second) { @@ -216,55 +245,97 @@ class PGOCounterPromoter { } bool run(int64_t *NumPromoted) { - // We can't insert into a catchswitch. - bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { - return isa(Exit->getTerminator()); - }); - - if (HasCatchSwitch) - return false; - - if (!ParentLoop.hasDedicatedExits()) - return false; - - BasicBlock *PH = ParentLoop.getLoopPreheader(); - if (!PH) - return false; - - BasicBlock *H = ParentLoop.getHeader(); - bool TopTested = - ((ParentLoop.getBlocks().size() > 1) && ParentLoop.isLoopExiting(H)); - if (!SpeculativeCounterPromotion && - (TopTested || ParentLoop.getExitingBlock() == nullptr)) + unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); + if (MaxProm == 0) return false; unsigned Promoted = 0; - for (auto &Cand : Candidates) { + for (auto &Cand : LoopToCandidates[&L]) { SmallVector NewPHIs; SSAUpdater SSA(&NewPHIs); Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, - PH, ExitBlocks, InsertPts); + L.getLoopPreheader(), ExitBlocks, + InsertPts, LoopToCandidates, LI); Promoter.run(SmallVector({Cand.first, Cand.second})); Promoted++; - if (Promoted >= MaxNumOfPromotionsPerLoop) + if (Promoted >= MaxProm) break; + (*NumPromoted)++; if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) break; } DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" - << ParentLoop.getLoopDepth() << ")\n"); + << L.getLoopDepth() << ")\n"); return Promoted != 0; } private: - ArrayRef Candidates; + bool allowSpeculativeCounterPromotion(Loop *LP) { + SmallVector ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + // Not considierered speculative. + if (ExitingBlocks.size() == 1) + return true; + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return false; + return true; + } + + // Returns the max number of Counter Promotions for LP. + unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { + // We can't insert into a catchswitch. 
+ SmallVector LoopExitBlocks; + LP->getExitBlocks(LoopExitBlocks); + if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { + return isa(Exit->getTerminator()); + })) + return 0; + + if (!LP->hasDedicatedExits()) + return 0; + + BasicBlock *PH = LP->getLoopPreheader(); + if (!PH) + return 0; + + SmallVector ExitingBlocks; + LP->getExitingBlocks(ExitingBlocks); + // Not considierered speculative. + if (ExitingBlocks.size() == 1) + return MaxNumOfPromotionsPerLoop; + + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return 0; + + // Whether the target block is in a loop does not matter: + if (SpeculativeCounterPromotionToLoop) + return MaxNumOfPromotionsPerLoop; + + // Now check the target block: + unsigned MaxProm = MaxNumOfPromotionsPerLoop; + for (auto *TargetBlock : LoopExitBlocks) { + auto *TargetLoop = LI.getLoopFor(TargetBlock); + if (!TargetLoop) + continue; + unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop); + unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size(); + MaxProm = + std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) - + PendingCandsInTarget); + } + return MaxProm; + } + + DenseMap> &LoopToCandidates; SmallVector ExitBlocks; SmallVector InsertPts; - Loop &ParentLoop; + Loop &L; + LoopInfo &LI; }; } // end anonymous namespace @@ -349,8 +420,10 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) { SmallVector Loops = LI.getLoopsInPreorder(); - for (auto *Loop : Loops) { - PGOCounterPromoter Promoter(LoopPromotionCandidates[Loop], *Loop); + // Do a post-order traversal of the loops so that counter updates can be + // iteratively hoisted outside the loop nest. + for (auto *Loop : llvm::reverse(Loops)) { + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); Promoter.run(&TotalCountersPromoted); } } diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 363539b2886f..4eb758c69c58 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H -#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/IR/BasicBlock.h" @@ -108,4 +108,4 @@ namespace llvm { } // End llvm namespace -#endif +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index df4ee9969c02..1348e0ed0ed0 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2918,8 +2918,11 @@ struct MemorySanitizerVisitor : public InstVisitor { if (ClDumpStrictInstructions) dumpInst(I); DEBUG(dbgs() << "DEFAULT: " << I << "\n"); - for (size_t i = 0, n = I.getNumOperands(); i < n; i++) - insertShadowCheck(I.getOperand(i), &I); + for (size_t i = 0, n = I.getNumOperands(); i < n; i++) { + Value *Operand = I.getOperand(i); + if (Operand->getType()->isSized()) + insertShadowCheck(Operand, &I); + } setShadow(&I, getCleanShadow(&I)); setOrigin(&I, getCleanOrigin()); } diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 
0e7d11c55397..8e4bfc0b91bc 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -224,7 +224,7 @@ std::string getBranchCondString(Instruction *TI) { OS << "_Zero"; else if (CV->isOne()) OS << "_One"; - else if (CV->isAllOnesValue()) + else if (CV->isMinusOne()) OS << "_MinusOne"; else OS << "_Const"; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index a991792bf5a3..ec6904486e10 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -379,10 +379,11 @@ void ThreadSanitizer::chooseInstructionsToInstrument( } static bool isAtomic(Instruction *I) { + // TODO: Ask TTI whether synchronization scope is between threads. if (LoadInst *LI = dyn_cast(I)) - return LI->isAtomic() && LI->getSynchScope() == CrossThread; + return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread; if (StoreInst *SI = dyn_cast(I)) - return SI->isAtomic() && SI->getSynchScope() == CrossThread; + return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread; if (isa(I)) return true; if (isa(I)) @@ -676,7 +677,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { I->eraseFromParent(); } else if (FenceInst *FI = dyn_cast(I)) { Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; - Function *F = FI->getSynchScope() == SingleThread ? + Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ? TsanAtomicSignalFence : TsanAtomicThreadFence; CallInst *C = CallInst::Create(F, Args); ReplaceInstWithInst(I, C); diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index a49c9b68c97d..122c9314e022 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include using namespace llvm; @@ -55,7 +56,7 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); static cl::opt ConstHoistWithBlockFrequency( - "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + "consthoist-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to reduce the " "chance to execute const materialization more frequently than " "without hoisting.")); @@ -231,7 +232,8 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, // Return the optimal insert points in BBs. if (Node == Entry) { BBs.clear(); - if (InsertPtsFreq > BFI.getBlockFreq(Node)) + if (InsertPtsFreq > BFI.getBlockFreq(Node) || + (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)) BBs.insert(Entry); else BBs.insert(InsertPts.begin(), InsertPts.end()); @@ -244,7 +246,15 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, SmallPtrSet &ParentInsertPts = InsertPtsMap[Parent].first; BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; // Choose to insert in Node or in subtree of Node. - if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) { + // Don't hoist to EHPad because we may not find a proper place to insert + // in EHPad. 
+ // If the total frequency of InsertPts is the same as the frequency of the + // target Node, and InsertPts contains more than one nodes, choose hoisting + // to reduce code size. + if (NodeInBBs || + (!Node->isEHPad() && + (InsertPtsFreq > BFI.getBlockFreq(Node) || + (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) { ParentInsertPts.insert(Node); ParentPtsFreq += BFI.getBlockFreq(Node); } else { @@ -392,42 +402,15 @@ void ConstantHoistingPass::collectConstantCandidates( if (Inst->isCast()) return; - // Can't handle inline asm. Skip it. - if (auto Call = dyn_cast(Inst)) - if (isa(Call->getCalledValue())) - return; - - // Switch cases must remain constant, and if the value being tested is - // constant the entire thing should disappear. - if (isa(Inst)) - return; - - // Static allocas (constant size in the entry block) are handled by - // prologue/epilogue insertion so they're free anyway. We definitely don't - // want to make them non-constant. - auto AI = dyn_cast(Inst); - if (AI && AI->isStaticAlloca()) - return; - - // Constants in GEPs that index into a struct type should not be hoisted. - if (isa(Inst)) { - gep_type_iterator GTI = gep_type_begin(Inst); - - // Collect constant for first operand. - collectConstantCandidates(ConstCandMap, Inst, 0); - // Scan rest operands. - for (unsigned Idx = 1, E = Inst->getNumOperands(); Idx != E; ++Idx, ++GTI) { - // Only collect constants that index into a non struct type. - if (!GTI.isStruct()) { - collectConstantCandidates(ConstCandMap, Inst, Idx); - } - } - return; - } - // Scan all operands. for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) { - collectConstantCandidates(ConstCandMap, Inst, Idx); + // The cost of materializing the constants (defined in + // `TargetTransformInfo::getIntImmCost`) for instructions which only take + // constant variables is lower than `TargetTransformInfo::TCC_Basic`. So + // it's safe for us to collect constant candidates from all IntrinsicInsts. + if (canReplaceOperandWithVariable(Inst, Idx) || isa(Inst)) { + collectConstantCandidates(ConstCandMap, Inst, Idx); + } } // end of for all operands } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 0f92760a874b..7fd77a082b82 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -670,7 +670,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (auto *KnownCond = AvailableValues.lookup(CondI)) { // Is the condition known to be true? 
if (isa(KnownCond) && - cast(KnownCond)->isOneValue()) { + cast(KnownCond)->isOne()) { DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); removeMSSA(Inst); Inst->eraseFromParent(); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c0f628eb61e6..0fe72f3f7331 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -80,10 +80,9 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, struct llvm::GVN::Expression { uint32_t opcode; Type *type; - bool commutative; SmallVector varargs; - Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {} + Expression(uint32_t o = ~2U) : opcode(o) {} bool operator==(const Expression &other) const { if (opcode != other.opcode) @@ -247,7 +246,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); - e.commutative = true; } if (CmpInst *C = dyn_cast(I)) { @@ -258,7 +256,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (C->getOpcode() << 8) | Predicate; - e.commutative = true; } else if (InsertValueInst *E = dyn_cast(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -284,7 +281,6 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (Opcode << 8) | Predicate; - e.commutative = true; return e; } @@ -352,25 +348,25 @@ GVN::ValueTable::~ValueTable() = default; /// add - Insert a value into the table with a specified value number. void GVN::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); - if (PHINode *PN = dyn_cast(V)) - NumberingPhi[num] = PN; } uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); - uint32_t e = assignExpNewValueNum(exp).first; + uint32_t &e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = createExpr(C); - auto ValNum = assignExpNewValueNum(exp); - if (ValNum.second) { - valueNumbering[C] = ValNum.first; - return ValNum.first; + uint32_t &e = expressionNumbering[exp]; + if (!e) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; } if (!MD) { - uint32_t e = assignExpNewValueNum(exp).first; + e = nextValueNumber++; valueNumbering[C] = e; return e; } @@ -526,29 +522,23 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { case Instruction::ExtractValue: exp = createExtractvalueExpr(cast(I)); break; - case Instruction::PHI: - valueNumbering[V] = nextValueNumber; - NumberingPhi[nextValueNumber] = cast(V); - return nextValueNumber++; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - uint32_t e = assignExpNewValueNum(exp).first; + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; valueNumbering[V] = e; return e; } /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. 
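// Illustrative sketch (standalone, not part of the patch above): the GVN hunks
// replace the assignExpNewValueNum helper with a direct probe of
// expressionNumbering. operator[] default-constructs a zero entry for an
// unseen key, so a zero value means "not numbered yet" and the same reference
// is reused to store the fresh number, all with a single map lookup. The
// names below (ValueNumberTable, numberOf) are hypothetical; only the idiom
// mirrors the expressionNumbering / nextValueNumber logic shown in the hunk.
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>

struct ValueNumberTable {
  std::unordered_map<std::string, uint32_t> Numbering;
  uint32_t NextValueNumber = 1; // 0 is reserved to mean "no number assigned".

  uint32_t numberOf(const std::string &Expr) {
    uint32_t &N = Numbering[Expr]; // inserts 0 if Expr was not seen before
    if (!N)
      N = NextValueNumber++;       // first time: hand out a fresh number
    return N;                      // later calls return the cached number
  }
};

int main() {
  ValueNumberTable VN;
  std::cout << VN.numberOf("add a b") << '\n'; // 1
  std::cout << VN.numberOf("mul a b") << '\n'; // 2
  std::cout << VN.numberOf("add a b") << '\n'; // 1 again: same expression
}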
-uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { +uint32_t GVN::ValueTable::lookup(Value *V) const { DenseMap::const_iterator VI = valueNumbering.find(V); - if (Verify) { - assert(VI != valueNumbering.end() && "Value not numbered?"); - return VI->second; - } - return (VI != valueNumbering.end()) ? VI->second : 0; + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; } /// Returns the value number of the given comparison, @@ -559,28 +549,21 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); - return assignExpNewValueNum(exp).first; + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + return e; } /// Remove all entries from the ValueTable. void GVN::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); - NumberingPhi.clear(); - PhiTranslateTable.clear(); nextValueNumber = 1; - Expressions.clear(); - ExprIdx.clear(); - nextExprNumber = 0; } /// Remove a value from the value numbering. void GVN::ValueTable::erase(Value *V) { - uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); - // If V is PHINode, V <--> value number is an one-to-one mapping. - if (isa(V)) - NumberingPhi.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -1183,7 +1166,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", LI->isVolatile(), LI->getAlignment(), - LI->getOrdering(), LI->getSynchScope(), + LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); // Transfer the old load's AA tags to the new load. @@ -1219,7 +1202,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, V->takeName(LI); if (Instruction *I = dyn_cast(V)) I->setDebugLoc(LI->getDebugLoc()); - if (V->getType()->getScalarType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI) @@ -1306,7 +1289,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // to propagate LI's DebugLoc because LI may not post-dominate I. if (LI->getDebugLoc() && LI->getParent() == I->getParent()) I->setDebugLoc(LI->getDebugLoc()); - if (V->getType()->getScalarType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ++NumGVNLoad; @@ -1460,7 +1443,7 @@ bool GVN::processLoad(LoadInst *L) { reportLoadElim(L, AvailableValue, ORE); // Tell MDA to rexamine the reused pointer since we might have more // information after forwarding it. - if (MD && AvailableValue->getType()->getScalarType()->isPointerTy()) + if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(AvailableValue); return true; } @@ -1468,95 +1451,6 @@ bool GVN::processLoad(LoadInst *L) { return false; } -/// Return a pair the first field showing the value number of \p Exp and the -/// second field showing whether it is a value number newly created. 
-std::pair -GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { - uint32_t &e = expressionNumbering[Exp]; - bool CreateNewValNum = !e; - if (CreateNewValNum) { - Expressions.push_back(Exp); - if (ExprIdx.size() < nextValueNumber + 1) - ExprIdx.resize(nextValueNumber * 2); - e = nextValueNumber; - ExprIdx[nextValueNumber++] = nextExprNumber++; - } - return {e, CreateNewValNum}; -} - -/// Return whether all the values related with the same \p num are -/// defined in \p BB. -bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, - GVN &Gvn) { - LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; - while (Vals && Vals->BB == BB) - Vals = Vals->Next; - return !Vals; -} - -/// Wrap phiTranslateImpl to provide caching functionality. -uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, - const BasicBlock *PhiBlock, uint32_t Num, - GVN &Gvn) { - auto FindRes = PhiTranslateTable.find({Num, Pred}); - if (FindRes != PhiTranslateTable.end()) - return FindRes->second; - uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); - PhiTranslateTable.insert({{Num, Pred}, NewNum}); - return NewNum; -} - -/// Translate value number \p Num using phis, so that it has the values of -/// the phis in BB. -uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, - const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn) { - if (PHINode *PN = NumberingPhi[Num]) { - for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) - if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) - return TransVal; - } - return Num; - } - - // If there is any value related with Num is defined in a BB other than - // PhiBlock, it cannot depend on a phi in PhiBlock without going through - // a backedge. We can do an early exit in that case to save compile time. - if (!areAllValsInBB(Num, PhiBlock, Gvn)) - return Num; - - if (Num >= ExprIdx.size() || ExprIdx[Num] == 0) - return Num; - Expression Exp = Expressions[ExprIdx[Num]]; - - for (unsigned i = 0; i < Exp.varargs.size(); i++) { - // For InsertValue and ExtractValue, some varargs are index numbers - // instead of value numbers. Those index numbers should not be - // translated. - if ((i > 1 && Exp.opcode == Instruction::InsertValue) || - (i > 0 && Exp.opcode == Instruction::ExtractValue)) - continue; - Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); - } - - if (Exp.commutative) { - assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!"); - if (Exp.varargs[0] > Exp.varargs[1]) { - std::swap(Exp.varargs[0], Exp.varargs[1]); - uint32_t Opcode = Exp.opcode >> 8; - if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) - Exp.opcode = (Opcode << 8) | - CmpInst::getSwappedPredicate( - static_cast(Exp.opcode & 255)); - } - } - - if (uint32_t NewNum = expressionNumbering[Exp]) - return NewNum; - return Num; -} - // In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in @@ -1601,15 +1495,6 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, return Pred != nullptr; } - -void GVN::assignBlockRPONumber(Function &F) { - uint32_t NextBlockNumber = 1; - ReversePostOrderTraversal RPOT(&F); - for (BasicBlock *BB : RPOT) - BlockRPONumber[BB] = NextBlockNumber++; -} - - // Tries to replace instruction with const, using information from // ReplaceWithConstMap. 
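// Illustrative sketch (standalone, hypothetical names): the phiTranslate
// wrapper removed in this hunk cached the results of phiTranslateImpl in a
// table keyed by (value number, predecessor block). That is an ordinary
// memoization pattern; the sketch below uses ints in place of LLVM value
// numbers and basic blocks, and translateImpl is only a stand-in for the real
// phi-operand walk.
#include <cstdint>
#include <map>
#include <utility>

class Translator {
  std::map<std::pair<uint32_t, int>, uint32_t> Cache; // (Num, Pred) -> result

  uint32_t translateImpl(int Pred, uint32_t Num) {
    // Stand-in for the expensive work (re-numbering through phi operands).
    return Num + static_cast<uint32_t>(Pred);
  }

public:
  uint32_t translate(int Pred, uint32_t Num) {
    auto It = Cache.find({Num, Pred});
    if (It != Cache.end())
      return It->second;                   // hit: reuse the previous answer
    uint32_t Result = translateImpl(Pred, Num);
    Cache.insert({{Num, Pred}, Result});   // miss: compute once and remember
    return Result;
  }
};

int main() {
  Translator T;
  return T.translate(1, 41) == T.translate(1, 41) ? 0 : 1; // second call is a cache hit
}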
bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { @@ -1713,7 +1598,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, // RHS neither 'true' nor 'false' - bail out. continue; // Whether RHS equals 'true'. Otherwise it equals 'false'. - bool isKnownTrue = CI->isAllOnesValue(); + bool isKnownTrue = CI->isMinusOne(); bool isKnownFalse = !isKnownTrue; // If "A && B" is known true then both A and B are known true. If "A || B" @@ -1813,7 +1698,7 @@ bool GVN::processInstruction(Instruction *I) { Changed = true; } if (Changed) { - if (MD && V->getType()->getScalarType()->isPointerTy()) + if (MD && V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); ++NumGVNSimpl; return true; @@ -1924,7 +1809,7 @@ bool GVN::processInstruction(Instruction *I) { // Remove it! patchAndReplaceAllUsesWith(I, Repl); - if (MD && Repl->getType()->getScalarType()->isPointerTy()) + if (MD && Repl->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(Repl); markInstructionForDeletion(I); return true; @@ -1971,7 +1856,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // Fabricate val-num for dead-code in order to suppress assertion in // performPRE(). assignValNumForDeadCode(); - assignBlockRPONumber(F); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2043,7 +1927,7 @@ bool GVN::processBlock(BasicBlock *BB) { // Instantiate an expression in a predecessor that lacked it. bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, - BasicBlock *Curr, unsigned int ValNo) { + unsigned int ValNo) { // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. Any // that weren't originally present will have been instantiated earlier @@ -2061,9 +1945,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, success = false; break; } - uint32_t TValNo = - VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this); - if (Value *V = findLeader(Pred, TValNo)) { + if (Value *V = findLeader(Pred, VN.lookup(Op))) { Instr->setOperand(i, V); } else { success = false; @@ -2080,12 +1962,10 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Instr->insertBefore(Pred->getTerminator()); Instr->setName(Instr->getName() + ".pre"); Instr->setDebugLoc(Instr->getDebugLoc()); - - unsigned Num = VN.lookupOrAdd(Instr); - VN.add(Instr, Num); + VN.add(Instr, ValNo); // Update the availability map to include the new instruction. - addToLeaderTable(Num, Instr, Pred); + addToLeaderTable(ValNo, Instr, Pred); return true; } @@ -2123,27 +2003,18 @@ bool GVN::performScalarPRE(Instruction *CurInst) { SmallVector, 8> predMap; for (BasicBlock *P : predecessors(CurrentBlock)) { - // We're not interested in PRE where blocks with predecessors that are - // not reachable. - if (!DT->isReachableFromEntry(P)) { + // We're not interested in PRE where the block is its + // own predecessor, or in blocks with predecessors + // that are not reachable. + if (P == CurrentBlock) { NumWithout = 2; break; - } - // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and - // when CurInst has operand defined in CurrentBlock (so it may be defined - // by phi in the loop header). 
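// Illustrative sketch (standalone, hypothetical CFG): the PRE guard removed in
// this hunk compared reverse-post-order (RPO) numbers, as assigned by the
// removed assignBlockRPONumber, to detect edges that may be loop backedges.
// In an RPO numbering, an edge P -> B whose source number is not smaller than
// its target number goes against the RPO, which is how the removed code
// conservatively flagged potential backedges. The mini CFG below
// (entry -> header -> body -> header, header -> exit) demonstrates that.
#include <functional>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  std::map<std::string, std::vector<std::string>> Succs = {
      {"entry", {"header"}},
      {"header", {"body", "exit"}},
      {"body", {"header"}}, // the backedge
      {"exit", {}}};

  // Depth-first search, record post-order, then reverse it for RPO numbers.
  std::vector<std::string> Post;
  std::set<std::string> Visited;
  std::function<void(const std::string &)> DFS = [&](const std::string &BB) {
    if (!Visited.insert(BB).second)
      return;
    for (const auto &S : Succs[BB])
      DFS(S);
    Post.push_back(BB);
  };
  DFS("entry");

  std::map<std::string, unsigned> RPO;
  unsigned Next = 1;
  for (auto It = Post.rbegin(); It != Post.rend(); ++It)
    RPO[*It] = Next++;

  // Only body -> header has a source number >= its target number.
  for (const auto &KV : Succs)
    for (const auto &S : KV.second)
      std::cout << KV.first << " -> " << S << ": "
                << (RPO[KV.first] >= RPO[S] ? "possible backedge" : "forward")
                << '\n';
}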
- if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] && - any_of(CurInst->operands(), [&](const Use &U) { - if (auto *Inst = dyn_cast(U.get())) - return Inst->getParent() == CurrentBlock; - return false; - })) { + } else if (!DT->isReachableFromEntry(P)) { NumWithout = 2; break; } - uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); - Value *predV = findLeader(P, TValNo); + Value *predV = findLeader(P, ValNo); if (!predV) { predMap.push_back(std::make_pair(static_cast(nullptr), P)); PREPred = P; @@ -2183,7 +2054,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { } // We need to insert somewhere, so let's give it a shot PREInstr = CurInst->clone(); - if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) { + if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { // If we failed insertion, make sure we remove the instruction. DEBUG(verifyRemoved(PREInstr)); PREInstr->deleteValue(); @@ -2212,7 +2083,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { addToLeaderTable(ValNo, Phi, CurrentBlock); Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (MD && Phi->getType()->getScalarType()->isPointerTy()) + if (MD && Phi->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); removeFromLeaderTable(ValNo, CurInst, CurrentBlock); @@ -2297,7 +2168,6 @@ bool GVN::iterateOnFunction(Function &F) { void GVN::cleanupGlobalSets() { VN.clear(); LeaderTable.clear(); - BlockRPONumber.clear(); TableAllocator.Reset(); } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 3c8fbd35bf8c..89b28f0aeee6 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -232,7 +232,7 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec:{ const ConstantInt *IsVolatile = dyn_cast(II->getArgOperand(4)); - if (!IsVolatile || !IsVolatile->isNullValue()) + if (!IsVolatile || !IsVolatile->isZero()) return false; LLVM_FALLTHROUGH; @@ -358,7 +358,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder. 
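// Illustrative sketch (standalone): collectFlatAddressExpressions builds its
// postorder with an explicit stack of (value, operands-already-pushed) pairs
// instead of recursion. On the first visit of a node the flag is flipped and
// its operands are pushed; when the node is seen again its operands have been
// explored, so the node itself is emitted. The toy expression tree below is
// hypothetical; the real code also filters by address space as the hunk shows.
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::map<std::string, std::vector<std::string>> Children = {
      {"select", {"gep1", "gep2"}},
      {"gep1", {"base1"}},
      {"gep2", {"base2"}},
      {"base1", {}},
      {"base2", {}}};

  std::vector<std::pair<std::string, bool>> Stack = {{"select", false}};
  std::vector<std::string> Postorder;

  while (!Stack.empty()) {
    if (Stack.back().second) {
      // Operands already explored: emit the node in postorder.
      Postorder.push_back(Stack.back().first);
      Stack.pop_back();
      continue;
    }
    // First visit: mark as expanded and push the operands.
    Stack.back().second = true;
    for (const auto &Op : Children[Stack.back().first])
      Stack.push_back({Op, false});
  }

  for (const auto &N : Postorder)
    std::cout << N << ' ';
  std::cout << '\n'; // operands always print before their users
}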
if (PostorderStack.back().second) { - Postorder.push_back(TopVal); + if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace) + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 05293eb0079f..ee3de51b1360 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1212,7 +1212,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { LoadInst *NewVal = new LoadInst( LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred), LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(), - LI->getSynchScope(), UnavailablePred->getTerminator()); + LI->getSyncScopeID(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index c41cc42db5e2..ac4dd44a0e90 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -148,25 +148,27 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - // We can only remove the loop if there is a preheader that we can - // branch from after removing it. + // We can only remove the loop if there is a preheader that we can branch from + // after removing it. Also, if LoopSimplify form is not available, stay out + // of trouble. BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) + if (!Preheader || !L->hasDedicatedExits()) { + DEBUG(dbgs() + << "Deletion requires Loop with preheader and dedicated exits.\n"); return false; - - // If LoopSimplify form is not available, stay out of trouble. - if (!L->hasDedicatedExits()) - return false; - + } // We can't remove loops that contain subloops. If the subloops were dead, // they would already have been removed in earlier executions of this pass. - if (L->begin() != L->end()) + if (L->begin() != L->end()) { + DEBUG(dbgs() << "Loop contains subloops.\n"); return false; + } BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (ExitBlock && isLoopNeverExecuted(L)) { + DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); // Set incoming value to undef for phi nodes in the exit block. BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast(BI)) { @@ -188,20 +190,26 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. - if (!ExitBlock) + if (!ExitBlock) { + DEBUG(dbgs() << "Deletion requires single exit block\n"); return false; - + } // Finally, we have to check that the loop really is dead. bool Changed = false; - if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) + if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) { + DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n"); return Changed; + } // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. 
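// Illustrative summary (hypothetical helper, not an LLVM API): the checks that
// deleteLoopIfDead performs before deleting a loop, as spelled out by the
// DEBUG messages added in this hunk, collected in one place. Each predicate is
// a stand-in for the corresponding LLVM query; the "loop never executes"
// fast path in the real code is omitted here.
#include <functional>

struct LoopChecks {
  std::function<bool()> hasPreheader;       // L->getLoopPreheader()
  std::function<bool()> hasDedicatedExits;  // L->hasDedicatedExits()
  std::function<bool()> hasSubloops;        // L->begin() != L->end()
  std::function<bool()> hasUniqueExitBlock; // L->getUniqueExitBlock()
  std::function<bool()> isDead;             // isLoopDead(...)
  std::function<bool()> backedgeCountKnown; // !isa<SCEVCouldNotCompute>(...)
};

bool canDeleteLoop(const LoopChecks &C) {
  if (!C.hasPreheader() || !C.hasDedicatedExits())
    return false; // need a preheader to branch from, and LoopSimplify form
  if (C.hasSubloops())
    return false; // dead inner loops would already have been removed
  if (!C.hasUniqueExitBlock())
    return false; // deletion requires a single exit block
  if (!C.isDead())
    return false; // the loop must compute nothing observable
  return C.backedgeCountKnown(); // otherwise it might be infinite
}

int main() {
  LoopChecks C{[] { return true; }, [] { return true; }, [] { return false; },
               [] { return true; }, [] { return true; }, [] { return true; }};
  return canDeleteLoop(C) ? 0 : 1; // all preconditions met: deletable
}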
const SCEV *S = SE.getMaxBackedgeTakenCount(L); - if (isa(S)) + if (isa(S)) { + DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n"); return Changed; + } + DEBUG(dbgs() << "Loop is invariant, delete it!"); deleteDeadLoop(L, DT, SE, LI, Updater); ++NumDeleted; @@ -311,6 +319,9 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &Updater) { + + DEBUG(dbgs() << "Analyzing Loop for deletion: "); + DEBUG(L.dump()); if (!deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, &Updater)) return PreservedAnalyses::all(); @@ -350,5 +361,7 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) { ScalarEvolution &SE = getAnalysis().getSE(); LoopInfo &LI = getAnalysis().getLoopInfo(); + DEBUG(dbgs() << "Analyzing Loop for deletion: "); + DEBUG(L->dump()); return deleteLoopIfDead(L, DT, SE, LI); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 8b435050ac76..4a6a35c0ab1b 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1160,7 +1160,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, if (!Dec || !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || (SubInst->getOpcode() == Instruction::Add && - Dec->isAllOnesValue()))) { + Dec->isMinusOne()))) { return false; } } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 9f3875a3027f..606136dc31a4 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -757,8 +757,11 @@ bool LoopInterchangeLegality::currentLimitations() { PHINode *InnerInductionVar; SmallVector Inductions; SmallVector Reductions; - if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) + if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { + DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes " + << "are supported currently.\n"); return true; + } // TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { @@ -771,16 +774,25 @@ bool LoopInterchangeLegality::currentLimitations() { InnerInductionVar = Inductions.pop_back_val(); Reductions.clear(); - if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) + if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { + DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes " + << "are supported currently.\n"); return true; + } // Outer loop cannot have reduction because then loops will not be tightly // nested. - if (!Reductions.empty()) + if (!Reductions.empty()) { + DEBUG(dbgs() << "Outer loops with reductions are not supported " + << "currently.\n"); return true; + } // TODO: Currently we handle only loops with 1 induction variable. - if (Inductions.size() != 1) + if (Inductions.size() != 1) { + DEBUG(dbgs() << "Loops with more than 1 induction variables are not " + << "supported currently.\n"); return true; + } // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { @@ -791,12 +803,16 @@ bool LoopInterchangeLegality::currentLimitations() { // TODO: We only handle LCSSA PHI's corresponding to reduction for now. 
BasicBlock *LoopExitBlock = getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); - if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) + if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) { + DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n"); return true; + } LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); - if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) + if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) { + DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); return true; + } // TODO: Current limitation: Since we split the inner loop latch at the point // were induction variable is incremented (induction.next); We cannot have @@ -816,8 +832,11 @@ bool LoopInterchangeLegality::currentLimitations() { InnerIndexVarInc = dyn_cast(InnerInductionVar->getIncomingValue(0)); - if (!InnerIndexVarInc) + if (!InnerIndexVarInc) { + DEBUG(dbgs() << "Did not find an instruction to increment the induction " + << "variable.\n"); return true; + } // Since we split the inner loop latch on this induction variable. Make sure // we do not have any instruction between the induction variable and branch @@ -827,19 +846,24 @@ bool LoopInterchangeLegality::currentLimitations() { for (const Instruction &I : reverse(*InnerLoopLatch)) { if (isa(I) || isa(I) || isa(I)) continue; + // We found an instruction. If this is not induction variable then it is not // safe to split this loop latch. - if (!I.isIdenticalTo(InnerIndexVarInc)) + if (!I.isIdenticalTo(InnerIndexVarInc)) { + DEBUG(dbgs() << "Found unsupported instructions between induction " + << "variable increment and branch.\n"); return true; + } FoundInduction = true; break; } // The loop latch ended and we didn't find the induction variable return as // current limitation. - if (!FoundInduction) + if (!FoundInduction) { + DEBUG(dbgs() << "Did not find the induction variable.\n"); return true; - + } return false; } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7312d97f8efe..3506ac343d59 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -485,10 +485,22 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { DomTreeNode *Node = HeaderChildren[I]; BasicBlock *BB = Node->getBlock(); - pred_iterator PI = pred_begin(BB); - BasicBlock *NearestDom = *PI; - for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) - NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + BasicBlock *NearestDom = nullptr; + for (BasicBlock *Pred : predecessors(BB)) { + // Consider only reachable basic blocks. + if (!DT->getNode(Pred)) + continue; + + if (!NearestDom) { + NearestDom = Pred; + continue; + } + + NearestDom = DT->findNearestCommonDominator(NearestDom, Pred); + assert(NearestDom && "No NearestCommonDominator found"); + } + + assert(NearestDom && "Nearest dominator not found"); // Remember if this changes the DomTree. if (Node->getIDom()->getBlock() != NearestDom) { diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 73436f13c94e..3638da118cb7 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -140,6 +140,13 @@ static cl::opt LSRExpNarrow( cl::desc("Narrow LSR complex solution using" " expectation of registers number")); +// Flag to narrow search space by filtering non-optimal formulae with +// the same ScaledReg and Scale. 
+static cl::opt FilterSameScaledReg( + "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), + cl::desc("Narrow LSR search space by filtering non-optimal formulae" + " with the same ScaledReg and Scale")); + #ifndef NDEBUG // Stress test IV chain generation. static cl::opt StressIVChain( @@ -1902,6 +1909,7 @@ class LSRInstance { void NarrowSearchSpaceByDetectingSupersets(); void NarrowSearchSpaceByCollapsingUnrolledCode(); void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); void NarrowSearchSpaceByDeletingCostlyFormulas(); void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); @@ -2318,7 +2326,7 @@ LSRInstance::OptimizeLoopTermCond() { dyn_cast_or_null(getExactSDiv(B, A, SE))) { const ConstantInt *C = D->getValue(); // Stride of one or negative one can have reuse with non-addresses. - if (C->isOne() || C->isAllOnesValue()) + if (C->isOne() || C->isMinusOne()) goto decline_post_inc; // Avoid weird situations. if (C->getValue().getMinSignedBits() >= 64 || @@ -4306,6 +4314,104 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ } } +/// If a LSRUse has multiple formulae with the same ScaledReg and Scale. +/// Pick the best one and delete the others. +/// This narrowing heuristic is to keep as many formulae with different +/// Scale and ScaledReg pair as possible while narrowing the search space. +/// The benefit is that it is more likely to find out a better solution +/// from a formulae set with more Scale and ScaledReg variations than +/// a formulae set with the same Scale and ScaledReg. The picking winner +/// reg heurstic will often keep the formulae with the same Scale and +/// ScaledReg and filter others, and we want to avoid that if possible. +void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; + + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by choosing the best Formula " + "from the Formulae with the same Scale and ScaledReg.\n"); + + // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. + typedef DenseMap, size_t> BestFormulaeTy; + BestFormulaeTy BestFormulae; +#ifndef NDEBUG + bool ChangedFormulae = false; +#endif + DenseSet VisitedRegs; + SmallPtrSet Regs; + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); + + // Return true if Formula FA is better than Formula FB. + auto IsBetterThan = [&](Formula &FA, Formula &FB) { + // First we will try to choose the Formula with fewer new registers. + // For a register used by current Formula, the more the register is + // shared among LSRUses, the less we increase the register number + // counter of the formula. + size_t FARegNum = 0; + for (const SCEV *Reg : FA.BaseRegs) { + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); + FARegNum += (NumUses - UsedByIndices.count() + 1); + } + size_t FBRegNum = 0; + for (const SCEV *Reg : FB.BaseRegs) { + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); + FBRegNum += (NumUses - UsedByIndices.count() + 1); + } + if (FARegNum != FBRegNum) + return FARegNum < FBRegNum; + + // If the new register numbers are the same, choose the Formula with + // less Cost. 
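// Illustrative sketch (standalone, simplified types): the narrowing step added
// above keeps only the best formula for each (ScaledReg, Scale) pair. The
// pattern is "group by key, keep one winner per group": a map from key to the
// index of the current best entry, plus a comparator deciding whether a
// newcomer replaces it. Formula and its single Cost field here are hypothetical
// stand-ins for LSR's register-count and Cost comparison; the cost-comparison
// code itself continues in the hunk below.
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Formula {
  std::string ScaledReg;
  int Scale;
  int Cost;
};

int main() {
  std::vector<Formula> Formulae = {
      {"%iv", 4, 7}, {"%iv", 4, 5}, {"%iv", 8, 6}, {"%n", 4, 3}};

  auto IsBetterThan = [](const Formula &A, const Formula &B) {
    return A.Cost < B.Cost;
  };

  // Key -> index of the best formula seen so far for that key.
  std::map<std::pair<std::string, int>, size_t> Best;
  std::vector<bool> Keep(Formulae.size(), true);

  for (size_t I = 0; I != Formulae.size(); ++I) {
    auto P = Best.insert({{Formulae[I].ScaledReg, Formulae[I].Scale}, I});
    if (P.second)
      continue; // first formula with this key: provisionally keep it
    size_t &BestIdx = P.first->second;
    if (IsBetterThan(Formulae[I], Formulae[BestIdx])) {
      Keep[BestIdx] = false; // newcomer wins: drop the old best
      BestIdx = I;
    } else {
      Keep[I] = false;       // newcomer loses: drop it
    }
  }

  for (size_t I = 0; I != Formulae.size(); ++I)
    if (Keep[I])
      std::cout << Formulae[I].ScaledReg << " * " << Formulae[I].Scale
                << " (cost " << Formulae[I].Cost << ")\n";
}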
+ Cost CostFA, CostFB; + Regs.clear(); + CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU); + Regs.clear(); + CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU); + return CostFA.isLess(CostFB, TTI); + }; + + bool Any = false; + for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; + ++FIdx) { + Formula &F = LU.Formulae[FIdx]; + if (!F.ScaledReg) + continue; + auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx}); + if (P.second) + continue; + + Formula &Best = LU.Formulae[P.first->second]; + if (IsBetterThan(F, Best)) + std::swap(F, Best); + DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); + dbgs() << "\n" + " in favor of formula "; + Best.print(dbgs()); dbgs() << '\n'); +#ifndef NDEBUG + ChangedFormulae = true; +#endif + LU.DeleteFormula(F); + --FIdx; + --NumForms; + Any = true; + } + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); + + // Reset this to prepare for the next use. + BestFormulae.clear(); + } + + DEBUG(if (ChangedFormulae) { + dbgs() << "\n" + "After filtering out undesirable candidates:\n"; + print_uses(dbgs()); + }); +} + /// The function delete formulas with high registers number expectation. /// Assuming we don't know the value of each formula (already delete /// all inefficient), generate probability of not selecting for each @@ -4516,6 +4622,8 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { NarrowSearchSpaceByDetectingSupersets(); NarrowSearchSpaceByCollapsingUnrolledCode(); NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + if (FilterSameScaledReg) + NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); if (LSRExpNarrow) NarrowSearchSpaceByDeletingCostlyFormulas(); else diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index acd3ef6791be..6727cf0179c1 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -238,7 +238,7 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, &BB->front()); NewPN->addIncoming(Opd1, S0->getParent()); NewPN->addIncoming(Opd2, S1->getParent()); - if (MD && NewPN->getType()->getScalarType()->isPointerTy()) + if (MD && NewPN->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(NewPN); return NewPN; } diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 9cf01c6582b5..9d018563618e 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -866,9 +866,7 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, // Things in TOPClass are equivalent to everything. if (ValueToClass.lookup(*U) == TOPClass) return false; - if (lookupOperandLeader(*U) == PN) - return false; - return true; + return lookupOperandLeader(*U) != PN; }); std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use *U) -> Value * { @@ -2063,9 +2061,10 @@ Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const { // // The invariants of this function are: // -// I must be moving to NewClass from OldClass The StoreCount of OldClass and -// NewClass is expected to have been updated for I already if it is is a store. -// The OldClass memory leader has not been updated yet if I was the leader. +// - I must be moving to NewClass from OldClass +// - The StoreCount of OldClass and NewClass is expected to have been updated +// for I already if it is is a store. +// - The OldClass memory leader has not been updated yet if I was the leader. 
void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, MemoryAccess *InstMA, CongruenceClass *OldClass, @@ -2074,7 +2073,8 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, // be the MemoryAccess of OldClass. assert((!InstMA || !OldClass->getMemoryLeader() || OldClass->getLeader() != I || - OldClass->getMemoryLeader() == InstMA) && + MemoryAccessToClass.lookup(OldClass->getMemoryLeader()) == + MemoryAccessToClass.lookup(InstMA)) && "Representative MemoryAccess mismatch"); // First, see what happens to the new class if (!NewClass->getMemoryLeader()) { @@ -2136,7 +2136,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, << NewClass->getID() << " from " << *NewClass->getLeader() << " to " << *SI << " because store joined class\n"); // If we changed the leader, we have to mark it changed because we don't - // know what it will do to symbolic evlauation. + // know what it will do to symbolic evaluation. NewClass->setLeader(SI); } // We rely on the code below handling the MemoryAccess change. diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index cdba0062953f..29d1ba406ae4 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -2148,7 +2148,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { if (I->getOpcode() == Instruction::Mul && cast(I->user_back())->getOpcode() == Instruction::Add && isa(Ops.back().Op) && - cast(Ops.back().Op)->isAllOnesValue()) { + cast(Ops.back().Op)->isMinusOne()) { ValueEntry Tmp = Ops.pop_back_val(); Ops.insert(Ops.begin(), Tmp); } else if (I->getOpcode() == Instruction::FMul && diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index a73e9aec0617..f19d45329d23 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1994,7 +1994,7 @@ static void rematerializeLiveValues(CallSite CS, Instruction *LastClonedValue = nullptr; Instruction *LastValue = nullptr; for (Instruction *Instr: ChainToBase) { - // Only GEP's and casts are suported as we need to be careful to not + // Only GEP's and casts are supported as we need to be careful to not // introduce any new uses of pointers not in the liveset. // Note that it's fine to introduce new uses of pointers which were // otherwise not used after this statepoint. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 7a6fa1711411..a738ebb4607e 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -963,7 +963,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { } else { // X or -1 = -1 if (ConstantInt *CI = NonOverdefVal->getConstantInt()) - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return markConstant(IV, &I, NonOverdefVal->getConstant()); } } diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 4729f4ef5956..b9cee5b2ba95 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1673,8 +1673,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // See if we need inttoptr for this type pair. A cast involving both scalars // and vectors requires and additional bitcast. 
- if (OldTy->getScalarType()->isIntegerTy() && - NewTy->getScalarType()->isPointerTy()) { + if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) { // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8* if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), @@ -1690,8 +1689,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // See if we need ptrtoint for this type pair. A cast involving both scalars // and vectors requires and additional bitcast. - if (OldTy->getScalarType()->isPointerTy() && - NewTy->getScalarType()->isIntegerTy()) { + if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) { // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128 if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), @@ -2400,7 +2398,7 @@ class llvm::sroa::AllocaSliceRewriter LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); // Any !nonnull metadata or !range metadata on the old load is also valid // on the new load. This is even true in some cases even when the loads @@ -2435,7 +2433,7 @@ class llvm::sroa::AllocaSliceRewriter getSliceAlign(TargetTy), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); V = NewLI; IsPtrAdjusted = true; @@ -2578,7 +2576,7 @@ class llvm::sroa::AllocaSliceRewriter } NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); if (SI.isVolatile()) - NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope()); + NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 486f3e5a43d4..0cccb415efdb 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -329,7 +329,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { Loops[Exit] = N->getEntry(); } else { - // Test for sucessors as back edge + // Test for successors as back edge BasicBlock *BB = N->getNodeAs(); BranchInst *Term = cast(BB->getTerminator()); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 314c990293cc..7e75e8847785 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -46,13 +46,21 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - + Module *TheModule = F ? F->getParent() : nullptr; + // Loop over all instructions, and copy them over. 
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { - if (DIFinder && F->getParent() && II->getDebugLoc()) - DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get()); + if (DIFinder && TheModule) { + if (auto *DDI = dyn_cast(II)) + DIFinder->processDeclare(*TheModule, DDI); + else if (auto *DVI = dyn_cast(II)) + DIFinder->processValue(*TheModule, DVI); + + if (auto DbgLoc = II->getDebugLoc()) + DIFinder->processLocation(*TheModule, DbgLoc.get()); + } Instruction *NewInst = II->clone(); if (II->hasName()) @@ -153,6 +161,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // When we remap instructions, we want to avoid duplicating inlined // DISubprograms, so record all subprograms we find as we duplicate // instructions and then freeze them in the MD map. + // We also record information about dbg.value and dbg.declare to avoid + // duplicating the types. DebugInfoFinder DIFinder; // Loop over all of the basic blocks in the function, cloning them as @@ -193,6 +203,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, } } + for (auto *Type : DIFinder.types()) { + VMap.MD()[Type].reset(Type); + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (Function::iterator BB = diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp index 9f4d9c7e3981..d9294c499309 100644 --- a/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -81,7 +81,7 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, break; case ICmpInst::ICMP_SGT: // X > -1 is equivalent to (X & SignMask) == 0. - if (!C->isAllOnesValue()) + if (!C->isMinusOne()) return false; Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 30d8856cfbef..1189714dfab1 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -1116,12 +1116,6 @@ Function *CodeExtractor::extractCodeRegion() { } } - //cerr << "NEW FUNCTION: " << *newFunction; - // verifyFunction(*newFunction); - - // cerr << "OLD FUNCTION: " << *oldFunction; - // verifyFunction(*oldFunction); - DEBUG(if (verifyFunction(*newFunction)) report_fatal_error("verifyFunction failed!")); return newFunction; diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index c97e544e620a..1328f2f3ec01 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -402,7 +402,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast(Ptr)) { Type *ElemTy = GV->getValueType(); - if (!Size->isAllOnesValue() && + if (!Size->isMinusOne() && Size->getValue().getLimitedValue() >= DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 0457294361b5..4a2be3a53176 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -513,8 +513,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(LI->getOrdering(), cast(R)->getOrdering())) return Res; - if (int Res = - cmpNumbers(LI->getSynchScope(), 
cast(R)->getSynchScope())) + if (int Res = cmpNumbers(LI->getSyncScopeID(), + cast(R)->getSyncScopeID())) return Res; return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), cast(R)->getMetadata(LLVMContext::MD_range)); @@ -529,7 +529,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(SI->getOrdering(), cast(R)->getOrdering())) return Res; - return cmpNumbers(SI->getSynchScope(), cast(R)->getSynchScope()); + return cmpNumbers(SI->getSyncScopeID(), + cast(R)->getSyncScopeID()); } if (const CmpInst *CI = dyn_cast(L)) return cmpNumbers(CI->getPredicate(), cast(R)->getPredicate()); @@ -584,7 +585,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(FI->getOrdering(), cast(R)->getOrdering())) return Res; - return cmpNumbers(FI->getSynchScope(), cast(R)->getSynchScope()); + return cmpNumbers(FI->getSyncScopeID(), + cast(R)->getSyncScopeID()); } if (const AtomicCmpXchgInst *CXI = dyn_cast(L)) { if (int Res = cmpNumbers(CXI->isVolatile(), @@ -601,8 +603,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, cmpOrderings(CXI->getFailureOrdering(), cast(R)->getFailureOrdering())) return Res; - return cmpNumbers(CXI->getSynchScope(), - cast(R)->getSynchScope()); + return cmpNumbers(CXI->getSyncScopeID(), + cast(R)->getSyncScopeID()); } if (const AtomicRMWInst *RMWI = dyn_cast(L)) { if (int Res = cmpNumbers(RMWI->getOperation(), @@ -614,8 +616,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(RMWI->getOrdering(), cast(R)->getOrdering())) return Res; - return cmpNumbers(RMWI->getSynchScope(), - cast(R)->getSynchScope()); + return cmpNumbers(RMWI->getSyncScopeID(), + cast(R)->getSyncScopeID()); } if (const PHINode *PNL = dyn_cast(L)) { const PHINode *PNR = cast(R); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 5127eba3f9ae..74610613001c 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1662,9 +1662,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { TI->eraseFromParent(); } -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. +/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo +/// after modifying the CFG. bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -2168,6 +2169,9 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { return true; case Instruction::Call: case Instruction::Invoke: + // Can't handle inline asm. Skip it. + if (isa(ImmutableCallSite(I).getCalledValue())) + return false; // Many arithmetic intrinsics have no issue taking a // variable, however it's hard to distingish these from // specials such as @llvm.frameaddress that require a constant. @@ -2182,12 +2186,18 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { case Instruction::ShuffleVector: // Shufflevector masks are constant. return OpIdx != 2; + case Instruction::Switch: case Instruction::ExtractValue: - case Instruction::InsertValue: // All operands apart from the first are constant. return OpIdx == 0; + case Instruction::InsertValue: + // All operands apart from the first and the second are constant. 
+ return OpIdx < 2; case Instruction::Alloca: - return false; + // Static allocas (constant size in the entry block) are handled by + // prologue/epilogue insertion so they're free anyway. We definitely don't + // want to make them non-constant. + return !dyn_cast(I)->isStaticAlloca(); case Instruction::GetElementPtr: if (OpIdx == 0) return true; diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 9ad2b707e6b2..5170c68e2915 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -65,9 +65,11 @@ static cl::opt UnrollRuntimeMultiExit( /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, - BasicBlock *PrologExit, BasicBlock *PreHeader, - BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *PrologExit, + BasicBlock *OriginalLoopLatchExit, + BasicBlock *PreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast(VMap[Latch]); @@ -142,17 +144,15 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); - BasicBlock *Exit = L->getUniqueExitBlock(); - assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees - SmallVector Preds(predecessors(Exit)); - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + SmallVector Preds(predecessors(OriginalLoopLatchExit)); + SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) - B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); + B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) - DT->changeImmediateDominator(Exit, PrologExit); + DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); } /// Connect the unrolling epilog code to the original loop. @@ -427,6 +427,50 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, return nullptr; } +/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits +/// is populated with all the loop exit blocks other than the LatchExit block. +static bool +canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl &OtherExits, + BasicBlock *LatchExit, bool PreserveLCSSA, + bool UseEpilogRemainder) { + + // Support runtime unrolling for multiple exit blocks and multiple exiting + // blocks. + if (!UnrollRuntimeMultiExit) + return false; + // Even if runtime multi exit is enabled, we currently have some correctness + // constrains in unrolling a multi-exit loop. + // We rely on LCSSA form being preserved when the exit blocks are transformed. + if (!PreserveLCSSA) + return false; + SmallVector Exits; + L->getUniqueExitBlocks(Exits); + for (auto *BB : Exits) + if (BB != LatchExit) + OtherExits.push_back(BB); + + // TODO: Support multiple exiting blocks jumping to the `LatchExit` when + // UnrollRuntimeMultiExit is true. This will need updating the logic in + // connectEpilog/connectProlog. 
+ if (!LatchExit->getSinglePredecessor()) { + DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 " + "predecessor.\n"); + return false; + } + // FIXME: We bail out of multi-exit unrolling when epilog loop is generated + // and L is an inner loop. This is because in presence of multiple exits, the + // outer loop is incorrect: we do not add the EpilogPreheader and exit to the + // outer loop. This is automatically handled in the prolog case, so we do not + // have that bug in prolog generation. + if (UseEpilogRemainder && L->getParentLoop()) + return false; + + // All constraints have been satisfied. + return true; +} + + + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// @@ -470,53 +514,40 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { - // for now, only unroll loops that contain a single exit - if (!UnrollRuntimeMultiExit && !L->getExitingBlock()) - return false; + DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); + DEBUG(L->dump()); // Make sure the loop is in canonical form. - if (!L->isLoopSimplifyForm()) + if (!L->isLoopSimplifyForm()) { + DEBUG(dbgs() << "Not in simplify form!\n"); return false; + } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); - BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop - if (!LatchExit && !UnrollRuntimeMultiExit) - return false; - // These are exit blocks other than the target of the latch exiting block. - SmallVector OtherExits; BranchInst *LatchBR = cast(Latch->getTerminator()); - unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. This needs // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. - assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) && + assert(!L->contains(LatchExit) && "one of the loop latch successors should be the exit block!"); - // Support runtime unrolling for multiple exit blocks and multiple exiting - // blocks. - if (!LatchExit) { - assert(UseEpilogRemainder && "Multi exit unrolling is currently supported " - "unrolling with epilog remainder only!"); - LatchExit = LatchBR->getSuccessor(ExitIndex); - // We rely on LCSSA form being preserved when the exit blocks are - // transformed. - if (!PreserveLCSSA) - return false; - // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This - // will need updating the logic in connectEpilog. - if (!LatchExit->getSinglePredecessor()) - return false; - SmallVector Exits; - L->getUniqueExitBlocks(Exits); - for (auto *BB : Exits) - if (BB != LatchExit) - OtherExits.push_back(BB); + // These are exit blocks other than the target of the latch exiting block. + SmallVector OtherExits; + bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop( + L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); + // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. 
+ if (!isMultiExitUnrollingEnabled && + (!L->getExitingBlock() || OtherExits.size())) { + DEBUG( + dbgs() + << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " + "enabled!\n"); + return false; } - - assert(LatchExit && "Latch Exit should exist!"); - // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) @@ -530,29 +561,38 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa(BECountSC) || - !BECountSC->getType()->isIntegerTy()) + !BECountSC->getType()->isIntegerTy()) { + DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; + } unsigned BEWidth = cast(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); - if (isa(TripCountSC)) + if (isa(TripCountSC)) { + DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; + } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) + Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { + DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; + } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. - if (Log2_32(Count) > BEWidth) + if (Log2_32(Count) > BEWidth) { + DEBUG(dbgs() + << "Count failed constraint on overflow trip count calculation.\n"); return false; + } // Loop structure is the following: // @@ -711,11 +751,10 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap[Phi->getIncomingValue(i)]; - if (!newVal) { - assert(isa(Phi->getIncomingValue(i)) && - "VMap should exist for all values except constants!"); + // newVal can be a constant or derived from values outside the loop, and + // hence need not have a VMap value. + if (!newVal) newVal = Phi->getIncomingValue(i); - } Phi->addIncoming(newVal, cast(VMap[Phi->getIncomingBlock(i)])); } @@ -781,8 +820,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, } else { // Connect the prolog code to the original loop and update the // PHI functions. 
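// Illustrative sketch (standalone arithmetic, not the LLVM implementation):
// the general shape of runtime unrolling with a prolog remainder. For a trip
// count known only at run time, the prolog executes TripCount % Count
// iterations one at a time and the unrolled body then covers the rest in
// steps of Count; when TripCount < Count the unrolled body is skipped
// entirely, which is what the branch built in ConnectProlog arranges. The
// real code works from the backedge-taken count (TripCount - 1) and has extra
// overflow handling not modelled here.
#include <cstdint>
#include <iostream>

int main() {
  const uint64_t Count = 4; // unroll factor chosen at compile time
  for (uint64_t TripCount : {0ull, 3ull, 9ull, 12ull}) {
    uint64_t PrologIters = TripCount % Count;
    uint64_t UnrolledIters = TripCount - PrologIters; // multiple of Count
    bool SkipUnrolledLoop = TripCount < Count;
    std::cout << "trip=" << TripCount << " prolog=" << PrologIters
              << " unrolled=" << UnrolledIters
              << (SkipUnrolledLoop ? " (unrolled loop skipped)" : "") << '\n';
  }
}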
- ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, - VMap, DT, LI, PreserveLCSSA); + ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 1c2a60a6b8b2..900450b40061 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -8,12 +8,256 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +static unsigned getLoopOperandSizeInBytes(Type *Type) { + if (VectorType *VTy = dyn_cast(Type)) { + return VTy->getBitWidth() / 8; + } + + return Type->getPrimitiveSizeInBits() / 8; +} + +void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, + Value *DstAddr, ConstantInt *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile, + const TargetTransformInfo &TTI) { + // No need to expand zero length copies. + if (CopyLen->isZero()) + return; + + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = nullptr; + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + + Type *TypeOfCopyLen = CopyLen->getType(); + Type *LoopOpType = + TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + + unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; + + unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); + + if (LoopEndCount != 0) { + // Split + PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); + PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + // Cast the Src and Dst pointers to pointers to the loop operand type (if + // needed). + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); + // Loop Body + Value *SrcGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + // Create the loop branch condition. 
+    Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount);
+    LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI),
+                             LoopBB, PostLoopBB);
+  }
+
+  uint64_t BytesCopied = LoopEndCount * LoopOpSize;
+  uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied;
+  if (RemainingBytes) {
+    IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI()
+                                    : InsertBefore);
+
+    // Update the alignment based on the copy size used in the loop body.
+    SrcAlign = std::min(SrcAlign, LoopOpSize);
+    DestAlign = std::min(DestAlign, LoopOpSize);
+
+    SmallVector<Type *, 5> RemainingOps;
+    TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes,
+                                          SrcAlign, DestAlign);
+
+    for (auto OpTy : RemainingOps) {
+      // Calculate the new index
+      unsigned OperandSize = getLoopOperandSizeInBytes(OpTy);
+      uint64_t GepIndex = BytesCopied / OperandSize;
+      assert(GepIndex * OperandSize == BytesCopied &&
+             "Division should have no Remainder!");
+      // Cast source to operand type and load
+      PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS);
+      Value *CastedSrc = SrcAddr->getType() == SrcPtrType
+                             ? SrcAddr
+                             : RBuilder.CreateBitCast(SrcAddr, SrcPtrType);
+      Value *SrcGEP = RBuilder.CreateInBoundsGEP(
+          OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex));
+      Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+
+      // Cast destination to operand type and store.
+      PointerType *DstPtrType = PointerType::get(OpTy, DstAS);
+      Value *CastedDst = DstAddr->getType() == DstPtrType
+                             ? DstAddr
+                             : RBuilder.CreateBitCast(DstAddr, DstPtrType);
+      Value *DstGEP = RBuilder.CreateInBoundsGEP(
+          OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex));
+      RBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+      BytesCopied += OperandSize;
+    }
+  }
+  assert(BytesCopied == CopyLen->getZExtValue() &&
+         "Bytes copied should match size in the call!");
+}
+
+void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore,
+                                       Value *SrcAddr, Value *DstAddr,
+                                       Value *CopyLen, unsigned SrcAlign,
+                                       unsigned DestAlign, bool SrcIsVolatile,
+                                       bool DstIsVolatile,
+                                       const TargetTransformInfo &TTI) {
+  BasicBlock *PreLoopBB = InsertBefore->getParent();
+  BasicBlock *PostLoopBB =
+      PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion");
+
+  Function *ParentFunc = PreLoopBB->getParent();
+  LLVMContext &Ctx = PreLoopBB->getContext();
+
+  Type *LoopOpType =
+      TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign);
+  unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType);
+
+  IRBuilder<> PLBuilder(PreLoopBB->getTerminator());
+
+  unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+  unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+  PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS);
+  PointerType *DstOpType = PointerType::get(LoopOpType, DstAS);
+  if (SrcAddr->getType() != SrcOpType) {
+    SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType);
+  }
+  if (DstAddr->getType() != DstOpType) {
+    DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType);
+  }
+
+  // Calculate the loop trip count, and remaining bytes to copy after the loop.
+  Type *CopyLenType = CopyLen->getType();
+  IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType);
+  assert(ILengthType &&
+         "expected size argument to memcpy to be an integer type!");
+  ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize);
+  Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize);
+  Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize);
+  Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual);
+
+  BasicBlock *LoopBB =
+      BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr);
+  IRBuilder<> LoopBuilder(LoopBB);
+
+  PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index");
+  LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB);
+
+  Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex);
+  Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+  Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex);
+  LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+  Value *NewIndex =
+      LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U));
+  LoopIndex->addIncoming(NewIndex, LoopBB);
+
+  Type *Int8Type = Type::getInt8Ty(Ctx);
+  if (LoopOpType != Int8Type) {
+    // Loop body for the residual copy.
+    BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual",
+                                               PreLoopBB->getParent(), nullptr);
+    // Residual loop header.
+    BasicBlock *ResHeaderBB = BasicBlock::Create(
+        Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr);
+
+    // Need to update the pre-loop basic block to branch to the correct place:
+    // branch to the main loop if the count is non-zero, branch to the residual
+    // loop if the copy size is smaller than one iteration of the main loop but
+    // non-zero, and finally branch to after the residual loop if the memcpy
+    // size is zero.
+    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+                           LoopBB, ResHeaderBB);
+    PreLoopBB->getTerminator()->eraseFromParent();
+
+    LoopBuilder.CreateCondBr(
+        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+        ResHeaderBB);
+
+    // Determine if we need to branch to the residual loop or bypass it.
+    IRBuilder<> RHBuilder(ResHeaderBB);
+    RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero),
+                           ResLoopBB, PostLoopBB);
+
+    // Copy the residual with single byte load/store loop.
+    IRBuilder<> ResBuilder(ResLoopBB);
+    PHINode *ResidualIndex =
+        ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index");
+    ResidualIndex->addIncoming(Zero, ResHeaderBB);
+
+    Value *SrcAsInt8 =
+        ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS));
+    Value *DstAsInt8 =
+        ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS));
+    Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex);
+    Value *SrcGEP =
+        ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset);
+    Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile);
+    Value *DstGEP =
+        ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset);
+    ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile);
+
+    Value *ResNewIndex =
+        ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U));
+    ResidualIndex->addIncoming(ResNewIndex, ResLoopBB);
+
+    // Create the loop branch condition.
+    ResBuilder.CreateCondBr(
+        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
+        PostLoopBB);
+  } else {
+    // In this case the loop operand type was a byte, and there is no need for a
+    // residual loop to copy the remaining memory after the main loop.
+    // We do however need to patch up the control flow by creating the
+    // terminators for the preloop block and the memcpy loop.
+    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+                           LoopBB, PostLoopBB);
+    PreLoopBB->getTerminator()->eraseFromParent();
+    LoopBuilder.CreateCondBr(
+        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+        PostLoopBB);
+  }
+}
+
 void llvm::createMemCpyLoop(Instruction *InsertBefore,
                             Value *SrcAddr, Value *DstAddr, Value *CopyLen,
                             unsigned SrcAlign, unsigned DestAlign,
@@ -208,15 +452,41 @@ static void createMemSetLoop(Instruction *InsertBefore,
                    NewBB);
 }
 
-void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy) {
-  createMemCpyLoop(/* InsertBefore */ Memcpy,
-                   /* SrcAddr */ Memcpy->getRawSource(),
-                   /* DstAddr */ Memcpy->getRawDest(),
-                   /* CopyLen */ Memcpy->getLength(),
-                   /* SrcAlign */ Memcpy->getAlignment(),
-                   /* DestAlign */ Memcpy->getAlignment(),
-                   /* SrcIsVolatile */ Memcpy->isVolatile(),
-                   /* DstIsVolatile */ Memcpy->isVolatile());
+void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
+                              const TargetTransformInfo &TTI) {
+  // Original implementation
+  if (!TTI.useWideIRMemcpyLoopLowering()) {
+    createMemCpyLoop(/* InsertBefore */ Memcpy,
+                     /* SrcAddr */ Memcpy->getRawSource(),
+                     /* DstAddr */ Memcpy->getRawDest(),
+                     /* CopyLen */ Memcpy->getLength(),
+                     /* SrcAlign */ Memcpy->getAlignment(),
+                     /* DestAlign */ Memcpy->getAlignment(),
+                     /* SrcIsVolatile */ Memcpy->isVolatile(),
+                     /* DstIsVolatile */ Memcpy->isVolatile());
+  } else {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+      createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+                                /* SrcAddr */ Memcpy->getRawSource(),
+                                /* DstAddr */ Memcpy->getRawDest(),
+                                /* CopyLen */ CI,
+                                /* SrcAlign */ Memcpy->getAlignment(),
+                                /* DestAlign */ Memcpy->getAlignment(),
+                                /* SrcIsVolatile */ Memcpy->isVolatile(),
+                                /* DstIsVolatile */ Memcpy->isVolatile(),
+                                /* TargetTransformInfo */ TTI);
+    } else {
+      createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
+                                  /* SrcAddr */ Memcpy->getRawSource(),
+                                  /* DstAddr */ Memcpy->getRawDest(),
+                                  /* CopyLen */ Memcpy->getLength(),
+                                  /* SrcAlign */ Memcpy->getAlignment(),
+                                  /* DestAlign */ Memcpy->getAlignment(),
+                                  /* SrcIsVolatile */ Memcpy->isVolatile(),
+                                  /* DstIsVolatile */ Memcpy->isVolatile(),
+                                  /* TargetTransformInfo */ TTI);
+    }
+  }
 }
 
 void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index e724b0a28c32..dee658f98393 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5754,8 +5754,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   if (BasicBlock *Dom = BB->getSinglePredecessor()) {
     auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
     if (PBI && PBI->isConditional() &&
-        PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
-        (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) {
+        PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+      assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
       bool CondIsFalse = PBI->getSuccessor(1) == BB;
       Optional<bool> Implication = isImpliedCondition(
           PBI->getCondition(), BI->getCondition(), DL, CondIsFalse);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index ec8b0d426265..6d90e6b48358 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -25,6 +25,7 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -80,6 +81,7 @@ namespace {
                               bool IsSigned);
     bool eliminateSDiv(BinaryOperator *SDiv);
     bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
+    bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
   };
 }
 
@@ -154,6 +156,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
 void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
   unsigned IVOperIdx = 0;
   ICmpInst::Predicate Pred = ICmp->getPredicate();
+  ICmpInst::Predicate OriginalPred = Pred;
   if (IVOperand != ICmp->getOperand(0)) {
     // Swapped
     assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
@@ -262,6 +265,16 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
     ICmp->setPredicate(InvariantPredicate);
     ICmp->setOperand(0, NewLHS);
     ICmp->setOperand(1, NewRHS);
+  } else if (ICmpInst::isSigned(OriginalPred) &&
+             SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+    // If we were unable to make anything above, all we can do is canonicalize
+    // the comparison hoping that it will open the doors for other
+    // optimizations. If we find out that we compare two non-negative values,
+    // we turn the instruction's predicate to its unsigned version. Note that
+    // we cannot rely on Pred here unless we check if we have swapped it.
+    assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+    DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n');
+    ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
   } else
     return;
 
@@ -583,6 +596,35 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
   return Changed;
 }
 
+/// Annotate the Shr in (X << IVOperand) >> C as exact using the
+/// information from the IV's range. Returns true if anything changed, false
+/// otherwise.
+bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
+                                          Value *IVOperand) {
+  using namespace llvm::PatternMatch;
+
+  if (BO->getOpcode() == Instruction::Shl) {
+    bool Changed = false;
+    ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
+    for (auto *U : BO->users()) {
+      const APInt *C;
+      if (match(U,
+                m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
+          match(U,
+                m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
+        BinaryOperator *Shr = cast<BinaryOperator>(U);
+        if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
+          Shr->setIsExact(true);
+          Changed = true;
+        }
+      }
+    }
+    return Changed;
+  }
+
+  return false;
+}
+
 /// Add all uses of Def to the current IV's worklist.
 static void pushIVUsers(
   Instruction *Def,
@@ -675,8 +717,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
     }
 
     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) {
-      if (isa<OverflowingBinaryOperator>(BO) &&
-          strengthenOverflowingOperation(BO, IVOperand)) {
+      if ((isa<OverflowingBinaryOperator>(BO) &&
+           strengthenOverflowingOperation(BO, IVOperand)) ||
+          (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
        // re-queue uses of the now modified binary operator and fall
        // through to the checks that remain.
pushIVUsers(IVOperand, Simplified, SimpleIVUsers); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index b723b65f35e5..77c0a41929ac 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -656,7 +656,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); // memchr(x, y, 0) -> null - if (LenC && LenC->isNullValue()) + if (LenC && LenC->isZero()) return Constant::getNullValue(CI->getType()); // From now on we need at least constant length and string. @@ -2280,7 +2280,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, return true; if (ConstantInt *ObjSizeCI = dyn_cast(CI->getArgOperand(ObjSizeOp))) { - if (ObjSizeCI->isAllOnesValue()) + if (ObjSizeCI->isMinusOne()) return true; // If the object size wasn't -1 (unknown), bail out if we were asked to. if (OnlyLowerUnknownSize) diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index 60d9ede2c487..c3feea6a0a41 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -51,25 +51,24 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, // If the store and reload are the same size, we can always reuse it. if (StoredValSize == LoadedValSize) { // Pointer to Pointer -> use bitcast. - if (StoredValTy->getScalarType()->isPointerTy() && - LoadedTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) { StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); } else { // Convert source pointers to integers, which can be bitcast. - if (StoredValTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy()) { StoredValTy = DL.getIntPtrType(StoredValTy); StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); } Type *TypeToCastTo = LoadedTy; - if (TypeToCastTo->getScalarType()->isPointerTy()) + if (TypeToCastTo->isPtrOrPtrVectorTy()) TypeToCastTo = DL.getIntPtrType(TypeToCastTo); if (StoredValTy != TypeToCastTo) StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo); // Cast to pointer if the load needs a pointer type. - if (LoadedTy->getScalarType()->isPointerTy()) + if (LoadedTy->isPtrOrPtrVectorTy()) StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); } @@ -86,7 +85,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, "canCoerceMustAliasedValueToLoad fail"); // Convert source pointers to integers, which can be manipulated. - if (StoredValTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy()) { StoredValTy = DL.getIntPtrType(StoredValTy); StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); } @@ -112,7 +111,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, if (LoadedTy != NewIntTy) { // If the result is a pointer, inttoptr. - if (LoadedTy->getScalarType()->isPointerTy()) + if (LoadedTy->isPtrOrPtrVectorTy()) StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); else // Otherwise, bitcast. @@ -316,7 +315,7 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. 
- if (SrcVal->getType()->getScalarType()->isPointerTy()) + if (SrcVal->getType()->isPtrOrPtrVectorTy()) SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); if (!SrcVal->getType()->isIntegerTy()) SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 193cc4d13787..eb82ee283d44 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5315,8 +5315,13 @@ void LoopVectorizationLegality::addInductionPhi( // Both the PHI node itself, and the "post-increment" value feeding // back into the PHI node may have external users. - AllowedExit.insert(Phi); - AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + // We can allow those uses, except if the SCEVs we have for them rely + // on predicates that only hold within the loop, since allowing the exit + // currently means re-using this SCEV outside the loop. + if (PSE.getUnionPredicate().isAlwaysTrue()) { + AllowedExit.insert(Phi); + AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + } DEBUG(dbgs() << "LV: Found an induction variable.\n"); return; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b494526369d6..4425043ad39a 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -860,7 +860,7 @@ class BoUpSLP { bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP); /// Un-bundles a group of instructions. - void cancelScheduling(ArrayRef VL); + void cancelScheduling(ArrayRef VL, Value *OpValue); /// Extends the scheduling region so that V is inside the region. /// \returns true if the region size is within the limit. 
@@ -1258,7 +1258,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, cast(VL[j])->getIncomingValueForBlock(PH->getIncomingBlock(i))); if (Term) { DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1284,7 +1284,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (Reuse) { DEBUG(dbgs() << "SLP: Reusing extract sequence.\n"); } else { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); } newTreeEntry(VL, Reuse, UserTreeIdx); return; @@ -1301,7 +1301,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; @@ -1312,7 +1312,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) { LoadInst *L = cast(VL[i]); if (!L->isSimple()) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; @@ -1349,7 +1349,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, break; } - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); if (ReverseConsecutive) { @@ -1376,7 +1376,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned i = 0; i < VL.size(); ++i) { Type *Ty = cast(VL[i])->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); return; @@ -1404,7 +1404,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, CmpInst *Cmp = cast(VL[i]); if (Cmp->getPredicate() != P0 || Cmp->getOperand(0)->getType() != ComparedTy) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); return; @@ -1471,7 +1471,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned j = 0; j < VL.size(); ++j) { if (cast(VL[j])->getNumOperands() != 2) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1484,7 +1484,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Type *CurTy = cast(VL[j])->getOperand(0)->getType(); if (Ty0 != CurTy) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1496,7 +1496,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (!isa(Op)) { DEBUG( dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); return; } @@ -1518,7 +1518,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Check if the stores are consecutive or of we need to swizzle them. 
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; @@ -1541,7 +1541,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // represented by an intrinsic call Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (!isTriviallyVectorizable(ID)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; @@ -1555,7 +1555,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (!CI2 || CI2->getCalledFunction() != Int || getVectorIntrinsicIDForCall(CI2, TLI) != ID || !CI->hasIdenticalOperandBundleSchema(*CI2)) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] << "\n"); @@ -1566,7 +1566,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (hasVectorInstrinsicScalarOpd(ID, 1)) { Value *A1J = CI2->getArgOperand(1); if (A1I != A1J) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument "<< A1I<<"!=" << A1J @@ -1579,7 +1579,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(), CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!=" << *VL[i] << '\n'); @@ -1603,7 +1603,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // If this is not an alternate sequence of opcode like add-sub // then do not vectorize this instruction. 
if (!isAltShuffle) { - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; @@ -1631,7 +1631,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, return; } default: - BS.cancelScheduling(VL); + BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; @@ -3177,17 +3177,18 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, } } if (!Bundle->isReady()) { - cancelScheduling(VL); + cancelScheduling(VL, VL[0]); return false; } return true; } -void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL) { - if (isa(VL[0])) +void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, + Value *OpValue) { + if (isa(OpValue)) return; - ScheduleData *Bundle = getScheduleData(VL[0]); + ScheduleData *Bundle = getScheduleData(OpValue); DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n"); assert(!Bundle->IsScheduled && "Can't cancel bundle which is already scheduled"); diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 6793a49a2ddc..3e3eff39d637 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -62,7 +62,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) set(LLVM_MAIN_SRC_DIR ${LLVM_BUILD_MAIN_SRC_DIR}) if(APPLE) - set(LLVM_ENABLE_LIBCXX ON CACHE BOOL "") + set(LLVM_ENABLE_LIBCXX ON CACHE BOOL "") endif() set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) @@ -96,15 +96,33 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) # The subdirectories need to treat this as standalone builds set(${canon_name}_STANDALONE_BUILD On) + if(LLVM_RUNTIMES_TARGET) + if(NOT "${entry}" MATCHES "compiler-rt") + set(${canon_name}_INSTALL_PREFIX "lib/${LLVM_RUNTIMES_TARGET}/" CACHE STRING "" FORCE) + endif() + endif() + # Setting a variable to let sub-projects detect which other projects # will be included under here. set(HAVE_${canon_name} On) endforeach() + set(SAFE_LLVM_BINARY_DIR ${LLVM_BINARY_DIR}) + set(SAFE_LLVM_LIBRARY_OUTPUT_INTDIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + set(SAFE_LLVM_RUNTIMES_OUTPUT_INTDIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) + # We do this in two loops so that HAVE_* is set for each runtime before the # other runtimes are added. 
foreach(entry ${runtimes}) get_filename_component(projName ${entry} NAME) + + if(LLVM_RUNTIMES_TARGET) + if(NOT "${entry}" MATCHES "compiler-rt") + set(LLVM_BINARY_DIR "${LLVM_LIBRARY_DIR}/${LLVM_RUNTIMES_TARGET}") + set(LLVM_LIBRARY_OUTPUT_INTDIR "${LLVM_LIBRARY_DIR}/${LLVM_RUNTIMES_TARGET}/lib") + set(LLVM_RUNTIME_OUTPUT_INTDIR "${LLVM_TOOLS_BINARY_DIR}/${LLVM_RUNTIMES_TARGET}") + endif() + endif() # Between each sub-project we want to cache and clear the LIT properties set_property(GLOBAL PROPERTY LLVM_LIT_TESTSUITES) @@ -123,6 +141,14 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) list(APPEND RUNTIMES_LIT_PARAMS ${LLVM_LIT_PARAMS}) list(APPEND RUNTIMES_LIT_DEPENDS ${LLVM_LIT_DEPENDS}) list(APPEND RUNTIMES_LIT_EXTRA_ARGS ${LLVM_LIT_EXTRA_ARGS}) + + if(LLVM_RUNTIMES_TARGET) + if(NOT "${entry}" MATCHES "compiler-rt") + set(LLVM_BINARY_DIR "${SAFE_LLVM_BINARY_DIR}") + set(LLVM_LIBRARY_OUTPUT_INTDIR "${SAFE_LLVM_LIBRARY_OUTPUT_INTDIR}") + set(LLVM_RUNTIME_OUTPUT_INTDIR "${SAFE_LLVM_RUNTIME_OUTPUT_INTDIR}") + endif() + endif() endforeach() if(LLVM_INCLUDE_TESTS) @@ -147,9 +173,9 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) message(SEND_ERROR "Missing target for runtime component ${component}!") continue() endif() - if(LLVM_INCLUDE_TESTS AND NOT TARGET check-${component}) - message(SEND_ERROR "Missing check target for runtime component ${component}!") - continue() + + if(TARGET check-${component}) + list(APPEND SUB_CHECK_TARGETS check-${component}) endif() if(TARGET install-${component}) @@ -157,14 +183,18 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}) endif() endforeach() - configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in - ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + if(LLVM_RUNTIMES_TARGET) + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in + ${LLVM_BINARY_DIR}/runtimes/${LLVM_RUNTIMES_TARGET}/Components.cmake) + else() + configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in + ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + endif() endif() else() # if this is included from LLVM's CMake - include(${LLVM_BINARY_DIR}/runtimes/Components.cmake OPTIONAL) - set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/Components.cmake) include(LLVMExternalProjectUtils) if(NOT LLVM_BUILD_RUNTIMES) @@ -190,6 +220,10 @@ else() # if this is included from LLVM's CMake add_custom_target(builtins) add_custom_target(install-builtins) foreach(target ${LLVM_BUILTIN_TARGETS}) + if(target STREQUAL "default") + set(target ${LLVM_DEFAULT_TARGET_TRIPLE}) + endif() + string(REPLACE "-" ";" builtin_target_list ${target}) foreach(item ${builtin_target_list}) string(TOLOWER "${item}" item_lower) @@ -246,40 +280,137 @@ else() # if this is included from LLVM's CMake list(APPEND runtime_names ${projName}) endforeach() - if(runtimes) + # runtime_register_target(target) + # Utility function to register external runtime target. 
+ function(runtime_register_target target) + if(target STREQUAL LLVM_DEFAULT_TARGET_TRIPLE) + include(${LLVM_BINARY_DIR}/runtimes/Components.cmake OPTIONAL) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + else() + include(${LLVM_BINARY_DIR}/runtimes/${target}/Components.cmake OPTIONAL) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/${target}/Components.cmake) + endif() foreach(runtime_name ${runtime_names}) - list(APPEND extra_targets - ${runtime_name} - install-${runtime_name} - check-${runtime_name}) + list(APPEND ${target}_extra_targets + ${runtime_name}-${target} + install-${runtime_name}-${target}) + if(LLVM_INCLUDE_TESTS) + list(APPEND ${target}_test_targets check-${runtime_name}-${target}) + endif() + endforeach() + + foreach(name IN LISTS SUB_COMPONENTS SUB_INSTALL_TARGETS) + list(APPEND ${target}_extra_targets "${name}:${name}-${target}") endforeach() if(LLVM_INCLUDE_TESTS) - set(test_targets runtimes-test-depends check-runtimes) - foreach(component ${SUB_COMPONENTS}) - list(APPEND SUB_COMPONENT_CHECK_TARGETS check-${component}) + list(APPEND ${target}_test_targets runtimes-test-depends-${target} check-runtimes-${target}) + foreach(name IN LISTS SUB_CHECK_TARGETS) + list(APPEND ${target}_test_targets "${name}:${name}-${target}") + list(APPEND test_targets ${name}-${target}) endforeach() + set(test_targets "${test_targets}" PARENT_SCOPE) endif() - # Create a runtimes target that uses this file as its top-level CMake file. - # The runtimes target is a configuration of all the runtime libraries - # together in a single CMake invocaiton. - llvm_ExternalProject_Add(runtimes + get_cmake_property(variableNames VARIABLES) + foreach(variableName ${variableNames}) + if(variableName MATCHES "^RUNTIMES_${target}") + string(REPLACE "RUNTIMES_${target}_" "" new_name ${variableName}) + list(APPEND ${target}_extra_args "-D${new_name}=${${variableName}}") + endif() + endforeach() + + if(NOT target STREQUAL LLVM_DEFAULT_TARGET_TRIPLE) + list(APPEND ${target}_extra_args "-DLLVM_RUNTIMES_TARGET=${target}") + endif() + + llvm_ExternalProject_Add(runtimes-${target} ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${deps} # Builtins were built separately above CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS} + -DCMAKE_C_COMPILER_TARGET=${target} + -DCMAKE_CXX_COMPILER_TARGET=${target} + -DCMAKE_ASM_COMPILER_TARGET=${target} + -DCMAKE_C_COMPILER_WORKS=ON + -DCMAKE_CXX_COMPILER_WORKS=ON + -DCMAKE_ASM_COMPILER_WORKS=ON + -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON + ${${target}_extra_args} PASSTHROUGH_PREFIXES ${prefixes} - EXTRA_TARGETS ${extra_targets} - ${test_targets} - ${SUB_COMPONENTS} - ${SUB_COMPONENT_CHECK_TARGETS} - ${SUB_INSTALL_TARGETS} + EXTRA_TARGETS ${${target}_extra_targets} + ${${target}_test_targets} USE_TOOLCHAIN ${EXTRA_ARGS}) - + endfunction() + + if(runtimes) + # Create a runtimes target that uses this file as its top-level CMake file. + # The runtimes target is a configuration of all the runtime libraries + # together in a single CMake invocaiton. 
+ if(NOT LLVM_RUNTIME_TARGETS) + include(${LLVM_BINARY_DIR}/runtimes/Components.cmake OPTIONAL) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${LLVM_BINARY_DIR}/runtimes/Components.cmake) + + foreach(runtime_name ${runtime_names}) + list(APPEND extra_targets + ${runtime_name} + install-${runtime_name}) + if(LLVM_INCLUDE_TESTS) + list(APPEND test_targets check-${runtime_name}) + endif() + endforeach() + + if(LLVM_INCLUDE_TESTS) + list(APPEND test_targets runtimes-test-depends check-runtimes) + foreach(component ${SUB_COMPONENTS}) + list(APPEND SUB_CHECK_TARGETS check-${component}) + endforeach() + endif() + + llvm_ExternalProject_Add(runtimes + ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS ${deps} + # Builtins were built separately above + CMAKE_ARGS -DCOMPILER_RT_BUILD_BUILTINS=Off + -DLLVM_INCLUDE_TESTS=${LLVM_INCLUDE_TESTS} + PASSTHROUGH_PREFIXES ${prefixes} + EXTRA_TARGETS ${extra_targets} + ${test_targets} + ${SUB_COMPONENTS} + ${SUB_CHECK_TARGETS} + ${SUB_INSTALL_TARGETS} + USE_TOOLCHAIN + ${EXTRA_ARGS}) + else() + add_custom_target(runtimes) + add_custom_target(runtimes-configure) + add_custom_target(install-runtimes) + if(LLVM_INCLUDE_TESTS) + add_custom_target(check-runtimes) + add_custom_target(runtimes-test-depends) + set(test_targets "") + endif() + + foreach(target ${LLVM_RUNTIME_TARGETS}) + if(target STREQUAL "default") + set(target ${LLVM_DEFAULT_TARGET_TRIPLE}) + endif() + + runtime_register_target(${target}) + + add_dependencies(runtimes runtimes-${target}) + add_dependencies(runtimes-configure runtimes-${target}-configure) + add_dependencies(install-runtimes install-runtimes-${target}) + if(LLVM_INCLUDE_TESTS) + add_dependencies(check-runtimes check-runtimes-${target}) + add_dependencies(runtimes-test-depends runtimes-test-depends-${target}) + endif() + endforeach() + endif() + # TODO: This is a hack needed because the libcxx headers are copied into the # build directory during configuration. Without that step the clang in the # build directory cannot find the C++ headers in certain configurations. @@ -292,6 +423,21 @@ else() # if this is included from LLVM's CMake if(LLVM_INCLUDE_TESTS) set_property(GLOBAL APPEND PROPERTY LLVM_ADDITIONAL_TEST_DEPENDS runtimes-test-depends) set_property(GLOBAL APPEND PROPERTY LLVM_ADDITIONAL_TEST_TARGETS check-runtimes) + + set(RUNTIMES_TEST_DEPENDS + FileCheck + count + llvm-nm + llvm-objdump + llvm-xray + not + obj2yaml + sancov + sanstats + ) + foreach(target ${test_targets} ${SUB_CHECK_TARGETS}) + add_dependencies(${target} ${RUNTIMES_TEST_DEPENDS}) + endforeach() endif() endif() endif() diff --git a/runtimes/Components.cmake.in b/runtimes/Components.cmake.in index 6e24ac380d18..1d8fb7ab174c 100644 --- a/runtimes/Components.cmake.in +++ b/runtimes/Components.cmake.in @@ -1,2 +1,3 @@ set(SUB_COMPONENTS @SUB_COMPONENTS@) +set(SUB_CHECK_TARGETS @SUB_CHECK_TARGETS@) set(SUB_INSTALL_TARGETS @SUB_INSTALL_TARGETS@) diff --git a/test/Analysis/BasicAA/unreachable-block.ll b/test/Analysis/BasicAA/unreachable-block.ll index 551d18e3e0fb..d6c149f81661 100644 --- a/test/Analysis/BasicAA/unreachable-block.ll +++ b/test/Analysis/BasicAA/unreachable-block.ll @@ -1,4 +1,4 @@ -; RUN: opt -basicaa -aa-eval -disable-output < %s >& /dev/null +; RUN: opt -basicaa -aa-eval -disable-output < %s > /dev/null 2>&1 ; BasicAA shouldn't infinitely recurse on the use-def cycles in ; unreachable code. 
diff --git a/test/Analysis/CostModel/X86/slm-arith-costs.ll b/test/Analysis/CostModel/X86/slm-arith-costs.ll index 3673a5d9e067..a767aa30b8ed 100644 --- a/test/Analysis/CostModel/X86/slm-arith-costs.ll +++ b/test/Analysis/CostModel/X86/slm-arith-costs.ll @@ -3,6 +3,20 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +define <2 x i64> @slm-costs_64_vector_add(<2 x i64> %a, <2 x i64> %b) { +entry: +; SLM: cost of 4 {{.*}} add <2 x i64> + %res = add <2 x i64> %a, %b + ret <2 x i64> %res +} + +define <2 x i64> @slm-costs_64_vector_sub(<2 x i64> %a, <2 x i64> %b) { +entry: +; SLM: cost of 4 {{.*}} sub <2 x i64> + %res = sub <2 x i64> %a, %b + ret <2 x i64> %res +} + ; 8bit mul define i8 @slm-costs_8_scalar_mul(i8 %a, i8 %b) { entry: @@ -13,7 +27,7 @@ entry: define <2 x i8> @slm-costs_8_v2_mul(<2 x i8> %a, <2 x i8> %b) { entry: -; SLM: cost of 11 {{.*}} mul nsw <2 x i8> +; SLM: cost of 17 {{.*}} mul nsw <2 x i8> %res = mul nsw <2 x i8> %a, %b ret <2 x i8> %res } @@ -97,7 +111,7 @@ entry: define <2 x i16> @slm-costs_16_v2_mul(<2 x i16> %a, <2 x i16> %b) { entry: -; SLM: cost of 11 {{.*}} mul nsw <2 x i16> +; SLM: cost of 17 {{.*}} mul nsw <2 x i16> %res = mul nsw <2 x i16> %a, %b ret <2 x i16> %res } @@ -181,7 +195,7 @@ entry: define <2 x i32> @slm-costs_32_v2_mul(<2 x i32> %a, <2 x i32> %b) { entry: -; SLM: cost of 11 {{.*}} mul nsw <2 x i32> +; SLM: cost of 17 {{.*}} mul nsw <2 x i32> %res = mul nsw <2 x i32> %a, %b ret <2 x i32> %res } @@ -217,28 +231,28 @@ entry: define <2 x i64> @slm-costs_64_v2_mul(<2 x i64> %a, <2 x i64> %b) { entry: -; SLM: cost of 11 {{.*}} mul nsw <2 x i64> +; SLM: cost of 17 {{.*}} mul nsw <2 x i64> %res = mul nsw <2 x i64> %a, %b ret <2 x i64> %res } define <4 x i64> @slm-costs_64_v4_mul(<4 x i64> %a, <4 x i64> %b) { entry: -; SLM: cost of 22 {{.*}} mul nsw <4 x i64> +; SLM: cost of 34 {{.*}} mul nsw <4 x i64> %res = mul nsw <4 x i64> %a, %b ret <4 x i64> %res } define <8 x i64> @slm-costs_64_v8_mul(<8 x i64> %a, <8 x i64> %b) { entry: -; SLM: cost of 44 {{.*}} mul nsw <8 x i64> +; SLM: cost of 68 {{.*}} mul nsw <8 x i64> %res = mul nsw <8 x i64> %a, %b ret <8 x i64> %res } define <16 x i64> @slm-costs_64_v16_mul(<16 x i64> %a, <16 x i64> %b) { entry: -; SLM: cost of 88 {{.*}} mul nsw <16 x i64> +; SLM: cost of 136 {{.*}} mul nsw <16 x i64> %res = mul nsw <16 x i64> %a, %b ret <16 x i64> %res } diff --git a/test/Analysis/DependenceAnalysis/BasePtrBug.ll b/test/Analysis/DependenceAnalysis/BasePtrBug.ll new file mode 100644 index 000000000000..8de75df7dbdd --- /dev/null +++ b/test/Analysis/DependenceAnalysis/BasePtrBug.ll @@ -0,0 +1,80 @@ +; RUN: opt < %s -analyze -basicaa -da | FileCheck %s + +; Test that the dependence analysis generates the correct results when using +; an aliased object that points to a different element in the same array. +; PR33567 - https://bugs.llvm.org/show_bug.cgi?id=33567 + +; void test1(int *A, int *B, int N) { +; int *top = A; +; int *bot = A + N/2; +; for (int i = 0; i < N; i++) +; B[i] = top[i] + bot[i]; +; } + +; CHECK-LABEL: test1 +; CHECK: da analyze - input [*|<]! 
+ +define void @test1(i32* nocapture %A, i32* nocapture %B, i32 %N) #0 { +entry: + %cmp9 = icmp sgt i32 %N, 0 + br i1 %cmp9, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: + %div = sdiv i32 %N, 2 + %bot.gep = getelementptr i32, i32* %A, i32 %div + br label %for.body + +for.body: + %i = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %gep.0 = getelementptr i32, i32* %A, i32 %i + %gep.1 = getelementptr i32, i32* %bot.gep, i32 %i + %gep.B = getelementptr i32, i32* %B, i32 %i + %0 = load i32, i32* %gep.0, align 4 + %1 = load i32, i32* %gep.1, align 4 + %add = add nsw i32 %1, %0 + store i32 %add, i32* %gep.B, align 4 + %inc = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + + +; void test2(int *A, unsigned n) { +; int *B = A + 1; +; for (unsigned i = 0; i < n; ++i) { +; A[i] = B[i]; +; } +; } + +; CHECK-LABEL: test2 +; CHECK: da analyze - consistent anti [1]! + +define void @test2(i32*, i32) #3 { + %3 = getelementptr inbounds i32, i32* %0, i64 1 + br label %4 + +;