diff --git a/contrib/compiler-rt/lib/asan/asan_posix.cc b/contrib/compiler-rt/lib/asan/asan_posix.cc index 5c5e0359ad6c..cb2dab2ececb 100644 --- a/contrib/compiler-rt/lib/asan/asan_posix.cc +++ b/contrib/compiler-rt/lib/asan/asan_posix.cc @@ -39,8 +39,8 @@ void AsanOnDeadlySignal(int signo, void *siginfo, void *context) { // ---------------------- TSD ---------------- {{{1 -#if SANITIZER_NETBSD || SANITIZER_FREEBSD -// Thread Static Data cannot be used in early init on NetBSD and FreeBSD. +#if SANITIZER_NETBSD && !ASAN_DYNAMIC +// Thread Static Data cannot be used in early static ASan init on NetBSD. // Reuse the Asan TSD API for compatibility with existing code // with an alternative implementation. diff --git a/contrib/compiler-rt/lib/builtins/cpu_model.c b/contrib/compiler-rt/lib/builtins/cpu_model.c index f953aed959e5..940c5938fef5 100644 --- a/contrib/compiler-rt/lib/builtins/cpu_model.c +++ b/contrib/compiler-rt/lib/builtins/cpu_model.c @@ -543,7 +543,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(FEATURE_BMI); if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) setFeature(FEATURE_AVX2); - if (HasLeaf7 && ((EBX >> 9) & 1)) + if (HasLeaf7 && ((EBX >> 8) & 1)) setFeature(FEATURE_BMI2); if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512F); diff --git a/contrib/compiler-rt/lib/interception/interception_linux.cc b/contrib/compiler-rt/lib/interception/interception_linux.cc index 4b27102a159c..d07f060b5b64 100644 --- a/contrib/compiler-rt/lib/interception/interception_linux.cc +++ b/contrib/compiler-rt/lib/interception/interception_linux.cc @@ -33,7 +33,7 @@ static int StrCmp(const char *s1, const char *s2) { } #endif -static void *GetFuncAddr(const char *name, uptr wrapper_addr) { +static void *GetFuncAddr(const char *name) { #if SANITIZER_NETBSD // FIXME: Find a better way to handle renames if (StrCmp(name, "sigaction")) @@ -47,18 +47,13 @@ static void *GetFuncAddr(const char *name, uptr 
wrapper_addr) { // want the address of the real definition, though, so look it up using // RTLD_DEFAULT. addr = dlsym(RTLD_DEFAULT, name); - - // In case `name' is not loaded, dlsym ends up finding the actual wrapper. - // We don't want to intercept the wrapper and have it point to itself. - if ((uptr)addr == wrapper_addr) - addr = nullptr; } return addr; } bool InterceptFunction(const char *name, uptr *ptr_to_real, uptr func, uptr wrapper) { - void *addr = GetFuncAddr(name, wrapper); + void *addr = GetFuncAddr(name); *ptr_to_real = (uptr)addr; return addr && (func == wrapper); } diff --git a/contrib/compiler-rt/lib/msan/msan_linux.cc b/contrib/compiler-rt/lib/msan/msan_linux.cc index 3b6e6cb85f33..056783088225 100644 --- a/contrib/compiler-rt/lib/msan/msan_linux.cc +++ b/contrib/compiler-rt/lib/msan/msan_linux.cc @@ -174,8 +174,8 @@ void InstallAtExitHandler() { // ---------------------- TSD ---------------- {{{1 -#if SANITIZER_NETBSD || SANITIZER_FREEBSD -// Thread Static Data cannot be used in early init on NetBSD and FreeBSD. +#if SANITIZER_NETBSD +// Thread Static Data cannot be used in early init on NetBSD. // Reuse the MSan TSD API for compatibility with existing code // with an alternative implementation. 
diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc index f29226b3ee3a..1ec73827b8b1 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_interceptors_ioctl_netbsd.inc @@ -24,7 +24,7 @@ struct ioctl_desc { const char *name; }; -const unsigned ioctl_table_max = 1200; +const unsigned ioctl_table_max = 1236; static ioctl_desc ioctl_table[ioctl_table_max]; static unsigned ioctl_table_size = 0; @@ -645,7 +645,7 @@ static void ioctl_table_fill() { _(SPKRTUNE, NONE, 0); _(SPKRGETVOL, WRITE, sizeof(unsigned int)); _(SPKRSETVOL, READ, sizeof(unsigned int)); -#if 0 /* WIP */ +#if defined(__x86_64__) /* Entries from file: dev/nvmm/nvmm_ioctl.h */ _(NVMM_IOC_CAPABILITY, WRITE, struct_nvmm_ioc_capability_sz); _(NVMM_IOC_MACHINE_CREATE, READWRITE, struct_nvmm_ioc_machine_create_sz); @@ -661,7 +661,11 @@ static void ioctl_table_fill() { _(NVMM_IOC_GPA_UNMAP, READ, struct_nvmm_ioc_gpa_unmap_sz); _(NVMM_IOC_HVA_MAP, READ, struct_nvmm_ioc_hva_map_sz); _(NVMM_IOC_HVA_UNMAP, READ, struct_nvmm_ioc_hva_unmap_sz); + _(NVMM_IOC_CTL, READ, struct_nvmm_ioc_ctl_sz); #endif + /* Entries from file: dev/spi/spi_io.h */ + _(SPI_IOCTL_CONFIGURE, READ, struct_spi_ioctl_configure_sz); + _(SPI_IOCTL_TRANSFER, READ, struct_spi_ioctl_transfer_sz); /* Entries from file: fs/autofs/autofs_ioctl.h */ _(AUTOFSREQUEST, WRITE, struct_autofs_daemon_request_sz); _(AUTOFSDONE, READ, struct_autofs_daemon_done_sz); @@ -895,6 +899,9 @@ static void ioctl_table_fill() { _(AUDIO_GETBUFINFO, WRITE, struct_audio_info_sz); _(AUDIO_SETCHAN, READ, sizeof(int)); _(AUDIO_GETCHAN, WRITE, sizeof(int)); + _(AUDIO_QUERYFORMAT, READWRITE, struct_audio_format_query_sz); + _(AUDIO_GETFORMAT, WRITE, struct_audio_info_sz); + _(AUDIO_SETFORMAT, READ, struct_audio_info_sz); _(AUDIO_MIXER_READ, READWRITE, 
struct_mixer_ctrl_sz); _(AUDIO_MIXER_WRITE, READWRITE, struct_mixer_ctrl_sz); _(AUDIO_MIXER_DEVINFO, READWRITE, struct_mixer_devinfo_sz); @@ -985,6 +992,7 @@ static void ioctl_table_fill() { _(DIOCMWEDGES, WRITE, sizeof(int)); _(DIOCGSECTORSIZE, WRITE, sizeof(unsigned int)); _(DIOCGMEDIASIZE, WRITE, sizeof(uptr)); + _(DIOCRMWEDGES, WRITE, sizeof(int)); /* Entries from file: sys/drvctlio.h */ _(DRVDETACHDEV, READ, struct_devdetachargs_sz); _(DRVRESCANBUS, READ, struct_devrescanargs_sz); @@ -1206,6 +1214,8 @@ static void ioctl_table_fill() { _(SIOCGETHERCAP, READWRITE, struct_eccapreq_sz); _(SIOCGIFINDEX, READWRITE, struct_ifreq_sz); _(SIOCSETHERCAP, READ, struct_eccapreq_sz); + _(SIOCSIFDESCR, READ, struct_ifreq_sz); + _(SIOCGIFDESCR, READWRITE, struct_ifreq_sz); _(SIOCGUMBINFO, READWRITE, struct_ifreq_sz); _(SIOCSUMBPARAM, READ, struct_ifreq_sz); _(SIOCGUMBPARAM, READWRITE, struct_ifreq_sz); @@ -1335,6 +1345,21 @@ static void ioctl_table_fill() { _(WDOGIOC_TICKLE, NONE, 0); _(WDOGIOC_GTICKLER, WRITE, sizeof(int)); _(WDOGIOC_GWDOGS, READWRITE, struct_wdog_conf_sz); + /* Entries from file: sys/kcov.h */ + _(KCOV_IOC_SETBUFSIZE, READ, sizeof(u64)); + _(KCOV_IOC_ENABLE, READ, sizeof(int)); + _(KCOV_IOC_DISABLE, NONE, 0); + /* Entries from file: sys/ipmi.h */ + _(IPMICTL_RECEIVE_MSG_TRUNC, READWRITE, struct_ipmi_recv_sz); + _(IPMICTL_RECEIVE_MSG, READWRITE, struct_ipmi_recv_sz); + _(IPMICTL_SEND_COMMAND, READ, struct_ipmi_req_sz); + _(IPMICTL_REGISTER_FOR_CMD, READ, struct_ipmi_cmdspec_sz); + _(IPMICTL_UNREGISTER_FOR_CMD, READ, struct_ipmi_cmdspec_sz); + _(IPMICTL_SET_GETS_EVENTS_CMD, READ, sizeof(int)); + _(IPMICTL_SET_MY_ADDRESS_CMD, READ, sizeof(unsigned int)); + _(IPMICTL_GET_MY_ADDRESS_CMD, WRITE, sizeof(unsigned int)); + _(IPMICTL_SET_MY_LUN_CMD, READ, sizeof(unsigned int)); + _(IPMICTL_GET_MY_LUN_CMD, WRITE, sizeof(unsigned int)); /* Entries from file: soundcard.h */ _(SNDCTL_DSP_RESET, NONE, 0); _(SNDCTL_DSP_SYNC, NONE, 0); diff --git 
a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc index 88ab0979bb05..455fd4c861de 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc @@ -779,7 +779,11 @@ int internal_sysctl(const int *name, unsigned int namelen, void *oldp, #if SANITIZER_FREEBSD int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp, const void *newp, uptr newlen) { - return sysctlbyname(sname, oldp, (size_t *)oldlenp, newp, (size_t)newlen); + static decltype(sysctlbyname) *real = nullptr; + if (!real) + real = (decltype(sysctlbyname) *)dlsym(RTLD_NEXT, "sysctlbyname"); + CHECK(real); + return real(sname, oldp, (size_t *)oldlenp, newp, (size_t)newlen); } #endif #endif diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc index b2fb5cb76463..87a55e51c9d0 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.cc @@ -62,6 +62,8 @@ #include #include #include +#include +#include #include #include #include @@ -123,9 +125,6 @@ #include #include #include -#if 0 -#include -#endif #include #include #include @@ -168,6 +167,7 @@ #include #include #include +#include #include #include #include @@ -221,6 +221,10 @@ #include #include #include + +#if defined(__x86_64__) +#include +#endif // clang-format on // Include these after system headers to avoid name clashes and ambiguities. 
@@ -686,6 +690,26 @@ unsigned struct_usb_config_desc_sz = sizeof(usb_config_desc); unsigned struct_usb_ctl_report_desc_sz = sizeof(usb_ctl_report_desc); unsigned struct_usb_ctl_report_sz = sizeof(usb_ctl_report); unsigned struct_usb_ctl_request_sz = sizeof(usb_ctl_request); +#if defined(__x86_64__) +unsigned struct_nvmm_ioc_capability_sz = sizeof(nvmm_ioc_capability); +unsigned struct_nvmm_ioc_machine_create_sz = sizeof(nvmm_ioc_machine_create); +unsigned struct_nvmm_ioc_machine_destroy_sz = sizeof(nvmm_ioc_machine_destroy); +unsigned struct_nvmm_ioc_machine_configure_sz = + sizeof(nvmm_ioc_machine_configure); +unsigned struct_nvmm_ioc_vcpu_create_sz = sizeof(nvmm_ioc_vcpu_create); +unsigned struct_nvmm_ioc_vcpu_destroy_sz = sizeof(nvmm_ioc_vcpu_destroy); +unsigned struct_nvmm_ioc_vcpu_setstate_sz = sizeof(nvmm_ioc_vcpu_destroy); +unsigned struct_nvmm_ioc_vcpu_getstate_sz = sizeof(nvmm_ioc_vcpu_getstate); +unsigned struct_nvmm_ioc_vcpu_inject_sz = sizeof(nvmm_ioc_vcpu_inject); +unsigned struct_nvmm_ioc_vcpu_run_sz = sizeof(nvmm_ioc_vcpu_run); +unsigned struct_nvmm_ioc_gpa_map_sz = sizeof(nvmm_ioc_gpa_map); +unsigned struct_nvmm_ioc_gpa_unmap_sz = sizeof(nvmm_ioc_gpa_unmap); +unsigned struct_nvmm_ioc_hva_map_sz = sizeof(nvmm_ioc_hva_map); +unsigned struct_nvmm_ioc_hva_unmap_sz = sizeof(nvmm_ioc_hva_unmap); +unsigned struct_nvmm_ioc_ctl_sz = sizeof(nvmm_ioc_ctl); +#endif +unsigned struct_spi_ioctl_configure_sz = sizeof(spi_ioctl_configure); +unsigned struct_spi_ioctl_transfer_sz = sizeof(spi_ioctl_transfer); unsigned struct_autofs_daemon_request_sz = sizeof(autofs_daemon_request); unsigned struct_autofs_daemon_done_sz = sizeof(autofs_daemon_done); unsigned struct_sctp_connectx_addrs_sz = sizeof(sctp_connectx_addrs); @@ -728,6 +752,9 @@ unsigned struct_vnd_user_sz = sizeof(vnd_user); unsigned struct_vt_stat_sz = sizeof(vt_stat); unsigned struct_wdog_conf_sz = sizeof(wdog_conf); unsigned struct_wdog_mode_sz = sizeof(wdog_mode); +unsigned struct_ipmi_recv_sz = 
sizeof(ipmi_recv); +unsigned struct_ipmi_req_sz = sizeof(ipmi_req); +unsigned struct_ipmi_cmdspec_sz = sizeof(ipmi_cmdspec); unsigned struct_wfq_conf_sz = sizeof(wfq_conf); unsigned struct_wfq_getqid_sz = sizeof(wfq_getqid); unsigned struct_wfq_getstats_sz = sizeof(wfq_getstats); @@ -813,6 +840,7 @@ unsigned struct_iscsi_wait_event_parameters_sz = unsigned struct_isp_stats_sz = sizeof(isp_stats_t); unsigned struct_lsenable_sz = sizeof(struct lsenable); unsigned struct_lsdisable_sz = sizeof(struct lsdisable); +unsigned struct_audio_format_query_sz = sizeof(audio_format_query); unsigned struct_mixer_ctrl_sz = sizeof(struct mixer_ctrl); unsigned struct_mixer_devinfo_sz = sizeof(struct mixer_devinfo); unsigned struct_mpu_command_rec_sz = sizeof(mpu_command_rec); @@ -1423,7 +1451,7 @@ unsigned IOCTL_SPKRTONE = SPKRTONE; unsigned IOCTL_SPKRTUNE = SPKRTUNE; unsigned IOCTL_SPKRGETVOL = SPKRGETVOL; unsigned IOCTL_SPKRSETVOL = SPKRSETVOL; -#if 0 /* interfaces are WIP */ +#if defined(__x86_64__) unsigned IOCTL_NVMM_IOC_CAPABILITY = NVMM_IOC_CAPABILITY; unsigned IOCTL_NVMM_IOC_MACHINE_CREATE = NVMM_IOC_MACHINE_CREATE; unsigned IOCTL_NVMM_IOC_MACHINE_DESTROY = NVMM_IOC_MACHINE_DESTROY; @@ -1438,7 +1466,10 @@ unsigned IOCTL_NVMM_IOC_GPA_MAP = NVMM_IOC_GPA_MAP; unsigned IOCTL_NVMM_IOC_GPA_UNMAP = NVMM_IOC_GPA_UNMAP; unsigned IOCTL_NVMM_IOC_HVA_MAP = NVMM_IOC_HVA_MAP; unsigned IOCTL_NVMM_IOC_HVA_UNMAP = NVMM_IOC_HVA_UNMAP; +unsigned IOCTL_NVMM_IOC_CTL = NVMM_IOC_CTL; #endif +unsigned IOCTL_SPI_IOCTL_CONFIGURE = SPI_IOCTL_CONFIGURE; +unsigned IOCTL_SPI_IOCTL_TRANSFER = SPI_IOCTL_TRANSFER; unsigned IOCTL_AUTOFSREQUEST = AUTOFSREQUEST; unsigned IOCTL_AUTOFSDONE = AUTOFSDONE; unsigned IOCTL_BIOCGBLEN = BIOCGBLEN; @@ -1656,6 +1687,9 @@ unsigned IOCTL_AUDIO_GETPROPS = AUDIO_GETPROPS; unsigned IOCTL_AUDIO_GETBUFINFO = AUDIO_GETBUFINFO; unsigned IOCTL_AUDIO_SETCHAN = AUDIO_SETCHAN; unsigned IOCTL_AUDIO_GETCHAN = AUDIO_GETCHAN; +unsigned IOCTL_AUDIO_QUERYFORMAT = AUDIO_QUERYFORMAT; 
+unsigned IOCTL_AUDIO_GETFORMAT = AUDIO_GETFORMAT; +unsigned IOCTL_AUDIO_SETFORMAT = AUDIO_SETFORMAT; unsigned IOCTL_AUDIO_MIXER_READ = AUDIO_MIXER_READ; unsigned IOCTL_AUDIO_MIXER_WRITE = AUDIO_MIXER_WRITE; unsigned IOCTL_AUDIO_MIXER_DEVINFO = AUDIO_MIXER_DEVINFO; @@ -1741,6 +1775,7 @@ unsigned IOCTL_DIOCTUR = DIOCTUR; unsigned IOCTL_DIOCMWEDGES = DIOCMWEDGES; unsigned IOCTL_DIOCGSECTORSIZE = DIOCGSECTORSIZE; unsigned IOCTL_DIOCGMEDIASIZE = DIOCGMEDIASIZE; +unsigned IOCTL_DIOCRMWEDGES = DIOCRMWEDGES; unsigned IOCTL_DRVDETACHDEV = DRVDETACHDEV; unsigned IOCTL_DRVRESCANBUS = DRVRESCANBUS; unsigned IOCTL_DRVCTLCOMMAND = DRVCTLCOMMAND; @@ -1945,6 +1980,8 @@ unsigned IOCTL_SIOCSLINKSTR = SIOCSLINKSTR; unsigned IOCTL_SIOCGETHERCAP = SIOCGETHERCAP; unsigned IOCTL_SIOCGIFINDEX = SIOCGIFINDEX; unsigned IOCTL_SIOCSETHERCAP = SIOCSETHERCAP; +unsigned IOCTL_SIOCSIFDESCR = SIOCSIFDESCR; +unsigned IOCTL_SIOCGIFDESCR = SIOCGIFDESCR; unsigned IOCTL_SIOCGUMBINFO = SIOCGUMBINFO; unsigned IOCTL_SIOCSUMBPARAM = SIOCSUMBPARAM; unsigned IOCTL_SIOCGUMBPARAM = SIOCGUMBPARAM; @@ -2069,6 +2106,19 @@ unsigned IOCTL_WDOGIOC_WHICH = WDOGIOC_WHICH; unsigned IOCTL_WDOGIOC_TICKLE = WDOGIOC_TICKLE; unsigned IOCTL_WDOGIOC_GTICKLER = WDOGIOC_GTICKLER; unsigned IOCTL_WDOGIOC_GWDOGS = WDOGIOC_GWDOGS; +unsigned IOCTL_KCOV_IOC_SETBUFSIZE = KCOV_IOC_SETBUFSIZE; +unsigned IOCTL_KCOV_IOC_ENABLE = KCOV_IOC_ENABLE; +unsigned IOCTL_KCOV_IOC_DISABLE = KCOV_IOC_DISABLE; +unsigned IOCTL_IPMICTL_RECEIVE_MSG_TRUNC = IPMICTL_RECEIVE_MSG_TRUNC; +unsigned IOCTL_IPMICTL_RECEIVE_MSG = IPMICTL_RECEIVE_MSG; +unsigned IOCTL_IPMICTL_SEND_COMMAND = IPMICTL_SEND_COMMAND; +unsigned IOCTL_IPMICTL_REGISTER_FOR_CMD = IPMICTL_REGISTER_FOR_CMD; +unsigned IOCTL_IPMICTL_UNREGISTER_FOR_CMD = IPMICTL_UNREGISTER_FOR_CMD; +unsigned IOCTL_IPMICTL_SET_GETS_EVENTS_CMD = IPMICTL_SET_GETS_EVENTS_CMD; +unsigned IOCTL_IPMICTL_SET_MY_ADDRESS_CMD = IPMICTL_SET_MY_ADDRESS_CMD; +unsigned IOCTL_IPMICTL_GET_MY_ADDRESS_CMD = 
IPMICTL_GET_MY_ADDRESS_CMD; +unsigned IOCTL_IPMICTL_SET_MY_LUN_CMD = IPMICTL_SET_MY_LUN_CMD; +unsigned IOCTL_IPMICTL_GET_MY_LUN_CMD = IPMICTL_GET_MY_LUN_CMD; unsigned IOCTL_SNDCTL_DSP_RESET = SNDCTL_DSP_RESET; unsigned IOCTL_SNDCTL_DSP_SYNC = SNDCTL_DSP_SYNC; unsigned IOCTL_SNDCTL_DSP_SPEED = SNDCTL_DSP_SPEED; diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h index add9852ec6c3..4fb3b8c0e06f 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_netbsd.h @@ -849,6 +849,25 @@ extern unsigned struct_usb_config_desc_sz; extern unsigned struct_usb_ctl_report_desc_sz; extern unsigned struct_usb_ctl_report_sz; extern unsigned struct_usb_ctl_request_sz; +#if defined(__x86_64__) +extern unsigned struct_nvmm_ioc_capability_sz; +extern unsigned struct_nvmm_ioc_machine_create_sz; +extern unsigned struct_nvmm_ioc_machine_destroy_sz; +extern unsigned struct_nvmm_ioc_machine_configure_sz; +extern unsigned struct_nvmm_ioc_vcpu_create_sz; +extern unsigned struct_nvmm_ioc_vcpu_destroy_sz; +extern unsigned struct_nvmm_ioc_vcpu_setstate_sz; +extern unsigned struct_nvmm_ioc_vcpu_getstate_sz; +extern unsigned struct_nvmm_ioc_vcpu_inject_sz; +extern unsigned struct_nvmm_ioc_vcpu_run_sz; +extern unsigned struct_nvmm_ioc_gpa_map_sz; +extern unsigned struct_nvmm_ioc_gpa_unmap_sz; +extern unsigned struct_nvmm_ioc_hva_map_sz; +extern unsigned struct_nvmm_ioc_hva_unmap_sz; +extern unsigned struct_nvmm_ioc_ctl_sz; +#endif +extern unsigned struct_spi_ioctl_configure_sz; +extern unsigned struct_spi_ioctl_transfer_sz; extern unsigned struct_autofs_daemon_request_sz; extern unsigned struct_autofs_daemon_done_sz; extern unsigned struct_sctp_connectx_addrs_sz; @@ -891,6 +910,9 @@ extern unsigned struct_vnd_user_sz; extern unsigned struct_vt_stat_sz; extern unsigned 
struct_wdog_conf_sz; extern unsigned struct_wdog_mode_sz; +extern unsigned struct_ipmi_recv_sz; +extern unsigned struct_ipmi_req_sz; +extern unsigned struct_ipmi_cmdspec_sz; extern unsigned struct_wfq_conf_sz; extern unsigned struct_wfq_getqid_sz; extern unsigned struct_wfq_getstats_sz; @@ -969,6 +991,7 @@ extern unsigned struct_iscsi_wait_event_parameters_sz; extern unsigned struct_isp_stats_sz; extern unsigned struct_lsenable_sz; extern unsigned struct_lsdisable_sz; +extern unsigned struct_audio_format_query_sz; extern unsigned struct_mixer_ctrl_sz; extern unsigned struct_mixer_devinfo_sz; extern unsigned struct_mpu_command_rec_sz; @@ -1575,7 +1598,7 @@ extern unsigned IOCTL_SPKRTONE; extern unsigned IOCTL_SPKRTUNE; extern unsigned IOCTL_SPKRGETVOL; extern unsigned IOCTL_SPKRSETVOL; -#if 0 /* interfaces are WIP */ +#if defined(__x86_64__) extern unsigned IOCTL_NVMM_IOC_CAPABILITY; extern unsigned IOCTL_NVMM_IOC_MACHINE_CREATE; extern unsigned IOCTL_NVMM_IOC_MACHINE_DESTROY; @@ -1590,6 +1613,7 @@ extern unsigned IOCTL_NVMM_IOC_GPA_MAP; extern unsigned IOCTL_NVMM_IOC_GPA_UNMAP; extern unsigned IOCTL_NVMM_IOC_HVA_MAP; extern unsigned IOCTL_NVMM_IOC_HVA_UNMAP; +extern unsigned IOCTL_NVMM_IOC_CTL; #endif extern unsigned IOCTL_AUTOFSREQUEST; extern unsigned IOCTL_AUTOFSDONE; @@ -1808,6 +1832,9 @@ extern unsigned IOCTL_AUDIO_GETPROPS; extern unsigned IOCTL_AUDIO_GETBUFINFO; extern unsigned IOCTL_AUDIO_SETCHAN; extern unsigned IOCTL_AUDIO_GETCHAN; +extern unsigned IOCTL_AUDIO_QUERYFORMAT; +extern unsigned IOCTL_AUDIO_GETFORMAT; +extern unsigned IOCTL_AUDIO_SETFORMAT; extern unsigned IOCTL_AUDIO_MIXER_READ; extern unsigned IOCTL_AUDIO_MIXER_WRITE; extern unsigned IOCTL_AUDIO_MIXER_DEVINFO; @@ -1893,6 +1920,7 @@ extern unsigned IOCTL_DIOCTUR; extern unsigned IOCTL_DIOCMWEDGES; extern unsigned IOCTL_DIOCGSECTORSIZE; extern unsigned IOCTL_DIOCGMEDIASIZE; +extern unsigned IOCTL_DIOCRMWEDGES; extern unsigned IOCTL_DRVDETACHDEV; extern unsigned IOCTL_DRVRESCANBUS; extern 
unsigned IOCTL_DRVCTLCOMMAND; @@ -1994,6 +2022,8 @@ extern unsigned IOCTL_SEQUENCER_TMR_TEMPO; extern unsigned IOCTL_SEQUENCER_TMR_SOURCE; extern unsigned IOCTL_SEQUENCER_TMR_METRONOME; extern unsigned IOCTL_SEQUENCER_TMR_SELECT; +extern unsigned IOCTL_SPI_IOCTL_CONFIGURE; +extern unsigned IOCTL_SPI_IOCTL_TRANSFER; extern unsigned IOCTL_MTIOCTOP; extern unsigned IOCTL_MTIOCGET; extern unsigned IOCTL_MTIOCIEOT; @@ -2097,6 +2127,8 @@ extern unsigned IOCTL_SIOCSLINKSTR; extern unsigned IOCTL_SIOCGETHERCAP; extern unsigned IOCTL_SIOCGIFINDEX; extern unsigned IOCTL_SIOCSETHERCAP; +extern unsigned IOCTL_SIOCSIFDESCR; +extern unsigned IOCTL_SIOCGIFDESCR; extern unsigned IOCTL_SIOCGUMBINFO; extern unsigned IOCTL_SIOCSUMBPARAM; extern unsigned IOCTL_SIOCGUMBPARAM; @@ -2221,6 +2253,19 @@ extern unsigned IOCTL_WDOGIOC_WHICH; extern unsigned IOCTL_WDOGIOC_TICKLE; extern unsigned IOCTL_WDOGIOC_GTICKLER; extern unsigned IOCTL_WDOGIOC_GWDOGS; +extern unsigned IOCTL_KCOV_IOC_SETBUFSIZE; +extern unsigned IOCTL_KCOV_IOC_ENABLE; +extern unsigned IOCTL_KCOV_IOC_DISABLE; +extern unsigned IOCTL_IPMICTL_RECEIVE_MSG_TRUNC; +extern unsigned IOCTL_IPMICTL_RECEIVE_MSG; +extern unsigned IOCTL_IPMICTL_SEND_COMMAND; +extern unsigned IOCTL_IPMICTL_REGISTER_FOR_CMD; +extern unsigned IOCTL_IPMICTL_UNREGISTER_FOR_CMD; +extern unsigned IOCTL_IPMICTL_SET_GETS_EVENTS_CMD; +extern unsigned IOCTL_IPMICTL_SET_MY_ADDRESS_CMD; +extern unsigned IOCTL_IPMICTL_GET_MY_ADDRESS_CMD; +extern unsigned IOCTL_IPMICTL_SET_MY_LUN_CMD; +extern unsigned IOCTL_IPMICTL_GET_MY_LUN_CMD; extern unsigned IOCTL_SNDCTL_DSP_RESET; extern unsigned IOCTL_SNDCTL_DSP_SYNC; extern unsigned IOCTL_SNDCTL_DSP_SPEED; diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl.cc index 8a2704ff0631..897679128ac3 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl.cc @@ -149,6 +149,7 @@ static void BackgroundThread(void *arg) { // We don't use 
ScopedIgnoreInterceptors, because we want ignores to be // enabled even when the thread function exits (e.g. during pthread thread // shutdown code). + cur_thread_init(); cur_thread()->ignore_interceptors++; const u64 kMs2Ns = 1000 * 1000; diff --git a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h index 948341554f23..282142f51bb3 100644 --- a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -949,7 +949,7 @@ template class AAResultBase { /// A pointer to the AAResults object that this AAResult is /// aggregated within. May be null if not aggregated. - AAResults *AAR; + AAResults *AAR = nullptr; /// Helper to dispatch calls back through the derived type. DerivedT &derived() { return static_cast(*this); } diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h index 12a970847021..45a598c898c8 100644 --- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -269,7 +269,13 @@ class SelectionDAG { using CallSiteInfo = MachineFunction::CallSiteInfo; using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl; - DenseMap SDCallSiteInfo; + + struct CallSiteDbgInfo { + CallSiteInfo CSInfo; + MDNode *HeapAllocSite = nullptr; + }; + + DenseMap SDCallSiteDbgInfo; uint16_t NextPersistentId = 0; @@ -1664,16 +1670,28 @@ class SelectionDAG { } void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) { - SDCallSiteInfo[CallNode] = std::move(CallInfo); + SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo); } CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) { - auto I = SDCallSiteInfo.find(CallNode); - if (I != SDCallSiteInfo.end()) - return std::move(I->second); + auto I = SDCallSiteDbgInfo.find(CallNode); + if (I != SDCallSiteDbgInfo.end()) + return std::move(I->second).CSInfo; return CallSiteInfo(); } + void 
addHeapAllocSite(const SDNode *Node, MDNode *MD) { + SDCallSiteDbgInfo[Node].HeapAllocSite = MD; + } + + /// Return the HeapAllocSite type associated with the SDNode, if it exists. + MDNode *getHeapAllocSite(const SDNode *Node) { + auto It = SDCallSiteDbgInfo.find(Node); + if (It == SDCallSiteDbgInfo.end()) + return nullptr; + return It->second.HeapAllocSite; + } + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h index d5cca60bb1b2..ca7548cd8d6f 100644 --- a/contrib/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/contrib/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3665,6 +3665,7 @@ class TargetLowering : public TargetLoweringBase { C_Register, // Constraint represents specific register(s). C_RegisterClass, // Constraint represents any of register(s) in class. C_Memory, // Memory constraint. + C_Immediate, // Requires an immediate. C_Other, // Something else. C_Unknown // Unsupported constraint. }; diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h index 855e31b33549..84cbc53b73a5 100644 --- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h +++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h @@ -16,6 +16,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/OrcV1Deprecation.h" #include namespace llvm { diff --git a/contrib/llvm/include/llvm/MC/MCContext.h b/contrib/llvm/include/llvm/MC/MCContext.h index 5c2124cc0d15..c40cd7c2c257 100644 --- a/contrib/llvm/include/llvm/MC/MCContext.h +++ b/contrib/llvm/include/llvm/MC/MCContext.h @@ -112,6 +112,9 @@ namespace llvm { /// number of section symbols with the same name). StringMap UsedNames; + /// Keeps track of labels that are used in inline assembly. 
+ SymbolTable InlineAsmUsedLabelNames; + /// The next ID to dole out to an unnamed assembler temporary symbol with /// a given prefix. StringMap NextID; @@ -377,6 +380,16 @@ namespace llvm { /// APIs. const SymbolTable &getSymbols() const { return Symbols; } + /// isInlineAsmLabel - Return true if the name is a label referenced in + /// inline assembly. + MCSymbol *getInlineAsmLabel(StringRef Name) const { + return InlineAsmUsedLabelNames.lookup(Name); + } + + /// registerInlineAsmLabel - Records that the name is a label referenced in + /// inline assembly. + void registerInlineAsmLabel(MCSymbol *Sym); + /// @} /// \name Section Management diff --git a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def index e152f383b3ec..5cdf190a9f19 100644 --- a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def @@ -50,35 +50,35 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a", #define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) #endif // FIXME: This would be nicer were it tablegen -AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) -AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) -AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") -AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") -AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") -AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") -AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4") -AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") -AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") -AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") -AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod") -AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") -AARCH64_ARCH_EXT_NAME("simd", 
AArch64::AEK_SIMD, "+neon", "-neon") -AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") -AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") -AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") -AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") -AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") -AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2") -AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes") -AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4") -AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") -AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm") -AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") -AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") -AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") -AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") -AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") -AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") +AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr) +AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr) +AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc") +AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse") +AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm") +AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto") +AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4") +AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3") +AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2") +AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes") +AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod") +AARCH64_ARCH_EXT_NAME("fp", 
AArch64::AEK_FP, "+fp-armv8", "-fp-armv8") +AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon") +AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16") +AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml") +AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe") +AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras") +AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve") +AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2") +AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes") +AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4") +AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3") +AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm") +AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc") +AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand") +AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte") +AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs") +AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb") +AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME diff --git a/contrib/llvm/include/llvm/Support/AArch64TargetParser.h b/contrib/llvm/include/llvm/Support/AArch64TargetParser.h index 965d38535e74..a2d2cf32d715 100644 --- a/contrib/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/contrib/llvm/include/llvm/Support/AArch64TargetParser.h @@ -53,7 +53,7 @@ enum ArchExtKind : unsigned { AEK_SVE2AES = 1 << 24, AEK_SVE2SM4 = 1 << 25, AEK_SVE2SHA3 = 1 << 26, - AEK_BITPERM = 1 << 27, + AEK_SVE2BITPERM = 1 << 27, }; enum class ArchKind { diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.h b/contrib/llvm/include/llvm/Support/ARMTargetParser.h index 4b9070dea596..02d4c975129f 100644 
--- a/contrib/llvm/include/llvm/Support/ARMTargetParser.h +++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.h @@ -39,19 +39,13 @@ enum ArchExtKind : unsigned { AEK_DSP = 1 << 10, AEK_FP16 = 1 << 11, AEK_RAS = 1 << 12, - AEK_SVE = 1 << 13, - AEK_DOTPROD = 1 << 14, - AEK_SHA2 = 1 << 15, - AEK_AES = 1 << 16, - AEK_FP16FML = 1 << 17, - AEK_SB = 1 << 18, - AEK_SVE2 = 1 << 19, - AEK_SVE2AES = 1 << 20, - AEK_SVE2SM4 = 1 << 21, - AEK_SVE2SHA3 = 1 << 22, - AEK_BITPERM = 1 << 23, - AEK_FP_DP = 1 << 24, - AEK_LOB = 1 << 25, + AEK_DOTPROD = 1 << 13, + AEK_SHA2 = 1 << 14, + AEK_AES = 1 << 15, + AEK_FP16FML = 1 << 16, + AEK_SB = 1 << 17, + AEK_FP_DP = 1 << 18, + AEK_LOB = 1 << 19, // Unsupported extensions. AEK_OS = 0x8000000, AEK_IWMMXT = 0x10000000, diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h b/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h index 471055921fa8..994b6ec9c229 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h @@ -19,6 +19,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/IR/ValueHandle.h" #include namespace llvm { @@ -28,8 +29,8 @@ class Value; struct DivRemMapKey { bool SignedOp; - Value *Dividend; - Value *Divisor; + AssertingVH Dividend; + AssertingVH Divisor; DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor) : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} @@ -50,8 +51,10 @@ template <> struct DenseMapInfo { } static unsigned getHashValue(const DivRemMapKey &Val) { - return (unsigned)(reinterpret_cast(Val.Dividend) ^ - reinterpret_cast(Val.Divisor)) ^ + return (unsigned)(reinterpret_cast( + static_cast(Val.Dividend)) ^ + reinterpret_cast( + static_cast(Val.Divisor))) ^ (unsigned)Val.SignedOp; } }; diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 
7721e996aca5..5e49fec9c053 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); Sym->print(OS, AP->MAI); + MMI->getContext().registerInlineAsmLabel(Sym); } else if (MI->getOperand(OpNo).isMBB()) { const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); Sym->print(OS, AP->MAI); diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 52b4bbea012b..e6f2aa9ef930 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, TheUse = InsertedShift; } - // If we removed all uses, nuke the shift. + // If we removed all uses, or there are none, nuke the shift. 
if (ShiftI->use_empty()) { salvageDebugInfo(*ShiftI); ShiftI->eraseFromParent(); + MadeChange = true; } return MadeChange; diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index a669e64692b9..05e994c9eb51 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -691,9 +691,17 @@ void LiveDebugValues::insertTransferDebugPair( "No register supplied when handling a restore of a debug value"); MachineFunction *MF = MI.getMF(); DIBuilder DIB(*const_cast(MF->getFunction()).getParent()); + + const DIExpression *NewExpr; + if (auto Fragment = DebugInstr->getDebugExpression()->getFragmentInfo()) + NewExpr = *DIExpression::createFragmentExpression(DIB.createExpression(), + Fragment->OffsetInBits, Fragment->SizeInBits); + else + NewExpr = DIB.createExpression(); + NewDebugInstr = BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false, - NewReg, DebugInstr->getDebugVariable(), DIB.createExpression()); + NewReg, DebugInstr->getDebugVariable(), NewExpr); VarLoc VL(*NewDebugInstr, LS); ProcessVarLoc(VL, NewDebugInstr); LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: "; @@ -848,9 +856,14 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI, << "\n"); } // Check if the register or spill location is the location of a debug value. + // FIXME: Don't create a spill transfer if there is a complex expression, + // because we currently cannot recover the original expression on restore. 
for (unsigned ID : OpenRanges.getVarLocs()) { + const MachineInstr *DebugInstr = &VarLocIDs[ID].MI; + if (TKind == TransferKind::TransferSpill && - VarLocIDs[ID].isDescribedByReg() == Reg) { + VarLocIDs[ID].isDescribedByReg() == Reg && + !DebugInstr->getDebugExpression()->isComplex()) { LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '(' << VarLocIDs[ID].Var.getVar()->getName() << ")\n"); } else if (TKind == TransferKind::TransferRestore && diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 2df6d40d9293..a5af5cb72df9 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -66,6 +67,7 @@ namespace { AliasAnalysis *AA; MachineDominatorTree *DT; MachineRegisterInfo *MRI; + MachineBlockFrequencyInfo *MBFI; public: static char ID; // Pass identification @@ -83,6 +85,8 @@ namespace { AU.addPreservedID(MachineLoopInfoID); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } void releaseMemory() override { @@ -133,6 +137,11 @@ namespace { bool isPRECandidate(MachineInstr *MI); bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB); bool PerformSimplePRE(MachineDominatorTree *DT); + /// Heuristics to see if it's beneficial to move common computations of MBB + /// and MBB1 to CandidateBB. 
+ bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1); }; } // end anonymous namespace @@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT, if (!CMBB->isLegalToHoistInto()) continue; + if (!isBeneficalToHoistInto(CMBB, MBB, MBB1)) + continue; + // Two instrs are partial redundant if their basic blocks are reachable // from one to another but one doesn't dominate another. if (CMBB != MBB1) { @@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) { return Changed; } +bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB, + MachineBasicBlock *MBB, + MachineBasicBlock *MBB1) { + if (CandidateBB->getParent()->getFunction().hasMinSize()) + return true; + assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB"); + assert(DT->dominates(CandidateBB, MBB1) && + "CandidateBB should dominate MBB1"); + return MBFI->getBlockFreq(CandidateBB) <= + MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1); +} + bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); AA = &getAnalysis().getAAResults(); DT = &getAnalysis(); + MBFI = &getAnalysis(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); bool ChangedPRE, ChangedCSE; ChangedPRE = PerformSimplePRE(DT); diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index aadcd7319799..2e720018262c 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -121,7 +121,7 @@ ArrayRef MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); - Entry.Symbols.push_back(Context.createTempSymbol()); + 
Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken())); return Entry.Symbols; } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 568c6191e512..e09f2e760f55 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + + if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } + GluedNodes.pop_back(); } auto NewInsn = @@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } } // Insert all the dbg_values which have not already been inserted in source diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5852e693fa9f..6b0245dfd380 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1084,6 +1084,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); + SDCallSiteDbgInfo.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e818dd27c05e..3c02c36a7d26 100644 --- 
a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8021,6 +8021,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(T, SDValue()); + if (T.ConstraintType == TargetLowering::C_Immediate && + OpInfo.CallOperand && !isa(OpInfo.CallOperand)) + // We've delayed emitting a diagnostic like the "n" constraint because + // inlining could cause an integer showing up. + return emitInlineAsmError( + CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an " + "integer constant expression"); + ExtraInfo.update(T); } @@ -8105,7 +8113,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory || - (OpInfo.ConstraintType == TargetLowering::C_Other && + ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect)) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); @@ -8119,13 +8128,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; - } else if ((OpInfo.ConstraintType == TargetLowering::C_Other && + } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && !OpInfo.isIndirect) || OpInfo.ConstraintType == TargetLowering::C_Register || OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { // Otherwise, this outputs to a register (directly for C_Register / - // C_RegisterClass, and a target-defined fashion for C_Other). Find a - // register that we can use. + // C_RegisterClass, and a target-defined fashion for + // C_Immediate/C_Other). Find a register that we can use. 
if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( CS, "couldn't allocate output register for constraint '" + @@ -8205,15 +8215,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if (OpInfo.ConstraintType == TargetLowering::C_Other && + if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; - if (OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate) + if (isa(InOperandVal)) { + emitInlineAsmError(CS, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; @@ -8250,7 +8269,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register) && + OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_Immediate) && "Unknown constraint type!"); // TODO: Support this. 
@@ -8356,6 +8376,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Val = OpInfo.AssignedRegs.getCopyFromRegs( DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); break; + case TargetLowering::C_Immediate: case TargetLowering::C_Other: Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(), OpInfo, DAG); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b260cd91d468..2d90dcba12b6 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3567,15 +3567,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const { if (S == 1) { switch (Constraint[0]) { default: break; - case 'r': return C_RegisterClass; + case 'r': + return C_RegisterClass; case 'm': // memory case 'o': // offsetable case 'V': // not offsetable return C_Memory; - case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant + return C_Immediate; + case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant case 'p': // Address. case 'X': // Allow ANY value. @@ -3950,6 +3952,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, /// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { + case TargetLowering::C_Immediate: case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -4069,11 +4072,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - // If this is an 'other' constraint, see if the operand is valid for it. - // For example, on X86 we might have an 'rI' constraint. 
If the operand - // is an integer in the range [0..31] we want to use I (saving a load - // of a register), otherwise we must use 'r'. - if (CType == TargetLowering::C_Other && Op.getNode()) { + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. + if ((CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate) && Op.getNode()) { assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector ResultOps; diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp index 0dc2e2d37caf..6f9efec36361 100644 --- a/contrib/llvm/lib/MC/MCContext.cpp +++ b/contrib/llvm/lib/MC/MCContext.cpp @@ -61,6 +61,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, bool DoAutoReset) : SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi), Symbols(Allocator), UsedNames(Allocator), + InlineAsmUsedLabelNames(Allocator), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), AutoReset(DoAutoReset) { SecureLogFile = AsSecureLogFileName; @@ -90,6 +91,7 @@ void MCContext::reset() { XCOFFAllocator.DestroyAll(); MCSubtargetAllocator.DestroyAll(); + InlineAsmUsedLabelNames.clear(); UsedNames.clear(); Symbols.clear(); Allocator.Reset(); @@ -272,6 +274,10 @@ void MCContext::setSymbolValue(MCStreamer &Streamer, Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this)); } +void MCContext::registerInlineAsmLabel(MCSymbol *Sym) { + InlineAsmUsedLabelNames[Sym->getName()] = Sym; +} + //===----------------------------------------------------------------------===// // Section Management //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp 
index 084f6a7a2e14..c2cbca2177be 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -1142,7 +1142,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } } - MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName); + MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName); + if (!Sym) + Sym = getContext().getOrCreateSymbol(SymbolName); // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. diff --git a/contrib/llvm/lib/Object/RelocationResolver.cpp b/contrib/llvm/lib/Object/RelocationResolver.cpp index 0a243f32e12c..41a0ac7fbd10 100644 --- a/contrib/llvm/lib/Object/RelocationResolver.cpp +++ b/contrib/llvm/lib/Object/RelocationResolver.cpp @@ -90,9 +90,9 @@ static bool supportsBPF(uint64_t Type) { static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) { switch (R.getType()) { case ELF::R_BPF_64_32: - return S & 0xFFFFFFFF; + return (S + A) & 0xFFFFFFFF; case ELF::R_BPF_64_64: - return S; + return S + A; default: llvm_unreachable("Invalid relocation type"); } diff --git a/contrib/llvm/lib/Support/AArch64TargetParser.cpp b/contrib/llvm/lib/Support/AArch64TargetParser.cpp index df4caa1f07fd..6f1d6d50eee2 100644 --- a/contrib/llvm/lib/Support/AArch64TargetParser.cpp +++ b/contrib/llvm/lib/Support/AArch64TargetParser.cpp @@ -96,8 +96,8 @@ bool AArch64::getExtensionFeatures(unsigned Extensions, Features.push_back("+sve2-sm4"); if (Extensions & AEK_SVE2SHA3) Features.push_back("+sve2-sha3"); - if (Extensions & AEK_BITPERM) - Features.push_back("+bitperm"); + if (Extensions & AEK_SVE2BITPERM) + Features.push_back("+sve2-bitperm"); if (Extensions & AEK_RCPC) Features.push_back("+rcpc"); diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc index e80880c6b3cb..27c8a1bc9b74 100644 --- a/contrib/llvm/lib/Support/Unix/Path.inc +++ b/contrib/llvm/lib/Support/Unix/Path.inc @@ -1200,7 
+1200,7 @@ namespace fs { /// implementation. std::error_code copy_file(const Twine &From, const Twine &To) { uint32_t Flag = COPYFILE_DATA; -#if __has_builtin(__builtin_available) +#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE) if (__builtin_available(macos 10.12, *)) { bool IsSymlink; if (std::error_code Error = is_symlink_file(From, IsSymlink)) diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td index e39c6995e367..f54db0aa03b2 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64.td @@ -115,7 +115,7 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true", def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true", "Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>; -def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true", +def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true", "Enable bit permutation SVE2 instructions", [FeatureSVE2]>; def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7becc99fb5c7..6c250aea39f0 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -606,6 +606,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4; + MaxLoadsPerMemcmpOptSize = 4; + MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign() + ? 
MaxLoadsPerMemcmpOptSize : 8; + setStackPointerRegisterToSaveRestore(AArch64::SP); setSchedulingPreference(Sched::Hybrid); @@ -5661,8 +5665,6 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { switch (Constraint[0]) { default: break; - case 'z': - return C_Other; case 'x': case 'w': return C_RegisterClass; @@ -5670,6 +5672,16 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const { // currently handle addresses it is the same as 'r'. case 'Q': return C_Memory; + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'Y': + case 'Z': + return C_Immediate; + case 'z': case 'S': // A symbolic address return C_Other; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index eed53f36d574..020035c7f6c3 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -116,7 +116,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">, def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">, AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">; def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">, - AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">; + AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">; def HasRCPC : Predicate<"Subtarget->hasRCPC()">, AssemblerPredicate<"FeatureRCPC", "rcpc">; def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 79ab42f4c080..8e1ff999bd57 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1164,6 +1164,13 @@ let Predicates = [HasSVE2] in { defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">; defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">; + // SVE2 predicated shifts + defm SQSHL_ZPmI : 
sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; + defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; + // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">; defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">; @@ -1199,14 +1206,14 @@ let Predicates = [HasSVE2] in { defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">; // SVE2 bitwise shift and insert - defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">; - defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">; + defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">; + defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">; // SVE2 bitwise shift right and accumulate - defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">; - defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">; - defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">; - defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">; + defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">; + defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">; + defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">; + defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">; // SVE2 complex integer add defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">; @@ -1228,41 +1235,47 @@ let Predicates = [HasSVE2] in { defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">; defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">; - // SVE2 bitwise shift right narrow - defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">; - defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">; - defm SQRSHRUNB_ZZI : 
sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">; - defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">; - defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">; - defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">; - defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">; - defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">; - defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">; - defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">; - defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">; - defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">; - defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">; - defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">; - defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">; - defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">; + // SVE2 bitwise shift right narrow (bottom) + defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">; + defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">; + defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">; + defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">; + defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">; + defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">; + defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">; + defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">; - // SVE2 integer add/subtract narrow high part - defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">; - defm ADDHNT_ZZZ : 
sve2_int_addsub_narrow_high<0b001, "addhnt">; - defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">; - defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">; - defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">; - defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">; - defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">; - defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">; + // SVE2 bitwise shift right narrow (top) + defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">; + defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">; + defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">; + defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">; + defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">; + defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">; + defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">; + defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">; - // SVE2 saturating extract narrow - defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">; - defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">; - defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">; - defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">; - defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">; - defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">; + // SVE2 integer add/subtract narrow high part (bottom) + defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">; + defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">; + defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">; + defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">; + + // SVE2 integer add/subtract narrow high part (top) + defm ADDHNT_ZZZ : 
sve2_int_addsub_narrow_high_top<0b00, "addhnt">; + defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">; + defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">; + defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">; + + // SVE2 saturating extract narrow (bottom) + defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">; + defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">; + defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">; + + // SVE2 saturating extract narrow (top) + defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">; + defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">; + defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">; // SVE2 character match defm MATCH_PPzZZ : sve2_char_match<0b0, "match">; @@ -1289,10 +1302,14 @@ let Predicates = [HasSVE2] in { // SVE2 histogram generation (vector) defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">; + // SVE2 floating-point base 2 logarithm as integer + defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; + // SVE2 floating-point convert precision defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">; defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">; defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">; + def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; // SVE2 floating-point pairwise operations defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">; @@ -1321,58 +1338,45 @@ let Predicates = [HasSVE2] in { def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">; def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">; - // sve_int_rotate_imm + // SVE2 bitwise xor and rotate right by immediate defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">; // SVE2 extract vector (immediate offset, constructive) def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">; - // SVE floating-point convert precision - def FCVTX_ZPmZ_DtoS : 
sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>; + // SVE2 non-temporal gather loads + defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>; + defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>; + defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>; + defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>; + defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>; - // SVE floating-point convert to integer - defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">; - - // Non-temporal contiguous loads (vector + register) - defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>; - defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>; - defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>; - defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>; - defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>; - - defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>; - defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>; - defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>; - defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>; - defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>; - defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>; - defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>; + defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>; + defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>; + defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>; + defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>; + defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>; + defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>; + defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>; // 
SVE2 vector splice (constructive) defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; - // Predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">; - defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">; - defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">; + // SVE2 non-temporal scatter stores + defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>; + defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>; + defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>; - // Non-temporal contiguous stores (vector + register) - defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>; - defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>; - defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>; + defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>; + defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>; + defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>; + defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>; - defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>; - defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>; - defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>; - defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>; - - // SVE table lookup (three sources) + // SVE2 table lookup (three sources) defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">; defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">; - // SVE integer compare scalar count and limit + // SVE2 integer compare scalar count and limit defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">; defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">; defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">; @@ -1383,7 +1387,7 @@ let 
Predicates = [HasSVE2] in { defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">; defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">; - // SVE pointer conflict compare + // SVE2 pointer conflict compare defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">; defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a4b78f2a7d6b..301bf72d5239 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } +AArch64TTIImpl::TTI::MemCmpExpansionOptions +AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { + TTI::MemCmpExpansionOptions Options; + Options.AllowOverlappingLoads = !ST->requiresStrictAlign(); + Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); + Options.NumLoadsPerBlock = Options.MaxNumLoads; + // TODO: Though vector loads usually perform well on AArch64, in some targets + // they may wake up the FP unit, which raises the power consumption. Perhaps + // they could be used with no holds barred (-O3). 
+ Options.LoadSizes = {8, 4, 2, 1}; + return Options; +} + int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Alignment, unsigned AddressSpace, const Instruction *I) { diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 10c15a139b4c..95cda63b0174 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -130,6 +130,9 @@ class AArch64TTIImpl : public BasicTTIImplBase { int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I = nullptr); + TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, + bool IsZeroCmp) const; + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I = nullptr); diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f4c55d48d215..09b42811f786 100644 --- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -2840,7 +2840,7 @@ static const struct Extension { {"sve2-aes", {AArch64::FeatureSVE2AES}}, {"sve2-sm4", {AArch64::FeatureSVE2SM4}}, {"sve2-sha3", {AArch64::FeatureSVE2SHA3}}, - {"bitperm", {AArch64::FeatureSVE2BitPerm}}, + {"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}}, // FIXME: Unsupported extensions {"pan", {}}, {"lor", {}}, diff --git a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td index 808e59467081..dfd6c576e99b 100644 --- a/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/contrib/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -403,12 +403,12 @@ multiclass sve_int_count_r_x64 opc, string asm> { } class sve_int_count_v sz8_64, bits<5> opc, string asm, - ZPRRegOp zprty> -: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, 
PPRAny:$Pg), - asm, "\t$Zdn, $Pg", + ZPRRegOp zprty, PPRRegOp pprty> +: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm), + asm, "\t$Zdn, $Pm", "", []>, Sched<[]> { - bits<4> Pg; + bits<4> Pm; bits<5> Zdn; let Inst{31-24} = 0b00100101; let Inst{23-22} = sz8_64; @@ -416,7 +416,7 @@ class sve_int_count_v sz8_64, bits<5> opc, string asm, let Inst{18-16} = opc{4-2}; let Inst{15-11} = 0b10000; let Inst{10-9} = opc{1-0}; - let Inst{8-5} = Pg; + let Inst{8-5} = Pm; let Inst{4-0} = Zdn; let Constraints = "$Zdn = $_Zdn"; @@ -425,9 +425,16 @@ class sve_int_count_v sz8_64, bits<5> opc, string asm, } multiclass sve_int_count_v opc, string asm> { - def _H : sve_int_count_v<0b01, opc, asm, ZPR16>; - def _S : sve_int_count_v<0b10, opc, asm, ZPR32>; - def _D : sve_int_count_v<0b11, opc, asm, ZPR64>; + def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>; + def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>; + def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>; + + def : InstAlias(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>; + def : InstAlias(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>; + def : InstAlias(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>; } class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, @@ -744,7 +751,7 @@ multiclass sve2_int_perm_tbl { } class sve2_int_perm_tbx sz8_64, string asm, ZPRRegOp zprty> -: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm), +: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { @@ -758,6 +765,8 @@ class sve2_int_perm_tbx sz8_64, string asm, ZPRRegOp zprty> let Inst{15-10} = 0b001011; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } multiclass sve2_int_perm_tbx { @@ -1489,7 +1498,7 @@ multiclass sve_fp_fcadd { class sve2_fp_convert_precision opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2> -: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn), +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn), asm, "\t$Zd, $Pg/m, $Zn", "", []>, 
Sched<[]> { @@ -1504,6 +1513,8 @@ class sve2_fp_convert_precision opc, string asm, let Inst{12-10} = Pg; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } multiclass sve2_fp_convert_down_narrow { @@ -2399,21 +2410,40 @@ multiclass sve2_misc_bitwise opc, string asm> { def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>; } -multiclass sve2_bitwise_xor_interleaved { - let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in { - def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>; - def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>; - def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>; - def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>; - } -} - multiclass sve2_misc_int_addsub_long_interleaved opc, string asm> { def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>; def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>; def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>; } +class sve2_bitwise_xor_interleaved sz, bits<1> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), + asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = sz; + let Inst{21} = 0b0; + let Inst{20-16} = Zm; + let Inst{15-11} = 0b10010; + let Inst{10} = opc; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = Destructive; + let ElementSize = ElementSizeNone; +} + +multiclass sve2_bitwise_xor_interleaved { + def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>; + def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>; + def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>; + def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>; +} + class sve2_bitwise_shift_left_long tsz8_64, bits<2> opc, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2, Operand immtype> 
@@ -2451,9 +2481,9 @@ multiclass sve2_bitwise_shift_left_long opc, string asm> { // SVE2 Accumulate Group //===----------------------------------------------------------------------===// -class sve2_int_bin_cons_shift_imm tsz8_64, bit opc, string asm, - ZPRRegOp zprty, Operand immtype> -: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm), +class sve2_int_bin_shift_imm tsz8_64, bit opc, string asm, + ZPRRegOp zprty, Operand immtype> +: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { bits<5> Zd; @@ -2468,38 +2498,40 @@ class sve2_int_bin_cons_shift_imm tsz8_64, bit opc, string asm, let Inst{10} = opc; let Inst{9-5} = Zn; let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; } -multiclass sve2_int_bin_cons_shift_imm_left { - def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { +multiclass sve2_int_bin_shift_imm_left { + def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } } -multiclass sve2_int_bin_cons_shift_imm_right { - def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { +multiclass sve2_int_bin_shift_imm_right { + def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = 
imm{3}; } - def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } } -class sve2_int_bin_accum_cons_shift_imm tsz8_64, bits<2> opc, string asm, - ZPRRegOp zprty, Operand immtype> +class sve2_int_bin_accum_shift_imm tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty, Operand immtype> : I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm), asm, "\t$Zda, $Zn, $imm", "", []>, Sched<[]> { @@ -2521,15 +2553,15 @@ class sve2_int_bin_accum_cons_shift_imm tsz8_64, bits<2> opc, string asm let ElementSize = ElementSizeNone; } -multiclass sve2_int_bin_accum_cons_shift_imm_right opc, string asm> { - def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; - def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { +multiclass sve2_int_bin_accum_shift_imm_right opc, string asm> { + def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>; + def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { + def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> { let Inst{20-19} = imm{4-3}; } - def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { + def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> { let Inst{22} = imm{5}; let Inst{20-19} = imm{4-3}; } @@ -2607,9 +2639,9 @@ multiclass sve2_int_addsub_long_carry opc, string asm> { // SVE2 Narrowing Group //===----------------------------------------------------------------------===// -class 
sve2_int_bin_cons_shift_imm_narrow tsz8_64, bits<4> opc, - string asm, ZPRRegOp zprty1, - ZPRRegOp zprty2, Operand immtype> +class sve2_int_bin_shift_imm_narrow_bottom tsz8_64, bits<3> opc, + string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand immtype> : I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm), asm, "\t$Zd, $Zn, $imm", "", []>, Sched<[]> { @@ -2622,26 +2654,63 @@ class sve2_int_bin_cons_shift_imm_narrow tsz8_64, bits<4> opc, let Inst{20-19} = tsz8_64{1-0}; let Inst{18-16} = imm{2-0}; // imm3 let Inst{15-14} = 0b00; - let Inst{13-10} = opc; + let Inst{13-11} = opc; + let Inst{10} = 0b0; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass sve2_int_bin_cons_shift_imm_right_narrow opc, string asm> { - def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16, - vecshiftR8>; - def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32, - vecshiftR16> { +multiclass sve2_int_bin_shift_imm_right_narrow_bottom opc, string asm> { + def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16, + vecshiftR8>; + def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32, + vecshiftR16> { let Inst{19} = imm{3}; } - def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64, - vecshiftR32> { + def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64, + vecshiftR32> { let Inst{20-19} = imm{4-3}; } } -class sve2_int_addsub_narrow_high sz, bits<3> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> +class sve2_int_bin_shift_imm_narrow_top tsz8_64, bits<3> opc, + string asm, ZPRRegOp zprty1, + ZPRRegOp zprty2, Operand immtype> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm), + asm, "\t$Zd, $Zn, $imm", + "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> imm; + let Inst{31-23} = 0b010001010; + let Inst{22} = tsz8_64{2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let Inst{18-16} = imm{2-0}; // imm3 + let Inst{15-14} = 
0b00; + let Inst{13-11} = opc; + let Inst{10} = 0b1; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_bin_shift_imm_right_narrow_top opc, string asm> { + def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16, + vecshiftR8>; + def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32, + vecshiftR16> { + let Inst{19} = imm{3}; + } + def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64, + vecshiftR32> { + let Inst{20-19} = imm{4-3}; + } +} + +class sve2_int_addsub_narrow_high_bottom sz, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zd; @@ -2652,19 +2721,46 @@ class sve2_int_addsub_narrow_high sz, bits<3> opc, string asm, let Inst{21} = 0b1; let Inst{20-16} = Zm; let Inst{15-13} = 0b011; - let Inst{12-10} = opc; // S, R, T + let Inst{12-11} = opc; // S, R + let Inst{10} = 0b0; // Top let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass sve2_int_addsub_narrow_high opc, string asm> { - def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>; +multiclass sve2_int_addsub_narrow_high_bottom opc, string asm> { + def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>; } -class sve2_int_sat_extract_narrow tsz8_64, bits<3> opc, string asm, - ZPRRegOp zprty1, ZPRRegOp zprty2> +class sve2_int_addsub_narrow_high_top sz, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm), + asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + bits<5> 
Zm; + let Inst{31-24} = 0b01000101; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-13} = 0b011; + let Inst{12-11} = opc; // S, R + let Inst{10} = 0b1; // Top + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_addsub_narrow_high_top opc, string asm> { + def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>; +} + +class sve2_int_sat_extract_narrow_bottom tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zd), (ins zprty2:$Zn), asm, "\t$Zd, $Zn", "", []>, Sched<[]> { bits<5> Zd; @@ -2674,15 +2770,41 @@ class sve2_int_sat_extract_narrow tsz8_64, bits<3> opc, string asm, let Inst{21} = 0b1; let Inst{20-19} = tsz8_64{1-0}; let Inst{18-13} = 0b000010; - let Inst{12-10} = opc; + let Inst{12-11} = opc; + let Inst{10} = 0b0; let Inst{9-5} = Zn; let Inst{4-0} = Zd; } -multiclass sve2_int_sat_extract_narrow opc, string asm> { - def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>; - def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>; - def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>; +multiclass sve2_int_sat_extract_narrow_bottom opc, string asm> { + def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>; +} + +class sve2_int_sat_extract_narrow_top tsz8_64, bits<2> opc, string asm, + ZPRRegOp zprty1, ZPRRegOp zprty2> +: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn), + asm, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<5> Zd; + bits<5> Zn; + let Inst{31-23} = 0b010001010; + let Inst{22} = tsz8_64{2}; + let Inst{21} = 0b1; + let Inst{20-19} = tsz8_64{1-0}; + let 
Inst{18-13} = 0b000010; + let Inst{12-11} = opc; + let Inst{10} = 0b1; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; +} + +multiclass sve2_int_sat_extract_narrow_top opc, string asm> { + def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>; + def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>; + def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>; } //===----------------------------------------------------------------------===// @@ -3886,9 +4008,9 @@ multiclass sve_mem_cstnt_ss msz, string asm, RegisterOperand listty, (!cast(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>; } -class sve2_mem_cstnt_vs_base opc, dag iops, string asm, - RegisterOperand VecList> -: I<(outs VecList:$Zt), iops, +class sve2_mem_sstnt_vs_base opc, string asm, + RegisterOperand listty, ZPRRegOp zprty> +: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), asm, "\t$Zt, $Pg, [$Zn, $Rm]", "", []>, Sched<[]> { @@ -3908,17 +4030,14 @@ class sve2_mem_cstnt_vs_base opc, dag iops, string asm, let mayStore = 1; } -multiclass sve2_mem_cstnt_vs opc, string asm, +multiclass sve2_mem_sstnt_vs opc, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve2_mem_cstnt_vs_base; + def _REAL : sve2_mem_sstnt_vs_base; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>; } @@ -5094,7 +5213,7 @@ multiclass sve_mem_p_fill { (!cast(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>; } -class sve2_mem_cldnt_vs_base opc, dag iops, string asm, +class sve2_mem_gldnt_vs_base opc, dag iops, string asm, RegisterOperand VecList> : I<(outs VecList:$Zt), iops, asm, "\t$Zt, $Pg/z, [$Zn, $Rm]", @@ -5119,17 +5238,15 @@ class sve2_mem_cldnt_vs_base 
opc, dag iops, string asm, let mayLoad = 1; } -multiclass sve2_mem_cldnt_vs opc, string asm, +multiclass sve2_mem_gldnt_vs opc, string asm, RegisterOperand listty, ZPRRegOp zprty> { - def _REAL : sve2_mem_cldnt_vs_base; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>; - def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>; def : InstAlias(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>; } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 18bb9bf3eccc..d390c9e237e6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14369,7 +14369,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const { /// constraint it is for this target. ARMTargetLowering::ConstraintType ARMTargetLowering::getConstraintType(StringRef Constraint) const { - if (Constraint.size() == 1) { + unsigned S = Constraint.size(); + if (S == 1) { switch (Constraint[0]) { default: break; case 'l': return C_RegisterClass; @@ -14377,12 +14378,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const { case 'h': return C_RegisterClass; case 'x': return C_RegisterClass; case 't': return C_RegisterClass; - case 'j': return C_Other; // Constant for movw. - // An address with a single base register. Due to the way we - // currently handle addresses it is the same as an 'r' memory constraint. + case 'j': return C_Immediate; // Constant for movw. + // An address with a single base register. Due to the way we + // currently handle addresses it is the same as an 'r' memory constraint. 
case 'Q': return C_Memory; } - } else if (Constraint.size() == 2) { + } else if (S == 2) { switch (Constraint[0]) { default: break; case 'T': return C_RegisterClass; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index cfeb13c6acb6..fa266c41080c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -592,6 +592,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { [(ARMbrjt tGPR:$target, tjumptable:$jt)]>, Sched<[WriteBrTbl]> { let Size = 2; + let isNotDuplicable = 1; list Predicates = [IsThumb, IsThumb1Only]; } } @@ -1465,7 +1466,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them // and make use of the same compressed jump table format as Thumb-2. let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1, - isIndirectBranch = 1 in { + isIndirectBranch = 1, isNotDuplicable = 1 in { def tTBB_JT : tPseudoInst<(outs), (ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>, Sched<[WriteBr]>; diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp index b6ba5f22fafb..f159beee9730 100644 --- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp +++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp @@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html switch (Constraint[0]) { + default: + break; case 'a': // Simple upper registers case 'b': // Base pointer registers pairs case 'd': // Upper register @@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const { case 'O': // Integer constant (Range: 8, 16, 24) case 'P': // Integer constant (Range: 1) case 'R': // Integer constant (Range: -6 to 5)x - return C_Other; - default: - break; + return 
C_Immediate; } } diff --git a/contrib/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/contrib/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp index 51d4cbc8a429..509484b71544 100644 --- a/contrib/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp @@ -116,9 +116,8 @@ class BPFAbstractMemberAccess final : public ModulePass { void replaceWithGEP(std::vector &CallList, uint32_t NumOfZerosIndex, uint32_t DIIndex); - Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr, - std::string &AccessKey, uint32_t Kind, - MDNode *&TypeMeta); + Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey, + uint32_t Kind, MDNode *&TypeMeta); bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex); bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind); }; @@ -340,8 +339,7 @@ bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue, /// Compute the base of the whole preserve_*_access_index chains, i.e., the base /// pointer of the first preserve_*_access_index call, and construct the access /// string, which will be the name of a global variable. -Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, - std::string &AccessStr, +Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey, uint32_t Kind, MDNode *&TypeMeta) { @@ -392,16 +390,16 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2) return nullptr; - // Construct the type string AccessStr. + // Construct the type string AccessKey. 
for (unsigned I = 0; I < AccessIndices.size(); ++I) - AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr; + AccessKey = std::to_string(AccessIndices[I]) + ":" + AccessKey; if (TypeNameIndex == AccessIndices.size() - 1) - AccessStr = "0:" + AccessStr; + AccessKey = "0:" + AccessKey; // Access key is the type name + access string, uniquely identifying // one kernel memory access. - AccessKey = LastTypeName + ":" + AccessStr; + AccessKey = LastTypeName + ":" + AccessKey; return Base; } @@ -410,10 +408,10 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call, /// transformation to a chain of relocable GEPs. bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, uint32_t Kind) { - std::string AccessStr, AccessKey; + std::string AccessKey; MDNode *TypeMeta = nullptr; Value *Base = - computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta); + computeBaseAndAccessKey(Call, AccessKey, Kind, TypeMeta); if (!Base) return false; @@ -432,7 +430,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call, if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) { GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false, - GlobalVariable::ExternalLinkage, NULL, AccessStr); + GlobalVariable::ExternalLinkage, NULL, AccessKey); GV->addAttribute(BPFCoreSharedInfo::AmaAttr); // Set the metadata (debuginfo types) for the global. 
if (TypeMeta) diff --git a/contrib/llvm/lib/Target/BPF/BTFDebug.cpp b/contrib/llvm/lib/Target/BPF/BTFDebug.cpp index fa35c6619e21..5c542e739088 100644 --- a/contrib/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/contrib/llvm/lib/Target/BPF/BTFDebug.cpp @@ -30,6 +30,18 @@ static const char *BTFKindStr[] = { #include "BTF.def" }; +static const DIType * stripQualifiers(const DIType *Ty) { + while (const auto *DTy = dyn_cast(Ty)) { + unsigned Tag = DTy->getTag(); + if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type) + break; + Ty = DTy->getBaseType(); + } + + return Ty; +} + /// Emit a BTF common type. void BTFTypeBase::emitType(MCStreamer &OS) { OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) + @@ -184,9 +196,9 @@ void BTFTypeEnum::emitType(MCStreamer &OS) { } } -BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, - uint32_t NumElems) - : ElemSize(ElemSize) { +BTFTypeArray::BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId, + uint32_t ElemSize, uint32_t NumElems) + : ElemTyNoQual(Ty), ElemSize(ElemSize) { Kind = BTF::BTF_KIND_ARRAY; BTFType.NameOff = 0; BTFType.Info = Kind << 24; @@ -207,6 +219,9 @@ void BTFTypeArray::completeType(BTFDebug &BDebug) { // created during initial type traversal. Just // retrieve that type id. ArrayInfo.IndexType = BDebug.getArrayIndexTypeId(); + + ElemTypeNoQual = ElemTyNoQual ? 
BDebug.getTypeId(ElemTyNoQual) + : ArrayInfo.ElemType; } void BTFTypeArray::emitType(MCStreamer &OS) { @@ -218,7 +233,7 @@ void BTFTypeArray::emitType(MCStreamer &OS) { void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset, uint32_t &ElementTypeId) { - ElementTypeId = ArrayInfo.ElemType; + ElementTypeId = ElemTypeNoQual; LocOffset = Loc * ElemSize; } @@ -251,7 +266,9 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) { } else { BTFMember.Offset = DDTy->getOffsetInBits(); } - BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType()); + const auto *BaseTy = DDTy->getBaseType(); + BTFMember.Type = BDebug.getTypeId(BaseTy); + MemberTypeNoQual.push_back(BDebug.getTypeId(stripQualifiers(BaseTy))); Members.push_back(BTFMember); } } @@ -270,7 +287,7 @@ std::string BTFTypeStruct::getName() { return STy->getName(); } void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset, uint32_t &MemberType) { - MemberType = Members[Loc].Type; + MemberType = MemberTypeNoQual[Loc]; MemberOffset = HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset; } @@ -492,10 +509,13 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) { uint32_t ElemTypeId, ElemSize; const DIType *ElemType = CTy->getBaseType(); visitTypeEntry(ElemType, ElemTypeId, false, false); + + // Strip qualifiers from element type to get accurate element size. + ElemType = stripQualifiers(ElemType); ElemSize = ElemType->getSizeInBits() >> 3; if (!CTy->getSizeInBits()) { - auto TypeEntry = llvm::make_unique(ElemTypeId, 0, 0); + auto TypeEntry = llvm::make_unique(ElemType, ElemTypeId, 0, 0); ArrayTypes.push_back(TypeEntry.get()); ElemTypeId = addType(std::move(TypeEntry), CTy); } else { @@ -507,9 +527,11 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) { const DISubrange *SR = cast(Element); auto *CI = SR->getCount().dyn_cast(); int64_t Count = CI->getSExtValue(); + const DIType *ArrayElemTy = (I == 0) ? 
ElemType : nullptr; auto TypeEntry = - llvm::make_unique(ElemTypeId, ElemSize, Count); + llvm::make_unique(ArrayElemTy, ElemTypeId, + ElemSize, Count); ArrayTypes.push_back(TypeEntry.get()); if (I == 0) ElemTypeId = addType(std::move(TypeEntry), CTy); @@ -1006,19 +1028,20 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI, unsigned RootId = populateStructType(RootTy); setTypeFromId(RootId, &PrevStructType, &PrevArrayType); unsigned RootTySize = PrevStructType->getStructSize(); + StringRef IndexPattern = AccessPattern.substr(AccessPattern.find_first_of(':') + 1); BTFOffsetReloc OffsetReloc; OffsetReloc.Label = ORSym; - OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back()); + OffsetReloc.OffsetNameOff = addString(IndexPattern.drop_back()); OffsetReloc.TypeID = RootId; uint32_t Start = 0, End = 0, Offset = 0; bool FirstAccess = true; - for (auto C : AccessPattern) { + for (auto C : IndexPattern) { if (C != ':') { End++; } else { - std::string SubStr = AccessPattern.substr(Start, End - Start); + std::string SubStr = IndexPattern.substr(Start, End - Start); int Loc = std::stoi(SubStr); if (FirstAccess) { @@ -1038,12 +1061,15 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI, Offset += LocOffset; PrevArrayType = nullptr; setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType); + } else { + llvm_unreachable("Internal Error: BTF offset relocation type traversal error"); } + Start = End + 1; End = Start; } } - AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset; + AccessOffsets[AccessPattern.str()] = Offset; OffsetRelocTable[SecNameOff].push_back(OffsetReloc); } @@ -1227,7 +1253,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) { MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index); DIType *Ty = dyn_cast(MDN); std::string TypeName = Ty->getName(); - int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()]; + int64_t Imm = AccessOffsets[GVar->getName().str()]; // 
Emit "mov ri, " for abstract member accesses. OutMI.setOpcode(BPF::MOV_ri); diff --git a/contrib/llvm/lib/Target/BPF/BTFDebug.h b/contrib/llvm/lib/Target/BPF/BTFDebug.h index 6c0cdde17d9b..e210d18f941e 100644 --- a/contrib/llvm/lib/Target/BPF/BTFDebug.h +++ b/contrib/llvm/lib/Target/BPF/BTFDebug.h @@ -104,11 +104,14 @@ class BTFTypeEnum : public BTFTypeBase { /// Handle array type. class BTFTypeArray : public BTFTypeBase { + const DIType *ElemTyNoQual; uint32_t ElemSize; struct BTF::BTFArray ArrayInfo; + uint32_t ElemTypeNoQual; public: - BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems); + BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId, + uint32_t ElemSize, uint32_t NumElems); uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; } void completeType(BTFDebug &BDebug); void emitType(MCStreamer &OS); @@ -120,6 +123,7 @@ class BTFTypeStruct : public BTFTypeBase { const DICompositeType *STy; bool HasBitField; std::vector Members; + std::vector MemberTypeNoQual; public: BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField, diff --git a/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 0172c6298772..f10f7a2b77d6 100644 --- a/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/contrib/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1208,6 +1208,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { Res = V; } else Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext()); + + MCBinaryExpr::Opcode Opcode; + switch (getLexer().getKind()) { + default: + Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); + return MatchOperand_Success; + case AsmToken::Plus: + Opcode = MCBinaryExpr::Add; + break; + case AsmToken::Minus: + Opcode = MCBinaryExpr::Sub; + break; + } + + const MCExpr *Expr; + if (getParser().parseExpression(Expr)) + return 
MatchOperand_ParseFail; + Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext()); Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); return MatchOperand_Success; } diff --git a/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 32c3b9684d2c..bbaa16c08634 100644 --- a/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/contrib/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { uint64_t FrameSize = MFI.getStackSize(); // Get the alignment. - uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment() - : getStackAlignment(); + unsigned StackAlign = getStackAlignment(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment()); + FrameSize += (MaxStackAlign - StackAlign); + StackAlign = MaxStackAlign; + } + + // Set Max Call Frame Size + uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign); + MFI.setMaxCallFrameSize(MaxCallSize); // Make sure the frame is aligned. 
FrameSize = alignTo(FrameSize, StackAlign); @@ -101,6 +109,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, const RISCVInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); + if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) { + report_fatal_error( + "RISC-V backend can't currently handle functions that need stack " + "realignment and have variable sized objects"); + } + unsigned FPReg = getFPReg(STI); unsigned SPReg = getSPReg(STI); @@ -158,6 +172,29 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, nullptr, RI->getDwarfRegNum(FPReg, true), 0)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + + // Realign Stack + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxAlignment = MFI.getMaxAlignment(); + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + if (isInt<12>(-(int)MaxAlignment)) { + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg) + .addReg(SPReg) + .addImm(-(int)MaxAlignment); + } else { + unsigned ShiftAmount = countTrailingZeros(MaxAlignment); + unsigned VR = + MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR) + .addReg(SPReg) + .addImm(ShiftAmount); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg) + .addReg(VR) + .addImm(ShiftAmount); + } + } } } @@ -257,6 +294,13 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = RISCV::X2; Offset += MF.getFrameInfo().getStackSize(); + } else if (RI->needsStackRealignment(MF)) { + assert(!MFI.hasVarSizedObjects() && + "Unexpected combination of stack realignment and varsized objects"); + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, but we still access stack objects using SP. 
+ FrameReg = RISCV::X2; + Offset += MF.getFrameInfo().getStackSize(); } else { FrameReg = RI->getFrameRegister(MF); if (hasFP(MF)) diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ce7b85911ab6..e695f79f5cf4 100644 --- a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1007,12 +1007,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( // We can materialise `c1 << c2` into an add immediate, so it's "free", // and the combine should happen, to potentially allow further combines // later. - if (isLegalAddImmediate(ShiftedC1Int.getSExtValue())) + if (ShiftedC1Int.getMinSignedBits() <= 64 && + isLegalAddImmediate(ShiftedC1Int.getSExtValue())) return true; // We can materialise `c1` in an add immediate, so it's "free", and the // combine should be prevented. - if (isLegalAddImmediate(C1Int.getSExtValue())) + if (C1Int.getMinSignedBits() <= 64 && + isLegalAddImmediate(C1Int.getSExtValue())) return false; // Neither constant will fit into an immediate, so find materialisation @@ -2397,6 +2399,25 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. 
+RISCVTargetLowering::ConstraintType +RISCVTargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'f': + return C_RegisterClass; + case 'I': + case 'J': + case 'K': + return C_Immediate; + } + } + return TargetLowering::getConstraintType(Constraint); +} + std::pair RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, @@ -2407,6 +2428,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, switch (Constraint[0]) { case 'r': return std::make_pair(0U, &RISCV::GPRRegClass); + case 'f': + if (Subtarget.hasStdExtF() && VT == MVT::f32) + return std::make_pair(0U, &RISCV::FPR32RegClass); + if (Subtarget.hasStdExtD() && VT == MVT::f64) + return std::make_pair(0U, &RISCV::FPR64RegClass); + break; default: break; } diff --git a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h index 17db03bbb69e..f28c4753c1d9 100644 --- a/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/contrib/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -92,6 +92,7 @@ class RISCVTargetLowering : public TargetLowering { // This method returns the name of a target specific DAG node. 
const char *getTargetNodeName(unsigned Opcode) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp index a6d440fa8aa2..804f7ba74edf 100644 --- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -3183,7 +3183,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const { case 'e': return C_RegisterClass; case 'I': // SIMM13 - return C_Other; + return C_Immediate; } } diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 78820f511ab4..e7b7a5b0cd53 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -956,7 +956,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const { case 'K': // Signed 16-bit constant case 'L': // Signed 20-bit displacement (on all targets we support) case 'M': // 0x7fffffff - return C_Other; + return C_Immediate; default: break; diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 3112f00c91f2..e20315da55a5 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", "Support 64-bit instructions">; def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", - "64-bit with cmpxchg16b">; + "64-bit with cmpxchg16b", + [FeatureCMPXCHG8B]>; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", 
"IsPMULLDSlow", "true", diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 95d31e62cafc..34ad589d205f 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N, Complexity += 2; } + // Heuristic: try harder to form an LEA from ADD if the operands set flags. + // Unlike ADD, LEA does not affect flags, so we will be less likely to require + // duplicating flag-producing instructions later in the pipeline. + if (N.getOpcode() == ISD::ADD) { + auto isMathWithFlags = [](SDValue V) { + switch (V.getOpcode()) { + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::ADC: + case X86ISD::SBB: + /* TODO: These opcodes can be added safely, but we may want to justify + their inclusion for different reasons (better for reg-alloc). + case X86ISD::SMUL: + case X86ISD::UMUL: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + */ + // Value 1 is the flag output of the node - verify it's not dead. + return !SDValue(V.getNode(), 1).use_empty(); + default: + return false; + } + }; + // TODO: This could be an 'or' rather than 'and' to make the transform more + // likely to happen. We might want to factor in whether there's a + // load folding opportunity for the math op that disappears with LEA. 
+ if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) + Complexity++; + } + if (AM.Disp) Complexity++; @@ -3302,8 +3333,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { SDValue ImplDef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0); insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef); - NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef, - NBits); + + SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32); + insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal); + NBits = SDValue( + CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef, + NBits, SRIdxVal), 0); insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); if (Subtarget->hasBMI2()) { diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 0b4bf687e6cf..ad68ddbeaa8b 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4069,6 +4069,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); + // Save heapallocsite metadata. + if (CLI.CS) + if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite")) + DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); + // Create the CALLSEQ_END node. 
unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, @@ -5500,6 +5505,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops) { if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) && Idx == (VT.getVectorNumElements() / 2) && Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOperand(1).getValueType() == SubVT && isNullConstant(Src.getOperand(2))) { Ops.push_back(Src.getOperand(1)); Ops.push_back(Sub); @@ -34062,25 +34068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( return true; break; } - case X86ISD::SUBV_BROADCAST: { - // Reduce size of broadcast if we don't need the upper half. - unsigned HalfElts = NumElts / 2; - if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) { - SDValue Src = Op.getOperand(0); - MVT SrcVT = Src.getSimpleValueType(); - - SDValue Half = Src; - if (SrcVT.getVectorNumElements() != HalfElts) { - MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts); - Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src); - } - - return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0, - TLO.DAG, SDLoc(Op), - Half.getValueSizeInBits())); - } - break; - } case X86ISD::VPERMV: { SDValue Mask = Op.getOperand(0); APInt MaskUndef, MaskZero; @@ -34134,6 +34121,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); return TLO.CombineTo(Op, Insert); + } + // Subvector broadcast. 
+ case X86ISD::SUBV_BROADCAST: { + SDLoc DL(Op); + SDValue Src = Op.getOperand(0); + if (Src.getValueSizeInBits() > ExtSizeInBits) + Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits); + else if (Src.getValueSizeInBits() < ExtSizeInBits) { + MVT SrcSVT = Src.getSimpleValueType().getScalarType(); + MVT SrcVT = + MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits()); + Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src); + } + return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0, + TLO.DAG, DL, ExtSizeInBits)); } // Byte shifts by immediate. case X86ISD::VSHLDQ: @@ -43839,6 +43841,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, Vec.getOpcode() == ISD::INSERT_SUBVECTOR && OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 && isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() && + Vec.getOperand(1).getValueSizeInBits() == SubVecVT.getSizeInBits() && Vec.hasOneUse()) { Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT), Vec.getOperand(1), Vec.getOperand(2)); @@ -44660,10 +44663,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const { case 'I': case 'J': case 'K': - case 'L': - case 'M': case 'N': case 'G': + case 'L': + case 'M': + return C_Immediate; case 'C': case 'e': case 'Z': diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 3a4283ae5406..147af8bc37c9 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3288,26 +3288,35 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ, // Look for an 'and' of two (opposite) logical shifts. // Pick the single-use shift as XShift. 
- Value *XShift, *YShift; + Instruction *XShift, *YShift; if (!match(I.getOperand(0), - m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))), - m_CombineAnd(m_AnyLogicalShift, m_Value(YShift))))) + m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)), + m_CombineAnd(m_AnyLogicalShift, m_Instruction(YShift))))) return nullptr; - // If YShift is a single-use 'lshr', swap the shifts around. - if (match(YShift, m_OneUse(m_AnyLShr))) + // If YShift is a 'lshr', swap the shifts around. + if (match(YShift, m_AnyLShr)) std::swap(XShift, YShift); // The shifts must be in opposite directions. - Instruction::BinaryOps XShiftOpcode = - cast(XShift)->getOpcode(); - if (XShiftOpcode == cast(YShift)->getOpcode()) + auto XShiftOpcode = XShift->getOpcode(); + if (XShiftOpcode == YShift->getOpcode()) return nullptr; // Do not care about same-direction shifts here. Value *X, *XShAmt, *Y, *YShAmt; match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt))); match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt))); + // If one of the values being shifted is a constant, then we will end with + // and+icmp, and shift instr will be constant-folded. If they are not, + // however, we will need to ensure that we won't increase instruction count. + if (!isa(X) && !isa(Y)) { + // At least one of the hands of the 'and' should be one-use shift. + if (!match(I.getOperand(0), + m_c_And(m_OneUse(m_AnyLogicalShift), m_Value()))) + return nullptr; + } + // Can we fold (XShAmt+YShAmt) ? 
Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt, SQ.getWithInstruction(&I)); diff --git a/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp index 876681b4f9de..e64651d97495 100644 --- a/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/DivRemPairs.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/DebugCounter.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" + using namespace llvm; #define DEBUG_TYPE "div-rem-pairs" @@ -32,24 +33,44 @@ STATISTIC(NumDecomposed, "Number of instructions decomposed"); DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform", "Controls transformations in div-rem-pairs pass"); -/// Find matching pairs of integer div/rem ops (they have the same numerator, -/// denominator, and signedness). If they exist in different basic blocks, bring -/// them together by hoisting or replace the common division operation that is -/// implicit in the remainder: -/// X % Y <--> X - ((X / Y) * Y). -/// -/// We can largely ignore the normal safety and cost constraints on speculation -/// of these ops when we find a matching pair. This is because we are already -/// guaranteed that any exceptions and most cost are already incurred by the -/// first member of the pair. -/// -/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or -/// SimplifyCFG, but it's split off on its own because it's different enough -/// that it doesn't quite match the stated objectives of those passes. -static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, - const DominatorTree &DT) { - bool Changed = false; +/// A thin wrapper to store two values that we matched as div-rem pair. +/// We want this extra indirection to avoid dealing with RAUW'ing the map keys. +struct DivRemPairWorklistEntry { + /// The actual udiv/sdiv instruction. Source of truth. 
+ AssertingVH DivInst; + /// The instruction that we have matched as a remainder instruction. + /// Should only be used as Value, don't introspect it. + AssertingVH RemInst; + + DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_) + : DivInst(DivInst_), RemInst(RemInst_) { + assert((DivInst->getOpcode() == Instruction::UDiv || + DivInst->getOpcode() == Instruction::SDiv) && + "Not a division."); + assert(DivInst->getType() == RemInst->getType() && "Types should match."); + // We can't check anything else about remainder instruction, + // it's not strictly required to be a urem/srem. + } + + /// The type for this pair, identical for both the div and rem. + Type *getType() const { return DivInst->getType(); } + + /// Is this pair signed or unsigned? + bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; } + + /// In this pair, what are the divident and divisor? + Value *getDividend() const { return DivInst->getOperand(0); } + Value *getDivisor() const { return DivInst->getOperand(1); } +}; +using DivRemWorklistTy = SmallVector; + +/// Find matching pairs of integer div/rem ops (they have the same numerator, +/// denominator, and signedness). Place those pairs into a worklist for further +/// processing. This indirection is needed because we have to use TrackingVH<> +/// because we will be doing RAUW, and if one of the rem instructions we change +/// happens to be an input to another div/rem in the maps, we'd have problems. +static DivRemWorklistTy getWorklist(Function &F) { // Insert all divide and remainder instructions into maps keyed by their // operands and opcode (signed or unsigned). DenseMap DivMap; @@ -69,6 +90,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, } } + // We'll accumulate the matching pairs of div-rem instructions here. + DivRemWorklistTy Worklist; + // We can iterate over either map because we are only looking for matched // pairs. 
Choose remainders for efficiency because they are usually even more // rare than division. @@ -78,12 +102,45 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, if (!DivInst) continue; - // We have a matching pair of div/rem instructions. If one dominates the - // other, hoist and/or replace one. + // We have a matching pair of div/rem instructions. NumPairs++; Instruction *RemInst = RemPair.second; - bool IsSigned = DivInst->getOpcode() == Instruction::SDiv; - bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned); + + // Place it in the worklist. + Worklist.emplace_back(DivInst, RemInst); + } + + return Worklist; +} + +/// Find matching pairs of integer div/rem ops (they have the same numerator, +/// denominator, and signedness). If they exist in different basic blocks, bring +/// them together by hoisting or replace the common division operation that is +/// implicit in the remainder: +/// X % Y <--> X - ((X / Y) * Y). +/// +/// We can largely ignore the normal safety and cost constraints on speculation +/// of these ops when we find a matching pair. This is because we are already +/// guaranteed that any exceptions and most cost are already incurred by the +/// first member of the pair. +/// +/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or +/// SimplifyCFG, but it's split off on its own because it's different enough +/// that it doesn't quite match the stated objectives of those passes. +static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, + const DominatorTree &DT) { + bool Changed = false; + + // Get the matching pairs of div-rem instructions. We want this extra + // indirection to avoid dealing with having to RAUW the keys of the maps. + DivRemWorklistTy Worklist = getWorklist(F); + + // Process each entry in the worklist. 
+ for (DivRemPairWorklistEntry &E : Worklist) { + bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned()); + + auto &DivInst = E.DivInst; + auto &RemInst = E.RemInst; // If the target supports div+rem and the instructions are in the same block // already, there's nothing to do. The backend should handle this. If the @@ -110,8 +167,8 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // The target does not have a single div/rem operation. Decompose the // remainder calculation as: // X % Y --> X - ((X / Y) * Y). - Value *X = RemInst->getOperand(0); - Value *Y = RemInst->getOperand(1); + Value *X = E.getDividend(); + Value *Y = E.getDivisor(); Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y); Instruction *Sub = BinaryOperator::CreateSub(X, Mul); @@ -152,8 +209,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, // Now kill the explicit remainder. We have replaced it with: // (sub X, (mul (div X, Y), Y) - RemInst->replaceAllUsesWith(Sub); - RemInst->eraseFromParent(); + Sub->setName(RemInst->getName() + ".decomposed"); + Instruction *OrigRemInst = RemInst; + // Update AssertingVH<> with new instruction so it doesn't assert. + RemInst = Sub; + // And replace the original instruction with the new one. 
+ OrigRemInst->replaceAllUsesWith(Sub); + OrigRemInst->eraseFromParent(); NumDecomposed++; } Changed = true; @@ -188,7 +250,7 @@ struct DivRemPairsLegacyPass : public FunctionPass { return optimizeDivRem(F, TTI, DT); } }; -} +} // namespace char DivRemPairsLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs", diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp index c13fb3e04516..e6db11f47ead 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -777,8 +777,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl &PNs, // speculation if the predecessor is an invoke. This doesn't seem // fundamental and we should probably be splitting critical edges // differently. - if (isa(PredBB->getTerminator()) || - isa(PredBB->getTerminator())) { + const auto *TermInst = PredBB->getTerminator(); + if (isa(TermInst) || + isa(TermInst) || + isa(TermInst)) { LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: " << PredBB->getName() << "\n"); return false; diff --git a/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h b/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h index 28ed6cdfde14..faddbc6d9675 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h +++ b/contrib/llvm/tools/clang/include/clang/AST/ExprCXX.h @@ -185,15 +185,20 @@ class CXXMemberCallExpr final : public CallExpr { static CXXMemberCallExpr *CreateEmpty(const ASTContext &Ctx, unsigned NumArgs, EmptyShell Empty); - /// Retrieves the implicit object argument for the member call. + /// Retrieve the implicit object argument for the member call. /// /// For example, in "x.f(5)", this returns the sub-expression "x". Expr *getImplicitObjectArgument() const; - /// Retrieves the declaration of the called method. + /// Retrieve the type of the object argument. 
+ /// + /// Note that this always returns a non-pointer type. + QualType getObjectType() const; + + /// Retrieve the declaration of the called method. CXXMethodDecl *getMethodDecl() const; - /// Retrieves the CXXRecordDecl for the underlying type of + /// Retrieve the CXXRecordDecl for the underlying type of /// the implicit object argument. /// /// Note that this is may not be the same declaration as that of the class diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index effcbad78b23..275c4e4365d1 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -598,6 +598,10 @@ def ext_implicit_lib_function_decl : ExtWarn< def note_include_header_or_declare : Note< "include the header <%0> or explicitly provide a declaration for '%1'">; def note_previous_builtin_declaration : Note<"%0 is a builtin with type %1">; +def warn_implicit_decl_no_jmp_buf + : Warning<"declaration of built-in function '%0' requires the declaration" + " of the 'jmp_buf' type, commonly provided in the header .">, + InGroup>; def warn_implicit_decl_requires_sysheader : Warning< "declaration of built-in function '%1' requires inclusion of the header <%0>">, InGroup; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h b/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h index 7a8384f5fbc0..c6c966dfbe2c 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/TargetInfo.h @@ -1249,15 +1249,9 @@ class TargetInfo : public virtual TransferrableTargetInfo, bool isBigEndian() const { return BigEndian; } bool isLittleEndian() const { return !BigEndian; } - enum CallingConvMethodType { - CCMT_Unknown, - CCMT_Member, - CCMT_NonMember - }; - /// Gets the default calling convention for the given target and /// declaration 
context. - virtual CallingConv getDefaultCallingConv(CallingConvMethodType MT) const { + virtual CallingConv getDefaultCallingConv() const { // Not all targets will specify an explicit calling convention that we can // express. This will always do the right thing, even though it's not // an explicit calling convention. diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index dfd27fab796e..4ea8bfff0973 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -518,7 +518,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group, Flags<[CC def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group, Flags<[CC1Option]>, HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">; def cl_std_EQ : Joined<["-"], "cl-std=">, Group, Flags<[CC1Option]>, - HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,c++">; + HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,clc++,CLC++">; def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, Flags<[CC1Option]>, HelpText<"OpenCL only. 
Allow denormals to be flushed to zero.">; def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/LangStandards.def b/contrib/llvm/tools/clang/include/clang/Frontend/LangStandards.def index 0964e9b90a03..72ea23562ebd 100644 --- a/contrib/llvm/tools/clang/include/clang/Frontend/LangStandards.def +++ b/contrib/llvm/tools/clang/include/clang/Frontend/LangStandards.def @@ -174,6 +174,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl10, "CL") LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1") LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2") LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0") +LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++") // CUDA LANGSTANDARD(cuda, "cuda", CUDA, "NVIDIA CUDA(tm)", diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h index af762f74d745..e6c63fd9c015 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h @@ -11165,6 +11165,7 @@ class Sema { // Emitting members of dllexported classes is delayed until the class // (including field initializers) is fully parsed. 
SmallVector DelayedDllExportClasses; + SmallVector DelayedDllExportMemberFunctions; private: class SavePendingParsedClassStateRAII { diff --git a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp index 0d69eb90abaf..468c7f47657d 100644 --- a/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ASTContext.cpp @@ -10035,7 +10035,7 @@ CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic, break; } } - return Target->getDefaultCallingConv(TargetInfo::CCMT_Unknown); + return Target->getDefaultCallingConv(); } bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const { diff --git a/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp b/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp index b30f785ba8f5..c5f86a4cc12b 100644 --- a/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ExprCXX.cpp @@ -651,6 +651,13 @@ Expr *CXXMemberCallExpr::getImplicitObjectArgument() const { return nullptr; } +QualType CXXMemberCallExpr::getObjectType() const { + QualType Ty = getImplicitObjectArgument()->getType(); + if (Ty->isPointerType()) + Ty = Ty->getPointeeType(); + return Ty; +} + CXXMethodDecl *CXXMemberCallExpr::getMethodDecl() const { if (const auto *MemExpr = dyn_cast(getCallee()->IgnoreParens())) return cast(MemExpr->getMemberDecl()); diff --git a/contrib/llvm/tools/clang/lib/AST/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/AST/ItaniumCXXABI.cpp index 727a905d08a1..77fb5a1d33b3 100644 --- a/contrib/llvm/tools/clang/lib/AST/ItaniumCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ItaniumCXXABI.cpp @@ -177,7 +177,7 @@ class ItaniumCXXABI : public CXXABI { if (!isVariadic && T.isWindowsGNUEnvironment() && T.getArch() == llvm::Triple::x86) return CC_X86ThisCall; - return CC_C; + return Context.getTargetInfo().getDefaultCallingConv(); } // We cheat and just check that the class has a vtable pointer, and that it's diff --git 
a/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp index 4dc4156df9ca..444e55f777fa 100644 --- a/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftCXXABI.cpp @@ -82,7 +82,7 @@ class MicrosoftCXXABI : public CXXABI { if (!isVariadic && Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) return CC_X86ThisCall; - return CC_C; + return Context.getTargetInfo().getDefaultCallingConv(); } bool isNearlyEmpty(const CXXRecordDecl *RD) const override { diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp index 74ac69ab8946..25f2b7b35f41 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/AArch64.cpp @@ -196,9 +196,6 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__ARM_NEON_FP", "0xE"); } - if (FPU & SveMode) - Builder.defineMacro("__ARM_FEATURE_SVE", "1"); - if (HasCRC) Builder.defineMacro("__ARM_FEATURE_CRC32", "1"); @@ -351,10 +348,19 @@ const char *const AArch64TargetInfo::GCCRegNames[] = { "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", - // Vector registers + // Neon vector registers "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", - "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" + "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + + // SVE vector registers + "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10", + "z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21", + "z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", + + // SVE predicate registers + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", 
"p8", "p9", "p10", + "p11", "p12", "p13", "p14", "p15" }; ArrayRef AArch64TargetInfo::getGCCRegNames() const { diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/OSTargets.h b/contrib/llvm/tools/clang/lib/Basic/Targets/OSTargets.h index 8542311ffa41..c0373ffaa444 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/OSTargets.h +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/OSTargets.h @@ -618,8 +618,11 @@ class LLVM_LIBRARY_VISIBILITY SolarisTargetInfo : public OSTargetInfo { Builder.defineMacro("_XOPEN_SOURCE", "600"); else Builder.defineMacro("_XOPEN_SOURCE", "500"); - if (Opts.CPlusPlus) + if (Opts.CPlusPlus) { Builder.defineMacro("__C99FEATURES__"); + Builder.defineMacro("_FILE_OFFSET_BITS", "64"); + } + // GCC restricts the next two to C++. Builder.defineMacro("_LARGEFILE_SOURCE"); Builder.defineMacro("_LARGEFILE64_SOURCE"); Builder.defineMacro("__EXTENSIONS__"); diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/RISCV.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets/RISCV.cpp index f800bb0b25da..939ac46d671b 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/RISCV.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/RISCV.cpp @@ -56,6 +56,10 @@ bool RISCVTargetInfo::validateAsmConstraint( // A 5-bit unsigned immediate for CSR access instructions. Info.setRequiresImmediate(0, 31); return true; + case 'f': + // A floating-point register. + Info.setAllowsRegister(); + return true; } } @@ -65,9 +69,18 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__riscv"); bool Is64Bit = getTriple().getArch() == llvm::Triple::riscv64; Builder.defineMacro("__riscv_xlen", Is64Bit ? "64" : "32"); - // TODO: modify when more code models and ABIs are supported. + // TODO: modify when more code models are supported. 
Builder.defineMacro("__riscv_cmodel_medlow"); - Builder.defineMacro("__riscv_float_abi_soft"); + + StringRef ABIName = getABI(); + if (ABIName == "ilp32f" || ABIName == "lp64f") + Builder.defineMacro("__riscv_float_abi_single"); + else if (ABIName == "ilp32d" || ABIName == "lp64d") + Builder.defineMacro("__riscv_float_abi_double"); + else if (ABIName == "ilp32e") + Builder.defineMacro("__riscv_abi_rve"); + else + Builder.defineMacro("__riscv_float_abi_soft"); if (HasM) { Builder.defineMacro("__riscv_mul"); diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/RISCV.h b/contrib/llvm/tools/clang/lib/Basic/Targets/RISCV.h index bc814b79ce51..ce193feaeb98 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/RISCV.h +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/RISCV.h @@ -87,8 +87,7 @@ class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo { } bool setABI(const std::string &Name) override { - // TODO: support ilp32f and ilp32d ABIs. - if (Name == "ilp32") { + if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") { ABI = Name; return true; } @@ -105,8 +104,7 @@ class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo { } bool setABI(const std::string &Name) override { - // TODO: support lp64f and lp64d ABIs. 
- if (Name == "lp64") { + if (Name == "lp64" || Name == "lp64f" || Name == "lp64d") { ABI = Name; return true; } diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/SPIR.h b/contrib/llvm/tools/clang/lib/Basic/Targets/SPIR.h index 6023c868dbdc..802ccf8b671e 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/SPIR.h +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/SPIR.h @@ -88,7 +88,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { : CCCR_Warning; } - CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override { + CallingConv getDefaultCallingConv() const override { return CC_SpirFunction; } diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h b/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h index 588b6d3da1d6..dd1e7db6c81e 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h +++ b/contrib/llvm/tools/clang/lib/Basic/Targets/X86.h @@ -320,8 +320,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { } } - CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override { - return MT == CCMT_Member ? CC_X86ThisCall : CC_C; + CallingConv getDefaultCallingConv() const override { + return CC_C; } bool hasSjLjLowering() const override { return true; } @@ -659,7 +659,7 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo { } } - CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override { + CallingConv getDefaultCallingConv() const override { return CC_C; } diff --git a/contrib/llvm/tools/clang/lib/Basic/Version.cpp b/contrib/llvm/tools/clang/lib/Basic/Version.cpp index 3006ca33f213..9bf8687a8765 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Version.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Version.cpp @@ -35,7 +35,7 @@ std::string getClangRepositoryPath() { // If the CLANG_REPOSITORY is empty, try to use the SVN keyword. This helps us // pick up a tag in an SVN export, for example. 
- StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/trunk/lib/Basic/Version.cpp $"); + StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/branches/release_90/lib/Basic/Version.cpp $"); if (URL.empty()) { URL = SVNRepository.slice(SVNRepository.find(':'), SVNRepository.find("/lib/Basic")); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index a300bab49f9c..cadce507412b 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -8011,6 +8011,151 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); } + case AArch64::BI_BitScanForward: + case AArch64::BI_BitScanForward64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); + case AArch64::BI_BitScanReverse: + case AArch64::BI_BitScanReverse64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); + case AArch64::BI_InterlockedAnd64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); + case AArch64::BI_InterlockedExchange64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); + case AArch64::BI_InterlockedExchangeAdd64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); + case AArch64::BI_InterlockedExchangeSub64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); + case AArch64::BI_InterlockedOr64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); + case AArch64::BI_InterlockedXor64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); + case AArch64::BI_InterlockedDecrement64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); + case AArch64::BI_InterlockedIncrement64: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); + case AArch64::BI_InterlockedExchangeAdd8_acq: + case 
AArch64::BI_InterlockedExchangeAdd16_acq: + case AArch64::BI_InterlockedExchangeAdd_acq: + case AArch64::BI_InterlockedExchangeAdd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); + case AArch64::BI_InterlockedExchangeAdd8_rel: + case AArch64::BI_InterlockedExchangeAdd16_rel: + case AArch64::BI_InterlockedExchangeAdd_rel: + case AArch64::BI_InterlockedExchangeAdd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); + case AArch64::BI_InterlockedExchangeAdd8_nf: + case AArch64::BI_InterlockedExchangeAdd16_nf: + case AArch64::BI_InterlockedExchangeAdd_nf: + case AArch64::BI_InterlockedExchangeAdd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); + case AArch64::BI_InterlockedExchange8_acq: + case AArch64::BI_InterlockedExchange16_acq: + case AArch64::BI_InterlockedExchange_acq: + case AArch64::BI_InterlockedExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); + case AArch64::BI_InterlockedExchange8_rel: + case AArch64::BI_InterlockedExchange16_rel: + case AArch64::BI_InterlockedExchange_rel: + case AArch64::BI_InterlockedExchange64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); + case AArch64::BI_InterlockedExchange8_nf: + case AArch64::BI_InterlockedExchange16_nf: + case AArch64::BI_InterlockedExchange_nf: + case AArch64::BI_InterlockedExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); + case AArch64::BI_InterlockedCompareExchange8_acq: + case AArch64::BI_InterlockedCompareExchange16_acq: + case AArch64::BI_InterlockedCompareExchange_acq: + case AArch64::BI_InterlockedCompareExchange64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); + case AArch64::BI_InterlockedCompareExchange8_rel: + case AArch64::BI_InterlockedCompareExchange16_rel: + case AArch64::BI_InterlockedCompareExchange_rel: + case AArch64::BI_InterlockedCompareExchange64_rel: + return 
EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); + case AArch64::BI_InterlockedCompareExchange8_nf: + case AArch64::BI_InterlockedCompareExchange16_nf: + case AArch64::BI_InterlockedCompareExchange_nf: + case AArch64::BI_InterlockedCompareExchange64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); + case AArch64::BI_InterlockedOr8_acq: + case AArch64::BI_InterlockedOr16_acq: + case AArch64::BI_InterlockedOr_acq: + case AArch64::BI_InterlockedOr64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); + case AArch64::BI_InterlockedOr8_rel: + case AArch64::BI_InterlockedOr16_rel: + case AArch64::BI_InterlockedOr_rel: + case AArch64::BI_InterlockedOr64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); + case AArch64::BI_InterlockedOr8_nf: + case AArch64::BI_InterlockedOr16_nf: + case AArch64::BI_InterlockedOr_nf: + case AArch64::BI_InterlockedOr64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); + case AArch64::BI_InterlockedXor8_acq: + case AArch64::BI_InterlockedXor16_acq: + case AArch64::BI_InterlockedXor_acq: + case AArch64::BI_InterlockedXor64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); + case AArch64::BI_InterlockedXor8_rel: + case AArch64::BI_InterlockedXor16_rel: + case AArch64::BI_InterlockedXor_rel: + case AArch64::BI_InterlockedXor64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); + case AArch64::BI_InterlockedXor8_nf: + case AArch64::BI_InterlockedXor16_nf: + case AArch64::BI_InterlockedXor_nf: + case AArch64::BI_InterlockedXor64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); + case AArch64::BI_InterlockedAnd8_acq: + case AArch64::BI_InterlockedAnd16_acq: + case AArch64::BI_InterlockedAnd_acq: + case AArch64::BI_InterlockedAnd64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); + case AArch64::BI_InterlockedAnd8_rel: + case AArch64::BI_InterlockedAnd16_rel: + 
case AArch64::BI_InterlockedAnd_rel: + case AArch64::BI_InterlockedAnd64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); + case AArch64::BI_InterlockedAnd8_nf: + case AArch64::BI_InterlockedAnd16_nf: + case AArch64::BI_InterlockedAnd_nf: + case AArch64::BI_InterlockedAnd64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); + case AArch64::BI_InterlockedIncrement16_acq: + case AArch64::BI_InterlockedIncrement_acq: + case AArch64::BI_InterlockedIncrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); + case AArch64::BI_InterlockedIncrement16_rel: + case AArch64::BI_InterlockedIncrement_rel: + case AArch64::BI_InterlockedIncrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); + case AArch64::BI_InterlockedIncrement16_nf: + case AArch64::BI_InterlockedIncrement_nf: + case AArch64::BI_InterlockedIncrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); + case AArch64::BI_InterlockedDecrement16_acq: + case AArch64::BI_InterlockedDecrement_acq: + case AArch64::BI_InterlockedDecrement64_acq: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); + case AArch64::BI_InterlockedDecrement16_rel: + case AArch64::BI_InterlockedDecrement_rel: + case AArch64::BI_InterlockedDecrement64_rel: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); + case AArch64::BI_InterlockedDecrement16_nf: + case AArch64::BI_InterlockedDecrement_nf: + case AArch64::BI_InterlockedDecrement64_nf: + return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); + + case AArch64::BI_InterlockedAdd: { + Value *Arg0 = EmitScalarExpr(E->getArg(0)); + Value *Arg1 = EmitScalarExpr(E->getArg(1)); + AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( + AtomicRMWInst::Add, Arg0, Arg1, + llvm::AtomicOrdering::SequentiallyConsistent); + return Builder.CreateAdd(RMWI, Arg1); + } } llvm::VectorType *VTy = GetNeonType(this, Type); @@ -9128,151 
+9273,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_suqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } - case AArch64::BI_BitScanForward: - case AArch64::BI_BitScanForward64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E); - case AArch64::BI_BitScanReverse: - case AArch64::BI_BitScanReverse64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E); - case AArch64::BI_InterlockedAnd64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E); - case AArch64::BI_InterlockedExchange64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E); - case AArch64::BI_InterlockedExchangeAdd64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E); - case AArch64::BI_InterlockedExchangeSub64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E); - case AArch64::BI_InterlockedOr64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E); - case AArch64::BI_InterlockedXor64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E); - case AArch64::BI_InterlockedDecrement64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E); - case AArch64::BI_InterlockedIncrement64: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E); - case AArch64::BI_InterlockedExchangeAdd8_acq: - case AArch64::BI_InterlockedExchangeAdd16_acq: - case AArch64::BI_InterlockedExchangeAdd_acq: - case AArch64::BI_InterlockedExchangeAdd64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E); - case AArch64::BI_InterlockedExchangeAdd8_rel: - case AArch64::BI_InterlockedExchangeAdd16_rel: - case AArch64::BI_InterlockedExchangeAdd_rel: - case AArch64::BI_InterlockedExchangeAdd64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E); - case AArch64::BI_InterlockedExchangeAdd8_nf: - case AArch64::BI_InterlockedExchangeAdd16_nf: - case AArch64::BI_InterlockedExchangeAdd_nf: - case 
AArch64::BI_InterlockedExchangeAdd64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E); - case AArch64::BI_InterlockedExchange8_acq: - case AArch64::BI_InterlockedExchange16_acq: - case AArch64::BI_InterlockedExchange_acq: - case AArch64::BI_InterlockedExchange64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E); - case AArch64::BI_InterlockedExchange8_rel: - case AArch64::BI_InterlockedExchange16_rel: - case AArch64::BI_InterlockedExchange_rel: - case AArch64::BI_InterlockedExchange64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E); - case AArch64::BI_InterlockedExchange8_nf: - case AArch64::BI_InterlockedExchange16_nf: - case AArch64::BI_InterlockedExchange_nf: - case AArch64::BI_InterlockedExchange64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E); - case AArch64::BI_InterlockedCompareExchange8_acq: - case AArch64::BI_InterlockedCompareExchange16_acq: - case AArch64::BI_InterlockedCompareExchange_acq: - case AArch64::BI_InterlockedCompareExchange64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E); - case AArch64::BI_InterlockedCompareExchange8_rel: - case AArch64::BI_InterlockedCompareExchange16_rel: - case AArch64::BI_InterlockedCompareExchange_rel: - case AArch64::BI_InterlockedCompareExchange64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E); - case AArch64::BI_InterlockedCompareExchange8_nf: - case AArch64::BI_InterlockedCompareExchange16_nf: - case AArch64::BI_InterlockedCompareExchange_nf: - case AArch64::BI_InterlockedCompareExchange64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E); - case AArch64::BI_InterlockedOr8_acq: - case AArch64::BI_InterlockedOr16_acq: - case AArch64::BI_InterlockedOr_acq: - case AArch64::BI_InterlockedOr64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E); - case AArch64::BI_InterlockedOr8_rel: - case 
AArch64::BI_InterlockedOr16_rel: - case AArch64::BI_InterlockedOr_rel: - case AArch64::BI_InterlockedOr64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E); - case AArch64::BI_InterlockedOr8_nf: - case AArch64::BI_InterlockedOr16_nf: - case AArch64::BI_InterlockedOr_nf: - case AArch64::BI_InterlockedOr64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E); - case AArch64::BI_InterlockedXor8_acq: - case AArch64::BI_InterlockedXor16_acq: - case AArch64::BI_InterlockedXor_acq: - case AArch64::BI_InterlockedXor64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E); - case AArch64::BI_InterlockedXor8_rel: - case AArch64::BI_InterlockedXor16_rel: - case AArch64::BI_InterlockedXor_rel: - case AArch64::BI_InterlockedXor64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E); - case AArch64::BI_InterlockedXor8_nf: - case AArch64::BI_InterlockedXor16_nf: - case AArch64::BI_InterlockedXor_nf: - case AArch64::BI_InterlockedXor64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E); - case AArch64::BI_InterlockedAnd8_acq: - case AArch64::BI_InterlockedAnd16_acq: - case AArch64::BI_InterlockedAnd_acq: - case AArch64::BI_InterlockedAnd64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E); - case AArch64::BI_InterlockedAnd8_rel: - case AArch64::BI_InterlockedAnd16_rel: - case AArch64::BI_InterlockedAnd_rel: - case AArch64::BI_InterlockedAnd64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E); - case AArch64::BI_InterlockedAnd8_nf: - case AArch64::BI_InterlockedAnd16_nf: - case AArch64::BI_InterlockedAnd_nf: - case AArch64::BI_InterlockedAnd64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E); - case AArch64::BI_InterlockedIncrement16_acq: - case AArch64::BI_InterlockedIncrement_acq: - case AArch64::BI_InterlockedIncrement64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E); - case 
AArch64::BI_InterlockedIncrement16_rel: - case AArch64::BI_InterlockedIncrement_rel: - case AArch64::BI_InterlockedIncrement64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E); - case AArch64::BI_InterlockedIncrement16_nf: - case AArch64::BI_InterlockedIncrement_nf: - case AArch64::BI_InterlockedIncrement64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E); - case AArch64::BI_InterlockedDecrement16_acq: - case AArch64::BI_InterlockedDecrement_acq: - case AArch64::BI_InterlockedDecrement64_acq: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E); - case AArch64::BI_InterlockedDecrement16_rel: - case AArch64::BI_InterlockedDecrement_rel: - case AArch64::BI_InterlockedDecrement64_rel: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E); - case AArch64::BI_InterlockedDecrement16_nf: - case AArch64::BI_InterlockedDecrement_nf: - case AArch64::BI_InterlockedDecrement64_nf: - return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E); - - case AArch64::BI_InterlockedAdd: { - Value *Arg0 = EmitScalarExpr(E->getArg(0)); - Value *Arg1 = EmitScalarExpr(E->getArg(1)); - AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( - AtomicRMWInst::Add, Arg0, Arg1, - llvm::AtomicOrdering::SequentiallyConsistent); - return Builder.CreateAdd(RMWI, Arg1); - } } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp index 695facd50b67..0a57870a7c58 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExprAgg.cpp @@ -1495,6 +1495,13 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // initializers throws an exception. 
SmallVector cleanups; llvm::Instruction *cleanupDominator = nullptr; + auto addCleanup = [&](const EHScopeStack::stable_iterator &cleanup) { + cleanups.push_back(cleanup); + if (!cleanupDominator) // create placeholder once needed + cleanupDominator = CGF.Builder.CreateAlignedLoad( + CGF.Int8Ty, llvm::Constant::getNullValue(CGF.Int8PtrTy), + CharUnits::One()); + }; unsigned curInitIndex = 0; @@ -1519,7 +1526,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { if (QualType::DestructionKind dtorKind = Base.getType().isDestructedType()) { CGF.pushDestroy(dtorKind, V, Base.getType()); - cleanups.push_back(CGF.EHStack.stable_begin()); + addCleanup(CGF.EHStack.stable_begin()); } } } @@ -1596,15 +1603,9 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { = field->getType().isDestructedType()) { assert(LV.isSimple()); if (CGF.needsEHCleanup(dtorKind)) { - if (!cleanupDominator) - cleanupDominator = CGF.Builder.CreateAlignedLoad( - CGF.Int8Ty, - llvm::Constant::getNullValue(CGF.Int8PtrTy), - CharUnits::One()); // placeholder - CGF.pushDestroy(EHCleanup, LV.getAddress(), field->getType(), CGF.getDestroyer(dtorKind), false); - cleanups.push_back(CGF.EHStack.stable_begin()); + addCleanup(CGF.EHStack.stable_begin()); pushedCleanup = true; } } @@ -1620,6 +1621,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) { // Deactivate all the partial cleanups in reverse order, which // generally means popping them. 
+ assert((cleanupDominator || cleanups.empty()) && + "Missing cleanupDominator before deactivating cleanup blocks"); for (unsigned i = cleanups.size(); i != 0; --i) CGF.DeactivateCleanupBlock(cleanups[i-1], cleanupDominator); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp index dd0dea5b94a0..40ab79509f98 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmt.cpp @@ -1846,11 +1846,9 @@ llvm::Value* CodeGenFunction::EmitAsmInput( InputExpr->EvaluateAsRValue(EVResult, getContext(), true); llvm::APSInt IntResult; - if (!EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), - getContext())) - llvm_unreachable("Invalid immediate constant!"); - - return llvm::ConstantInt::get(getLLVMContext(), IntResult); + if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), + getContext())) + return llvm::ConstantInt::get(getLLVMContext(), IntResult); } Expr::EvalResult Result; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index 3b2413d960d6..51a2561a4552 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1755,10 +1755,11 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall( CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty); QualType ThisTy; - if (CE) - ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType(); - else + if (CE) { + ThisTy = CE->getObjectType(); + } else { ThisTy = D->getDestroyedType(); + } CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, nullptr, QualType(), nullptr); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp index fa34414de5da..ca06ad3f042b 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ 
b/contrib/llvm/tools/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1921,10 +1921,11 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall( DtorType == Dtor_Deleting); QualType ThisTy; - if (CE) - ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType(); - else + if (CE) { + ThisTy = CE->getObjectType(); + } else { ThisTy = D->getDestroyedType(); + } This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true); RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index 5da988fb8a3c..1e1038dbfe95 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -9188,25 +9188,45 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D, namespace { class RISCVABIInfo : public DefaultABIInfo { private: - unsigned XLen; // Size of the integer ('x') registers in bits. + // Size of the integer ('x') registers in bits. + unsigned XLen; + // Size of the floating point ('f') registers in bits. Note that the target + // ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target + // with soft float ABI has FLen==0). + unsigned FLen; static const int NumArgGPRs = 8; + static const int NumArgFPRs = 8; + bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const; public: - RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen) - : DefaultABIInfo(CGT), XLen(XLen) {} + RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen) + : DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {} // DefaultABIInfo's classifyReturnType and classifyArgumentType are // non-virtual, but computeInfo is virtual, so we overload it. 
void computeInfo(CGFunctionInfo &FI) const override; - ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, - int &ArgGPRsLeft) const; + ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft, + int &ArgFPRsLeft) const; ABIArgInfo classifyReturnType(QualType RetTy) const; Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty) const override; ABIArgInfo extendType(QualType Ty) const; + + bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, llvm::Type *&Field2Ty, + CharUnits &Field2Off, int &NeededArgGPRs, + int &NeededArgFPRs) const; + ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty, + CharUnits Field1Off, + llvm::Type *Field2Ty, + CharUnits Field2Off) const; }; } // end anonymous namespace @@ -9228,18 +9248,215 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const { // different for variadic arguments, we must also track whether we are // examining a vararg or not. int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs; + int ArgFPRsLeft = FLen ? NumArgFPRs : 0; int NumFixedArgs = FI.getNumRequiredArgs(); int ArgNum = 0; for (auto &ArgInfo : FI.arguments()) { bool IsFixed = ArgNum < NumFixedArgs; - ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft); + ArgInfo.info = + classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft); ArgNum++; } } +// Returns true if the struct is a potential candidate for the floating point +// calling convention. If this function returns true, the caller is +// responsible for checking that if there is only a single field then that +// field is a float. 
+bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff, + llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off) const { + bool IsInt = Ty->isIntegralOrEnumerationType(); + bool IsFloat = Ty->isRealFloatingType(); + + if (IsInt || IsFloat) { + uint64_t Size = getContext().getTypeSize(Ty); + if (IsInt && Size > XLen) + return false; + // Can't be eligible if larger than the FP registers. Half precision isn't + // currently supported on RISC-V and the ABI hasn't been confirmed, so + // default to the integer ABI in that case. + if (IsFloat && (Size > FLen || Size < 32)) + return false; + // Can't be eligible if an integer type was already found (int+int pairs + // are not eligible). + if (IsInt && Field1Ty && Field1Ty->isIntegerTy()) + return false; + if (!Field1Ty) { + Field1Ty = CGT.ConvertType(Ty); + Field1Off = CurOff; + return true; + } + if (!Field2Ty) { + Field2Ty = CGT.ConvertType(Ty); + Field2Off = CurOff; + return true; + } + return false; + } + + if (auto CTy = Ty->getAs()) { + if (Field1Ty) + return false; + QualType EltTy = CTy->getElementType(); + if (getContext().getTypeSize(EltTy) > FLen) + return false; + Field1Ty = CGT.ConvertType(EltTy); + Field1Off = CurOff; + assert(CurOff.isZero() && "Unexpected offset for first field"); + Field2Ty = Field1Ty; + Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy); + return true; + } + + if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) { + uint64_t ArraySize = ATy->getSize().getZExtValue(); + QualType EltTy = ATy->getElementType(); + CharUnits EltSize = getContext().getTypeSizeInChars(EltTy); + for (uint64_t i = 0; i < ArraySize; ++i) { + bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty, + Field1Off, Field2Ty, Field2Off); + if (!Ret) + return false; + CurOff += EltSize; + } + return true; + } + + if (const auto *RTy = Ty->getAs()) { + // Structures with either a non-trivial destructor or a 
non-trivial + // copy constructor are not eligible for the FP calling convention. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, CGT.getCXXABI())) + return false; + if (isEmptyRecord(getContext(), Ty, true)) + return true; + const RecordDecl *RD = RTy->getDecl(); + // Unions aren't eligible unless they're empty (which is caught above). + if (RD->isUnion()) + return false; + int ZeroWidthBitFieldCount = 0; + for (const FieldDecl *FD : RD->fields()) { + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); + uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex()); + QualType QTy = FD->getType(); + if (FD->isBitField()) { + unsigned BitWidth = FD->getBitWidthValue(getContext()); + // Allow a bitfield with a type greater than XLen as long as the + // bitwidth is XLen or less. + if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen) + QTy = getContext().getIntTypeForBitwidth(XLen, false); + if (BitWidth == 0) { + ZeroWidthBitFieldCount++; + continue; + } + } + + bool Ret = detectFPCCEligibleStructHelper( + QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits), + Field1Ty, Field1Off, Field2Ty, Field2Off); + if (!Ret) + return false; + + // As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp + // or int+fp structs, but are ignored for a struct with an fp field and + // any number of zero-width bitfields. + if (Field2Ty && ZeroWidthBitFieldCount > 0) + return false; + } + return Field1Ty != nullptr; + } + + return false; +} + +// Determine if a struct is eligible for passing according to the floating +// point calling convention (i.e., when flattened it contains a single fp +// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and +// NeededArgGPRs are incremented appropriately. 
+bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty, + CharUnits &Field1Off, + llvm::Type *&Field2Ty, + CharUnits &Field2Off, + int &NeededArgGPRs, + int &NeededArgFPRs) const { + Field1Ty = nullptr; + Field2Ty = nullptr; + NeededArgGPRs = 0; + NeededArgFPRs = 0; + bool IsCandidate = detectFPCCEligibleStructHelper( + Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off); + // Not really a candidate if we have a single int but no float. + if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy()) + return IsCandidate = false; + if (!IsCandidate) + return false; + if (Field1Ty && Field1Ty->isFloatingPointTy()) + NeededArgFPRs++; + else if (Field1Ty) + NeededArgGPRs++; + if (Field2Ty && Field2Ty->isFloatingPointTy()) + NeededArgFPRs++; + else if (Field2Ty) + NeededArgGPRs++; + return IsCandidate; +} + +// Call getCoerceAndExpand for the two-element flattened struct described by +// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an +// appropriate coerceToType and unpaddedCoerceToType. 
+ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct( + llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty, + CharUnits Field2Off) const { + SmallVector CoerceElts; + SmallVector UnpaddedCoerceElts; + if (!Field1Off.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity())); + + CoerceElts.push_back(Field1Ty); + UnpaddedCoerceElts.push_back(Field1Ty); + + if (!Field2Ty) { + return ABIArgInfo::getCoerceAndExpand( + llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()), + UnpaddedCoerceElts[0]); + } + + CharUnits Field2Align = + CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty)); + CharUnits Field1Size = + CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty)); + CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align); + + CharUnits Padding = CharUnits::Zero(); + if (Field2Off > Field2OffNoPadNoPack) + Padding = Field2Off - Field2OffNoPadNoPack; + else if (Field2Off != Field2Align && Field2Off > Field1Size) + Padding = Field2Off - Field1Size; + + bool IsPacked = !Field2Off.isMultipleOf(Field2Align); + + if (!Padding.isZero()) + CoerceElts.push_back(llvm::ArrayType::get( + llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity())); + + CoerceElts.push_back(Field2Ty); + UnpaddedCoerceElts.push_back(Field2Ty); + + auto CoerceToType = + llvm::StructType::get(getVMContext(), CoerceElts, IsPacked); + auto UnpaddedCoerceToType = + llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked); + + return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); +} + ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, - int &ArgGPRsLeft) const { + int &ArgGPRsLeft, + int &ArgFPRsLeft) const { assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow"); Ty = useFirstFieldIfTransparentUnion(Ty); @@ -9257,6 +9474,42 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, 
return ABIArgInfo::getIgnore(); uint64_t Size = getContext().getTypeSize(Ty); + + // Pass floating point values via FPRs if possible. + if (IsFixed && Ty->isFloatingType() && FLen >= Size && ArgFPRsLeft) { + ArgFPRsLeft--; + return ABIArgInfo::getDirect(); + } + + // Complex types for the hard float ABI must be passed direct rather than + // using CoerceAndExpand. + if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) { + QualType EltTy = Ty->getAs()->getElementType(); + if (getContext().getTypeSize(EltTy) <= FLen) { + ArgFPRsLeft -= 2; + return ABIArgInfo::getDirect(); + } + } + + if (IsFixed && FLen && Ty->isStructureOrClassType()) { + llvm::Type *Field1Ty = nullptr; + llvm::Type *Field2Ty = nullptr; + CharUnits Field1Off = CharUnits::Zero(); + CharUnits Field2Off = CharUnits::Zero(); + int NeededArgGPRs; + int NeededArgFPRs; + bool IsCandidate = + detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off, + NeededArgGPRs, NeededArgFPRs); + if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft && + NeededArgFPRs <= ArgFPRsLeft) { + ArgGPRsLeft -= NeededArgGPRs; + ArgFPRsLeft -= NeededArgFPRs; + return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty, + Field2Off); + } + } + uint64_t NeededAlign = getContext().getTypeAlign(Ty); bool MustUseStack = false; // Determine the number of GPRs needed to pass the current argument @@ -9315,10 +9568,12 @@ ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getIgnore(); int ArgGPRsLeft = 2; + int ArgFPRsLeft = FLen ? 2 : 0; // The rules for return and argument types are the same, so defer to // classifyArgumentType. 
- return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft); + return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft, + ArgFPRsLeft); } Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, @@ -9353,8 +9608,9 @@ ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const { namespace { class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { public: - RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen) - : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {} + RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, + unsigned FLen) + : TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen, FLen)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { @@ -9493,9 +9749,16 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new MSP430TargetCodeGenInfo(Types)); case llvm::Triple::riscv32: - return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32)); - case llvm::Triple::riscv64: - return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64)); + case llvm::Triple::riscv64: { + StringRef ABIStr = getTarget().getABI(); + unsigned XLen = getTarget().getPointerWidth(0); + unsigned ABIFLen = 0; + if (ABIStr.endswith("f")) + ABIFLen = 32; + else if (ABIStr.endswith("d")) + ABIFLen = 64; + return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen)); + } case llvm::Triple::systemz: { bool HasVector = getTarget().getABI() == "vector"; diff --git a/contrib/llvm/tools/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp b/contrib/llvm/tools/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp index 6d7d69da4db5..1a66faeb3239 100644 --- a/contrib/llvm/tools/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp +++ b/contrib/llvm/tools/clang/lib/DirectoryWatcher/linux/DirectoryWatcher-linux.cpp @@ -24,7 +24,6 @@ #include #include -#include #include #include #include @@ -335,7 +334,7 @@ std::unique_ptr 
clang::DirectoryWatcher::create( InotifyFD, Path.str().c_str(), IN_CREATE | IN_DELETE | IN_DELETE_SELF | IN_MODIFY | IN_MOVED_FROM | IN_MOVE_SELF | IN_MOVED_TO | IN_ONLYDIR | IN_IGNORED -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) +#ifdef IN_EXCL_UNLINK | IN_EXCL_UNLINK #endif ); diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp index cb861f27aeda..2508178423bf 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Clang.cpp @@ -501,8 +501,6 @@ static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) { return codegenoptions::LimitedDebugInfo; } -enum class FramePointerKind { None, NonLeaf, All }; - static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) { switch (Triple.getArch()){ default: @@ -517,9 +515,6 @@ static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) { static bool useFramePointerForTargetByDefault(const ArgList &Args, const llvm::Triple &Triple) { - if (Args.hasArg(options::OPT_pg)) - return true; - switch (Triple.getArch()) { case llvm::Triple::xcore: case llvm::Triple::wasm32: @@ -579,22 +574,32 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args, return true; } -static FramePointerKind getFramePointerKind(const ArgList &Args, - const llvm::Triple &Triple) { - Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer, - options::OPT_fno_omit_frame_pointer); - bool OmitFP = A && A->getOption().matches(options::OPT_fomit_frame_pointer); - bool NoOmitFP = - A && A->getOption().matches(options::OPT_fno_omit_frame_pointer); - if (NoOmitFP || mustUseNonLeafFramePointerForTarget(Triple) || - (!OmitFP && useFramePointerForTargetByDefault(Args, Triple))) { - if (Args.hasFlag(options::OPT_momit_leaf_frame_pointer, - options::OPT_mno_omit_leaf_frame_pointer, - Triple.isPS4CPU())) - return FramePointerKind::NonLeaf; - return 
FramePointerKind::All; - } - return FramePointerKind::None; +static bool shouldUseFramePointer(const ArgList &Args, + const llvm::Triple &Triple) { + if (Arg *A = Args.getLastArg(options::OPT_fno_omit_frame_pointer, + options::OPT_fomit_frame_pointer)) + return A->getOption().matches(options::OPT_fno_omit_frame_pointer) || + mustUseNonLeafFramePointerForTarget(Triple); + + if (Args.hasArg(options::OPT_pg)) + return true; + + return useFramePointerForTargetByDefault(Args, Triple); +} + +static bool shouldUseLeafFramePointer(const ArgList &Args, + const llvm::Triple &Triple) { + if (Arg *A = Args.getLastArg(options::OPT_mno_omit_leaf_frame_pointer, + options::OPT_momit_leaf_frame_pointer)) + return A->getOption().matches(options::OPT_mno_omit_leaf_frame_pointer); + + if (Args.hasArg(options::OPT_pg)) + return true; + + if (Triple.isPS4CPU()) + return false; + + return useFramePointerForTargetByDefault(Args, Triple); } /// Add a CC1 option to specify the debug compilation directory. @@ -3946,12 +3951,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false)) CmdArgs.push_back("-fdefault-calling-conv=stdcall"); - FramePointerKind FPKeepKind = getFramePointerKind(Args, RawTriple); - if (FPKeepKind != FramePointerKind::None) { + if (shouldUseFramePointer(Args, RawTriple)) CmdArgs.push_back("-mdisable-fp-elim"); - if (FPKeepKind == FramePointerKind::NonLeaf) - CmdArgs.push_back("-momit-leaf-frame-pointer"); - } if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss, options::OPT_fno_zero_initialized_in_bss)) CmdArgs.push_back("-mno-zero-initialized-in-bss"); @@ -4136,6 +4137,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(A->getValue()); } + if (!shouldUseLeafFramePointer(Args, RawTriple)) + CmdArgs.push_back("-momit-leaf-frame-pointer"); + // Explicitly error on some things we know we don't support and can't just // ignore. 
if (!Args.hasArg(options::OPT_fallow_unsupported)) { @@ -5489,7 +5493,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } if (Arg *A = Args.getLastArg(options::OPT_pg)) - if (FPKeepKind == FramePointerKind::None) + if (!shouldUseFramePointer(Args, Triple)) D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer" << A->getAsString(Args); diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp index 7445a94cfe59..783d1f9d0919 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp @@ -435,7 +435,6 @@ void ASTUnit::CacheCodeCompletionResults() { | (1LL << CodeCompletionContext::CCC_UnionTag) | (1LL << CodeCompletionContext::CCC_ClassOrStructTag) | (1LL << CodeCompletionContext::CCC_Type) - | (1LL << CodeCompletionContext::CCC_Symbol) | (1LL << CodeCompletionContext::CCC_SymbolOrNewName) | (1LL << CodeCompletionContext::CCC_ParenthesizedExpression); diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp index 8a9844096f08..bc54e38a1a63 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp @@ -2408,7 +2408,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) - .Case("c++", LangStandard::lang_openclcpp) + .Cases("clc++", "CLC++", LangStandard::lang_openclcpp) .Default(LangStandard::lang_unspecified); if (OpenCLLangStd == LangStandard::lang_unspecified) { diff --git a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp index 3906e2ae1b98..6feb7bcbd4b7 100644 --- 
a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp @@ -437,17 +437,17 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, default: llvm_unreachable("Unsupported OpenCL version"); } - Builder.defineMacro("CL_VERSION_1_0", "100"); - Builder.defineMacro("CL_VERSION_1_1", "110"); - Builder.defineMacro("CL_VERSION_1_2", "120"); - Builder.defineMacro("CL_VERSION_2_0", "200"); - - if (TI.isLittleEndian()) - Builder.defineMacro("__ENDIAN_LITTLE__"); - - if (LangOpts.FastRelaxedMath) - Builder.defineMacro("__FAST_RELAXED_MATH__"); } + Builder.defineMacro("CL_VERSION_1_0", "100"); + Builder.defineMacro("CL_VERSION_1_1", "110"); + Builder.defineMacro("CL_VERSION_1_2", "120"); + Builder.defineMacro("CL_VERSION_2_0", "200"); + + if (TI.isLittleEndian()) + Builder.defineMacro("__ENDIAN_LITTLE__"); + + if (LangOpts.FastRelaxedMath) + Builder.defineMacro("__FAST_RELAXED_MATH__"); } // Not "standard" per se, but available even with the -undef flag. if (LangOpts.AsmPreprocessor) diff --git a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h index 3d55f5f2710f..c8fefdfc792a 100644 --- a/contrib/llvm/tools/clang/lib/Headers/emmintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/emmintrin.h @@ -4029,7 +4029,7 @@ _mm_storeu_si128(__m128i_u *__p, __m128i __b) /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS -_mm_storeu_si64(void const *__p, __m128i __b) +_mm_storeu_si64(void *__p, __m128i __b) { struct __storeu_si64 { long long __v; @@ -4050,7 +4050,7 @@ _mm_storeu_si64(void const *__p, __m128i __b) /// \param __b /// A 128-bit integer vector containing the value to be stored. 
static __inline__ void __DEFAULT_FN_ATTRS -_mm_storeu_si32(void const *__p, __m128i __b) +_mm_storeu_si32(void *__p, __m128i __b) { struct __storeu_si32 { int __v; @@ -4071,7 +4071,7 @@ _mm_storeu_si32(void const *__p, __m128i __b) /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS -_mm_storeu_si16(void const *__p, __m128i __b) +_mm_storeu_si16(void *__p, __m128i __b) { struct __storeu_si16 { short __v; diff --git a/contrib/llvm/tools/clang/lib/Headers/opencl-c-base.h b/contrib/llvm/tools/clang/lib/Headers/opencl-c-base.h index a82954ddd326..9a23333a33e6 100644 --- a/contrib/llvm/tools/clang/lib/Headers/opencl-c-base.h +++ b/contrib/llvm/tools/clang/lib/Headers/opencl-c-base.h @@ -126,7 +126,7 @@ typedef double double8 __attribute__((ext_vector_type(8))); typedef double double16 __attribute__((ext_vector_type(16))); #endif -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #define NULL ((void*)0) #endif @@ -276,7 +276,7 @@ typedef uint cl_mem_fence_flags; */ #define CLK_GLOBAL_MEM_FENCE 0x02 -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) typedef enum memory_scope { memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, @@ -288,9 +288,6 @@ typedef enum memory_scope { #endif } memory_scope; -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 - -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 /** * Queue a memory fence to ensure correct ordering of memory * operations between work-items of a work-group to @@ -313,7 +310,7 @@ typedef enum memory_order memory_order_seq_cst = __ATOMIC_SEQ_CST } memory_order; -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions @@ -389,14 +386,10 @@ typedef 
enum memory_order #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 // OpenCL v2.0 s6.13.16 - Pipe Functions -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 - // OpenCL v2.0 s6.13.17 - Enqueue Kernels -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 - #define CL_COMPLETE 0x0 #define CL_RUNNING 0x1 #define CL_SUBMITTED 0x2 @@ -435,7 +428,7 @@ typedef struct { size_t localWorkSize[MAX_WORK_DIM]; } ndrange_t; -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_intel_device_side_avc_motion_estimation #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin diff --git a/contrib/llvm/tools/clang/lib/Headers/opencl-c.h b/contrib/llvm/tools/clang/lib/Headers/opencl-c.h index 4207c53ccedb..8741bccec9ad 100644 --- a/contrib/llvm/tools/clang/lib/Headers/opencl-c.h +++ b/contrib/llvm/tools/clang/lib/Headers/opencl-c.h @@ -11,11 +11,11 @@ #include "opencl-c-base.h" -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifndef cl_khr_depth_images #define cl_khr_depth_images #endif //cl_khr_depth_images -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #if __OPENCL_C_VERSION__ < CL_VERSION_2_0 #ifdef cl_khr_3d_image_writes @@ -23,10 +23,10 @@ #endif //cl_khr_3d_image_writes #endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0 -#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) #pragma OPENCL EXTENSION cl_intel_planar_yuv : begin #pragma OPENCL EXTENSION cl_intel_planar_yuv : end -#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#endif // 
defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) #define __ovld __attribute__((overloadable)) #define __conv __attribute__((convergent)) @@ -6517,11 +6517,11 @@ size_t __ovld __cnfn get_group_id(uint dimindx); */ size_t __ovld __cnfn get_global_offset(uint dimindx); -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) size_t __ovld get_enqueued_local_size(uint dimindx); size_t __ovld get_global_linear_id(void); size_t __ovld get_local_linear_id(void); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions @@ -7352,7 +7352,7 @@ half16 __ovld __cnfn fmod(half16 x, half16 y); * Returns fmin(x - floor (x), 0x1.fffffep-1f ). * floor(x) is returned in iptr. */ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float __ovld fract(float x, float *iptr); float2 __ovld fract(float2 x, float2 *iptr); float3 __ovld fract(float3 x, float3 *iptr); @@ -7434,7 +7434,7 @@ half4 __ovld fract(half4 x, __private half4 *iptr); half8 __ovld fract(half8 x, __private half8 *iptr); half16 __ovld fract(half16 x, __private half16 *iptr); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Extract mantissa and exponent from x. For each @@ -7442,7 +7442,7 @@ half16 __ovld fract(half16 x, __private half16 *iptr); * magnitude in the interval [1/2, 1) or 0. Each * component of x equals mantissa returned * 2^exp. 
*/ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float __ovld frexp(float x, int *exp); float2 __ovld frexp(float2 x, int2 *exp); float3 __ovld frexp(float3 x, int3 *exp); @@ -7524,7 +7524,7 @@ half4 __ovld frexp(half4 x, __private int4 *exp); half8 __ovld frexp(half8 x, __private int8 *exp); half16 __ovld frexp(half16 x, __private int16 *exp); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Compute the value of the square root of x^2 + y^2 @@ -7649,7 +7649,7 @@ half8 __ovld __cnfn lgamma(half8 x); half16 __ovld __cnfn lgamma(half16 x); #endif //cl_khr_fp16 -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float __ovld lgamma_r(float x, int *signp); float2 __ovld lgamma_r(float2 x, int2 *signp); float3 __ovld lgamma_r(float3 x, int3 *signp); @@ -7731,7 +7731,7 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp); half8 __ovld lgamma_r(half8 x, __private int8 *signp); half16 __ovld lgamma_r(half16 x, __private int16 *signp); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Compute natural logarithm. @@ -7955,7 +7955,7 @@ half16 __ovld __cnfn minmag(half16 x, half16 y); * the argument. It stores the integral part in the object * pointed to by iptr. 
*/ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float __ovld modf(float x, float *iptr); float2 __ovld modf(float2 x, float2 *iptr); float3 __ovld modf(float3 x, float3 *iptr); @@ -8037,7 +8037,7 @@ half4 __ovld modf(half4 x, __private half4 *iptr); half8 __ovld modf(half8 x, __private half8 *iptr); half16 __ovld modf(half16 x, __private half16 *iptr); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Returns a quiet NaN. The nancode may be placed @@ -8215,7 +8215,7 @@ half16 __ovld __cnfn remainder(half16 x, half16 y); * sign as x/y. It stores this signed value in the object * pointed to by quo. */ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float __ovld remquo(float x, float y, int *quo); float2 __ovld remquo(float2 x, float2 y, int2 *quo); float3 __ovld remquo(float3 x, float3 y, int3 *quo); @@ -8298,7 +8298,7 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo); half8 __ovld remquo(half8 x, half8 y, __private int8 *quo); half16 __ovld remquo(half16 x, half16 y, __private int16 *quo); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Round to integral value (using round to nearest * even rounding mode) in floating-point format. @@ -8439,7 +8439,7 @@ half16 __ovld __cnfn sin(half16); * is the return value and computed cosine is returned * in cosval. 
*/ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float __ovld sincos(float x, float *cosval); float2 __ovld sincos(float2 x, float2 *cosval); float3 __ovld sincos(float3 x, float3 *cosval); @@ -8521,7 +8521,7 @@ half4 __ovld sincos(half4 x, __private half4 *cosval); half8 __ovld sincos(half8 x, __private half8 *cosval); half16 __ovld sincos(half16 x, __private half16 *cosval); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Compute hyperbolic sine. @@ -9446,7 +9446,7 @@ ulong16 __ovld __cnfn clz(ulong16 x); * returns the size in bits of the type of x or * component type of x, if x is a vector. */ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) char __ovld ctz(char x); uchar __ovld ctz(uchar x); char2 __ovld ctz(char2 x); @@ -9495,7 +9495,7 @@ long8 __ovld ctz(long8 x); ulong8 __ovld ctz(ulong8 x); long16 __ovld ctz(long16 x); ulong16 __ovld ctz(ulong16 x); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Returns mul_hi(a, b) + c. 
@@ -11340,7 +11340,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p); half16 __ovld vload16(size_t offset, const __constant half *p); #endif //cl_khr_fp16 -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) char2 __ovld vload2(size_t offset, const char *p); uchar2 __ovld vload2(size_t offset, const uchar *p); short2 __ovld vload2(size_t offset, const short *p); @@ -11578,9 +11578,9 @@ half4 __ovld vload4(size_t offset, const __private half *p); half8 __ovld vload8(size_t offset, const __private half *p); half16 __ovld vload16(size_t offset, const __private half *p); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld vstore2(char2 data, size_t offset, char *p); void __ovld vstore2(uchar2 data, size_t offset, uchar *p); void __ovld vstore2(short2 data, size_t offset, short *p); @@ -11814,7 +11814,7 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p); void __ovld vstore8(half8 data, size_t offset, __private half *p); void __ovld vstore16(half16 data, size_t offset, __private half *p); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Read sizeof (half) bytes of data from address @@ -11825,13 +11825,13 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p); * must be 16-bit aligned. 
*/ float __ovld vload_half(size_t offset, const __constant half *p); -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float __ovld vload_half(size_t offset, const half *p); #else float __ovld vload_half(size_t offset, const __global half *p); float __ovld vload_half(size_t offset, const __local half *p); float __ovld vload_half(size_t offset, const __private half *p); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Read sizeof (halfn) bytes of data from address @@ -11846,7 +11846,7 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p); float4 __ovld vload_half4(size_t offset, const __constant half *p); float8 __ovld vload_half8(size_t offset, const __constant half *p); float16 __ovld vload_half16(size_t offset, const __constant half *p); -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float2 __ovld vload_half2(size_t offset, const half *p); float3 __ovld vload_half3(size_t offset, const half *p); float4 __ovld vload_half4(size_t offset, const half *p); @@ -11868,7 +11868,7 @@ float3 __ovld vload_half3(size_t offset, const __private half *p); float4 __ovld vload_half4(size_t offset, const __private half *p); float8 __ovld vload_half8(size_t offset, const __private half *p); float16 __ovld vload_half16(size_t offset, const __private half *p); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * The float value given by data is first @@ -11881,7 +11881,7 @@ float16 __ovld vload_half16(size_t offset, const __private half *p); * The default current rounding mode is round to * nearest even. 
*/ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld vstore_half(float data, size_t offset, half *p); void __ovld vstore_half_rte(float data, size_t offset, half *p); void __ovld vstore_half_rtz(float data, size_t offset, half *p); @@ -11927,7 +11927,7 @@ void __ovld vstore_half_rtz(double data, size_t offset, __private half *p); void __ovld vstore_half_rtp(double data, size_t offset, __private half *p); void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * The floatn value given by data is converted to @@ -11940,7 +11940,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p); * The default current rounding mode is round to * nearest even. */ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld vstore_half2(float2 data, size_t offset, half *p); void __ovld vstore_half3(float3 data, size_t offset, half *p); void __ovld vstore_half4(float4 data, size_t offset, half *p); @@ -12146,7 +12146,7 @@ void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p); void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p); void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * For n = 1, 2, 4, 8 and 16 read sizeof (halfn) @@ -12167,7 +12167,7 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p); float4 __ovld vloada_half4(size_t offset, const __constant half *p); float8 __ovld vloada_half8(size_t offset, const __constant half *p); float16 __ovld vloada_half16(size_t 
offset, const __constant half *p); -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float __ovld vloada_half(size_t offset, const half *p); float2 __ovld vloada_half2(size_t offset, const half *p); float3 __ovld vloada_half3(size_t offset, const half *p); @@ -12193,7 +12193,7 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p); float4 __ovld vloada_half4(size_t offset, const __private half *p); float8 __ovld vloada_half8(size_t offset, const __private half *p); float16 __ovld vloada_half16(size_t offset, const __private half *p); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * The floatn value given by data is converted to @@ -12211,7 +12211,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p); * mode. The default current rounding mode is * round to nearest even. */ -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld vstorea_half(float data, size_t offset, half *p); void __ovld vstorea_half2(float2 data, size_t offset, half *p); void __ovld vstorea_half3(float3 data, size_t offset, half *p); @@ -12496,7 +12496,7 @@ void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p); void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p); void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); #endif //cl_khr_fp64 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions @@ -12532,10 +12532,10 @@ void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); void __ovld __conv barrier(cl_mem_fence_flags flags); -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 
+#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope); void __ovld __conv work_group_barrier(cl_mem_fence_flags flags); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions @@ -12580,7 +12580,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags); // OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) cl_mem_fence_flags __ovld get_fence(const void *ptr); cl_mem_fence_flags __ovld get_fence(void *ptr); @@ -12591,7 +12591,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr); * where gentype is builtin type or user defined type. */ -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch @@ -13371,7 +13371,7 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v // OpenCL v2.0 s6.13.11 - Atomics Functions -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) @@ -13692,7 +13692,7 @@ void __ovld atomic_flag_clear(volatile atomic_flag *object); void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order); void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 
+#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions @@ -14186,7 +14186,7 @@ half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask); half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask); #endif //cl_khr_fp16 -#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); @@ -14307,7 +14307,7 @@ int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, f uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord); uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord); -#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord); float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord); @@ -14315,7 +14315,7 @@ int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_ int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord); uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord); uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord); -#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord); float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord); @@ -14325,7 
+14325,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, f uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord); uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord); -#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord); float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord); @@ -14333,7 +14333,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_ int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord); uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord); uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord); -#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) #ifdef cl_khr_depth_images float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord); @@ -14358,7 +14358,7 @@ float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, in #endif //cl_khr_gl_msaa_sharing // OpenCL Extension v2.0 s9.18 - Mipmaps -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod); @@ -14410,9 +14410,9 @@ int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, f uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); #endif 
//cl_khr_mipmap_image -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) -#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) /** * Sampler-less Image Access @@ -14447,7 +14447,7 @@ float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord); int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord); uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord); -#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) // Image read functions returning half4 type #ifdef cl_khr_fp16 @@ -14457,7 +14457,7 @@ half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord); half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord); half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord); -#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord); half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord); half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord); @@ -14471,11 +14471,11 @@ half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord); half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord); half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord); half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord); -#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2 +#endif // 
defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) #endif //cl_khr_fp16 // Image read functions for read_write images -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord); int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord); uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord); @@ -14518,7 +14518,7 @@ float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 co float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample); #endif //cl_khr_gl_msaa_sharing -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod); int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod); @@ -14569,7 +14569,7 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); #endif //cl_khr_mipmap_image -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // Image read functions returning half4 type #ifdef cl_khr_fp16 @@ -14580,7 +14580,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord); half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord); half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Write 
color value to location specified by coordinate @@ -14681,7 +14681,7 @@ void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, flo #endif //cl_khr_depth_images // OpenCL Extension v2.0 s9.18 - Mipmaps -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color); void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color); @@ -14708,7 +14708,7 @@ void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 c void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color); #endif #endif //cl_khr_mipmap_image -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // Image write functions for half4 type #ifdef cl_khr_fp16 @@ -14723,7 +14723,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col #endif //cl_khr_fp16 // Image write functions for read_write images -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color); void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color); void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color); @@ -14755,7 +14755,7 @@ void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float col void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color); #endif //cl_khr_depth_images -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color); void __ovld 
write_imagei(read_write image1d_t image, int coord, int lod, int4 color); @@ -14782,7 +14782,7 @@ void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 c void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color); #endif #endif //cl_khr_mipmap_image -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // Image write functions for half4 type #ifdef cl_khr_fp16 @@ -14795,7 +14795,7 @@ void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 col void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color); void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color); #endif //cl_khr_fp16 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have // access qualifier, which by default assume read_only access qualifier. 
Image query builtin @@ -14843,7 +14843,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image); int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __cnfn get_image_width(read_write image1d_t image); int __ovld __cnfn get_image_width(read_write image1d_buffer_t image); int __ovld __cnfn get_image_width(read_write image2d_t image); @@ -14860,7 +14860,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image); int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image); int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the image height in pixels. 
@@ -14895,7 +14895,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image); int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __cnfn get_image_height(read_write image2d_t image); int __ovld __cnfn get_image_height(read_write image3d_t image); int __ovld __cnfn get_image_height(read_write image2d_array_t image); @@ -14909,7 +14909,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image); int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image); int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the image depth in pixels. @@ -14920,12 +14920,12 @@ int __ovld __cnfn get_image_depth(read_only image3d_t image); int __ovld __cnfn get_image_depth(write_only image3d_t image); #endif -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __cnfn get_image_depth(read_write image3d_t image); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL Extension v2.0 s9.18 - Mipmaps -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #ifdef cl_khr_mipmap_image /** * Return the image miplevels. 
@@ -14961,7 +14961,7 @@ int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image); int __ovld get_image_num_mip_levels(read_write image2d_depth_t image); #endif //cl_khr_mipmap_image -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the channel data type. Valid values are: @@ -15018,7 +15018,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t im int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image); int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image); int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image); @@ -15035,7 +15035,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image); int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the image channel order. 
Valid values are: @@ -15090,7 +15090,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image) int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __cnfn get_image_channel_order(read_write image1d_t image); int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image); int __ovld __cnfn get_image_channel_order(read_write image2d_t image); @@ -15107,7 +15107,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image) int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image); int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the 2D image width and height as an int2 @@ -15140,7 +15140,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image); int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int2 __ovld __cnfn get_image_dim(read_write image2d_t image); int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image); #ifdef cl_khr_depth_images @@ -15153,7 +15153,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image); int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image); int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image); #endif //cl_khr_gl_msaa_sharing -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * 
Return the 3D image width, height, and depth as an @@ -15165,9 +15165,9 @@ int4 __ovld __cnfn get_image_dim(read_only image3d_t image); #ifdef cl_khr_3d_image_writes int4 __ovld __cnfn get_image_dim(write_only image3d_t image); #endif -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int4 __ovld __cnfn get_image_dim(read_write image3d_t image); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the image array size. @@ -15193,7 +15193,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array); #endif //cl_khr_gl_msaa_sharing -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array); size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array); #ifdef cl_khr_depth_images @@ -15203,7 +15203,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array); size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array); #endif //cl_khr_gl_msaa_sharing -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) /** * Return the number of samples associated with image @@ -15219,17 +15219,17 @@ int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image); int __ovld get_image_num_samples(write_only image2d_array_msaa_t image); int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || 
(__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld get_image_num_samples(read_write image2d_msaa_t image); int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image); int __ovld get_image_num_samples(read_write image2d_array_msaa_t image); int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) #endif // OpenCL v2.0 s6.13.15 - Work-group Functions -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __conv work_group_all(int predicate); int __ovld __conv work_group_any(int predicate); @@ -15327,16 +15327,16 @@ double __ovld __conv work_group_scan_inclusive_min(double x); double __ovld __conv work_group_scan_inclusive_max(double x); #endif //cl_khr_fp64 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v2.0 s6.13.16 - Pipe Functions -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) // OpenCL v2.0 s6.13.17 - Enqueue Kernels -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ndrange_t __ovld ndrange_1D(size_t); ndrange_t __ovld ndrange_1D(size_t, size_t); @@ -15365,7 +15365,7 @@ bool __ovld is_valid_event (clk_event_t event); void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value); queue_t __ovld get_default_queue(void); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= 
CL_VERSION_2_0) // OpenCL Extension v2.0 s9.17 - Sub-groups @@ -15374,16 +15374,16 @@ queue_t __ovld get_default_queue(void); uint __ovld get_sub_group_size(void); uint __ovld get_max_sub_group_size(void); uint __ovld get_num_sub_groups(void); -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld get_enqueued_num_sub_groups(void); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld get_sub_group_id(void); uint __ovld get_sub_group_local_id(void); void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags); -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope); -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) int __ovld __conv sub_group_all(int predicate); int __ovld __conv sub_group_any(int predicate); @@ -15573,12 +15573,12 @@ uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, in uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord ); uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord ); -#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord); uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord); uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord); uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord); -#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif // defined(__OPENCL_CPP_VERSION__) || 
(__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld __conv intel_sub_group_block_read( const __global uint* p ); uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p ); @@ -15590,12 +15590,12 @@ void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, i void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data); void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data); -#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data); void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data); void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data); void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data); -#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data ); void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data ); @@ -15713,12 +15713,12 @@ uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t ima uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord ); uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord ); -#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord ); uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord ); uint4 __ovld __conv intel_sub_group_block_read_ui4( 
read_write image2d_t image, int2 byte_coord ); uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord ); -#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p ); uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p ); @@ -15730,12 +15730,12 @@ void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t im void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data ); void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data ); -#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data ); void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data ); void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data ); void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data ); -#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data ); void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data ); @@ -15747,12 +15747,12 @@ ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t im ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord ); ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord ); -#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if 
defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord); ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord); ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord); ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord); -#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p ); ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p ); @@ -15764,12 +15764,12 @@ void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t i void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data); void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data); -#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data); void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data); void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data); void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data); -#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) +#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0) void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data ); void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data ); diff --git a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp 
b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp index 2756042f23eb..5658f46c99de 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp @@ -33,6 +33,7 @@ #include "clang/Lex/Token.h" #include "clang/Lex/VariadicMacroSupport.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" @@ -2399,6 +2400,13 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( Token Tok; LexUnexpandedToken(Tok); + // Ensure we consume the rest of the macro body if errors occur. + auto _ = llvm::make_scope_exit([&]() { + // The flag indicates if we are still waiting for 'eod'. + if (CurLexer->ParsingPreprocessorDirective) + DiscardUntilEndOfDirective(); + }); + // Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk // within their appropriate context. VariadicMacroScopeGuard VariadicMacroScopeGuard(*this); @@ -2420,12 +2428,8 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody( } else if (Tok.is(tok::l_paren)) { // This is a function-like macro definition. Read the argument list. MI->setIsFunctionLike(); - if (ReadMacroParameterList(MI, LastTok)) { - // Throw away the rest of the line. - if (CurPPLexer->ParsingPreprocessorDirective) - DiscardUntilEndOfDirective(); + if (ReadMacroParameterList(MI, LastTok)) return nullptr; - } // If this is a definition of an ISO C/C++ variadic function-like macro (not // using the GNU named varargs extension) inform our variadic scope guard diff --git a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp index 11fed28b52db..485d39e2c9e8 100644 --- a/contrib/llvm/tools/clang/lib/Sema/Sema.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/Sema.cpp @@ -961,6 +961,7 @@ void Sema::ActOnEndOfTranslationUnit() { // All dllexport classes should have been processed already. 
assert(DelayedDllExportClasses.empty()); + assert(DelayedDllExportMemberFunctions.empty()); // Remove file scoped decls that turned out to be used. UnusedFileScopedDecls.erase( diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp index a6c52b7d4b2b..8f19edbc4f36 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp @@ -1984,10 +1984,27 @@ NamedDecl *Sema::LazilyCreateBuiltin(IdentifierInfo *II, unsigned ID, ASTContext::GetBuiltinTypeError Error; QualType R = Context.GetBuiltinType(ID, Error); if (Error) { - if (ForRedeclaration) - Diag(Loc, diag::warn_implicit_decl_requires_sysheader) - << getHeaderName(Context.BuiltinInfo, ID, Error) + if (!ForRedeclaration) + return nullptr; + + // If we have a builtin without an associated type we should not emit a + // warning when we were not able to find a type for it. + if (Error == ASTContext::GE_Missing_type) + return nullptr; + + // If we could not find a type for setjmp it is because the jmp_buf type was + // not defined prior to the setjmp declaration. + if (Error == ASTContext::GE_Missing_setjmp) { + Diag(Loc, diag::warn_implicit_decl_no_jmp_buf) << Context.BuiltinInfo.getName(ID); + return nullptr; + } + + // Generally, we emit a warning that the declaration requires the + // appropriate header. + Diag(Loc, diag::warn_implicit_decl_requires_sysheader) + << getHeaderName(Context.BuiltinInfo, ID, Error) + << Context.BuiltinInfo.getName(ID); return nullptr; } @@ -11527,9 +11544,12 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { // Check for self-references within variable initializers. // Variables declared within a function/method body (except for references) // are handled by a dataflow analysis. 
- if (!VDecl->hasLocalStorage() || VDecl->getType()->isRecordType() || - VDecl->getType()->isReferenceType()) { - CheckSelfReference(*this, RealDecl, Init, DirectInit); + // This is undefined behavior in C++, but valid in C. + if (getLangOpts().CPlusPlus) { + if (!VDecl->hasLocalStorage() || VDecl->getType()->isRecordType() || + VDecl->getType()->isReferenceType()) { + CheckSelfReference(*this, RealDecl, Init, DirectInit); + } } // If the type changed, it means we had an incomplete type that was diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp index 9a6385f28319..15984f89e22d 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp @@ -1030,8 +1030,10 @@ static IsTupleLike isTupleLike(Sema &S, SourceLocation Loc, QualType T, TemplateArgumentListInfo Args(Loc, Loc); Args.addArgument(getTrivialTypeTemplateArgument(S, Loc, T)); - // If there's no tuple_size specialization, it's not tuple-like. - if (lookupStdTypeTraitMember(S, R, Loc, "tuple_size", Args, /*DiagID*/0)) + // If there's no tuple_size specialization or the lookup of 'value' is empty, + // it's not tuple-like. 
+ if (lookupStdTypeTraitMember(S, R, Loc, "tuple_size", Args, /*DiagID*/ 0) || + R.empty()) return IsTupleLike::NotTupleLike; // If we get this far, we've committed to the tuple interpretation, but @@ -1048,11 +1050,6 @@ static IsTupleLike isTupleLike(Sema &S, SourceLocation Loc, QualType T, } } Diagnoser(R, Args); - if (R.empty()) { - Diagnoser.diagnoseNotICE(S, Loc, SourceRange()); - return IsTupleLike::Error; - } - ExprResult E = S.BuildDeclarationNameExpr(CXXScopeSpec(), R, /*NeedsADL*/false); if (E.isInvalid()) @@ -6165,8 +6162,8 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) { M->dropAttr(); if (M->hasAttr()) { - DefineImplicitSpecialMember(*this, M, M->getLocation()); - ActOnFinishInlineFunctionDef(M); + // Define after any fields with in-class initializers have been parsed. + DelayedDllExportMemberFunctions.push_back(M); } } }; @@ -11419,6 +11416,21 @@ void Sema::ActOnFinishCXXMemberDecls() { void Sema::ActOnFinishCXXNonNestedClass(Decl *D) { referenceDLLExportedClassMethods(); + + if (!DelayedDllExportMemberFunctions.empty()) { + SmallVector WorkList; + std::swap(DelayedDllExportMemberFunctions, WorkList); + for (CXXMethodDecl *M : WorkList) { + DefineImplicitSpecialMember(*this, M, M->getLocation()); + + // Pass the method to the consumer to get emitted. This is not necessary + // for explicit instantiation definitions, as they will get emitted + // anyway. 
+ if (M->getParent()->getTemplateSpecializationKind() != + TSK_ExplicitInstantiationDefinition) + ActOnFinishInlineFunctionDef(M); + } + } } void Sema::referenceDLLExportedClassMethods() { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp index 705e3b9bd7fb..c1c08969c7bd 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -6794,14 +6794,10 @@ ExprResult Sema::ActOnStartCXXMemberReference(Scope *S, Expr *Base, // it's legal for the type to be incomplete if this is a pseudo-destructor // call. We'll do more incomplete-type checks later in the lookup process, // so just skip this check for ObjC types. - if (BaseType->isObjCObjectOrInterfaceType()) { + if (!BaseType->isRecordType()) { ObjectType = ParsedType::make(BaseType); MayBePseudoDestructor = true; return Base; - } else if (!BaseType->isRecordType()) { - ObjectType = nullptr; - MayBePseudoDestructor = true; - return Base; } // The object type must be complete (or dependent), or diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp index bc1069609336..60f34775c6b2 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaInit.cpp @@ -1289,7 +1289,16 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity, // FIXME: Better EqualLoc? InitializationKind Kind = InitializationKind::CreateCopy(expr->getBeginLoc(), SourceLocation()); - InitializationSequence Seq(SemaRef, Entity, Kind, expr, + + // Vector elements can be initialized from other vectors in which case + // we need initialization entity with a type of a vector (and not a vector + // element!) initializing multiple vector elements. + auto TmpEntity = + (ElemType->isExtVectorType() && !Entity.getType()->isExtVectorType()) + ? 
InitializedEntity::InitializeTemporary(ElemType) + : Entity; + + InitializationSequence Seq(SemaRef, TmpEntity, Kind, expr, /*TopLevelOfInitList*/ true); // C++14 [dcl.init.aggr]p13: @@ -1300,8 +1309,7 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity, // assignment-expression. if (Seq || isa(expr)) { if (!VerifyOnly) { - ExprResult Result = - Seq.Perform(SemaRef, Entity, Kind, expr); + ExprResult Result = Seq.Perform(SemaRef, TmpEntity, Kind, expr); if (Result.isInvalid()) hadError = true; @@ -8108,7 +8116,7 @@ ExprResult InitializationSequence::Perform(Sema &S, // argument passing. assert(Step->Type->isSamplerT() && "Sampler initialization on non-sampler type."); - Expr *Init = CurInit.get(); + Expr *Init = CurInit.get()->IgnoreParens(); QualType SourceType = Init->getType(); // Case 1 if (Entity.isParameterKind()) { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp index bd68011c18b2..4ac87469bf44 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOpenMP.cpp @@ -139,6 +139,7 @@ class DSAStackTy { /// clause, false otherwise. llvm::Optional> OrderedRegion; unsigned AssociatedLoops = 1; + bool HasMutipleLoops = false; const Decl *PossiblyLoopCounter = nullptr; bool NowaitRegion = false; bool CancelRegion = false; @@ -678,12 +679,19 @@ class DSAStackTy { /// Set collapse value for the region. void setAssociatedLoops(unsigned Val) { getTopOfStack().AssociatedLoops = Val; + if (Val > 1) + getTopOfStack().HasMutipleLoops = true; } /// Return collapse value for region. unsigned getAssociatedLoops() const { const SharingMapTy *Top = getTopOfStackOrNull(); return Top ? Top->AssociatedLoops : 0; } + /// Returns true if the construct is associated with multiple loops. + bool hasMutipleLoops() const { + const SharingMapTy *Top = getTopOfStackOrNull(); + return Top ? 
Top->HasMutipleLoops : false; + } /// Marks current target region as one with closely nested teams /// region. @@ -1874,6 +1882,13 @@ bool Sema::isOpenMPPrivateDecl(const ValueDecl *D, unsigned Level) const { !isOpenMPSimdDirective(DSAStack->getCurrentDirective())) return true; } + if (const auto *VD = dyn_cast(D)) { + if (DSAStack->isThreadPrivate(const_cast(VD)) && + DSAStack->isForceVarCapturing() && + !DSAStack->hasExplicitDSA( + D, [](OpenMPClauseKind K) { return K == OMPC_copyin; }, Level)) + return true; + } return DSAStack->hasExplicitDSA( D, [](OpenMPClauseKind K) { return K == OMPC_private; }, Level) || (DSAStack->isClauseParsingMode() && @@ -5604,13 +5619,14 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { if (!ISC.checkAndSetInit(Init, /*EmitDiags=*/false)) { if (ValueDecl *D = ISC.getLoopDecl()) { auto *VD = dyn_cast(D); + DeclRefExpr *PrivateRef = nullptr; if (!VD) { if (VarDecl *Private = isOpenMPCapturedDecl(D)) { VD = Private; } else { - DeclRefExpr *Ref = buildCapture(*this, D, ISC.getLoopDeclRefExpr(), - /*WithInit=*/false); - VD = cast(Ref->getDecl()); + PrivateRef = buildCapture(*this, D, ISC.getLoopDeclRefExpr(), + /*WithInit=*/false); + VD = cast(PrivateRef->getDecl()); } } DSAStack->addLoopControlVariable(D, VD); @@ -5623,6 +5639,49 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) { Var->getType().getNonLValueExprType(Context), ForLoc, /*RefersToCapture=*/true)); } + OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); + // OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables + // Referenced in a Construct, C/C++]. The loop iteration variable in the + // associated for-loop of a simd construct with just one associated + // for-loop may be listed in a linear clause with a constant-linear-step + // that is the increment of the associated for-loop. 
The loop iteration + // variable(s) in the associated for-loop(s) of a for or parallel for + // construct may be listed in a private or lastprivate clause. + DSAStackTy::DSAVarData DVar = + DSAStack->getTopDSA(D, /*FromParent=*/false); + // If LoopVarRefExpr is nullptr it means the corresponding loop variable + // is declared in the loop and it is predetermined as a private. + Expr *LoopDeclRefExpr = ISC.getLoopDeclRefExpr(); + OpenMPClauseKind PredeterminedCKind = + isOpenMPSimdDirective(DKind) + ? (DSAStack->hasMutipleLoops() ? OMPC_lastprivate : OMPC_linear) + : OMPC_private; + if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && + DVar.CKind != PredeterminedCKind && DVar.RefExpr && + (LangOpts.OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate && + DVar.CKind != OMPC_private))) || + ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop || + isOpenMPDistributeDirective(DKind)) && + !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && + DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) && + (DVar.CKind != OMPC_private || DVar.RefExpr)) { + Diag(Init->getBeginLoc(), diag::err_omp_loop_var_dsa) + << getOpenMPClauseName(DVar.CKind) + << getOpenMPDirectiveName(DKind) + << getOpenMPClauseName(PredeterminedCKind); + if (DVar.RefExpr == nullptr) + DVar.CKind = PredeterminedCKind; + reportOriginalDsa(*this, DSAStack, D, DVar, + /*IsLoopIterVar=*/true); + } else if (LoopDeclRefExpr) { + // Make the loop iteration variable private (for worksharing + // constructs), linear (for simd directives with the only one + // associated loop) or lastprivate (for simd directives with several + // collapsed or ordered loops). + if (DVar.CKind == OMPC_unknown) + DSAStack->addDSA(D, LoopDeclRefExpr, PredeterminedCKind, + PrivateRef); + } } } DSAStack->setAssociatedLoops(AssociatedLoops - 1); @@ -5677,8 +5736,6 @@ static bool checkOpenMPIterationSpace( // Check loop variable's type. 
if (ValueDecl *LCDecl = ISC.getLoopDecl()) { - Expr *LoopDeclRefExpr = ISC.getLoopDeclRefExpr(); - // OpenMP [2.6, Canonical Loop Form] // Var is one of the following: // A variable of signed or unsigned integer type. @@ -5704,46 +5761,6 @@ static bool checkOpenMPIterationSpace( // sharing attributes. VarsWithImplicitDSA.erase(LCDecl); - // OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables Referenced - // in a Construct, C/C++]. - // The loop iteration variable in the associated for-loop of a simd - // construct with just one associated for-loop may be listed in a linear - // clause with a constant-linear-step that is the increment of the - // associated for-loop. - // The loop iteration variable(s) in the associated for-loop(s) of a for or - // parallel for construct may be listed in a private or lastprivate clause. - DSAStackTy::DSAVarData DVar = DSA.getTopDSA(LCDecl, false); - // If LoopVarRefExpr is nullptr it means the corresponding loop variable is - // declared in the loop and it is predetermined as a private. - OpenMPClauseKind PredeterminedCKind = - isOpenMPSimdDirective(DKind) - ? ((NestedLoopCount == 1) ? 
OMPC_linear : OMPC_lastprivate) - : OMPC_private; - if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && - DVar.CKind != PredeterminedCKind && DVar.RefExpr && - (SemaRef.getLangOpts().OpenMP <= 45 || - (DVar.CKind != OMPC_lastprivate && DVar.CKind != OMPC_private))) || - ((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop || - isOpenMPDistributeDirective(DKind)) && - !isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown && - DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) && - (DVar.CKind != OMPC_private || DVar.RefExpr)) { - SemaRef.Diag(Init->getBeginLoc(), diag::err_omp_loop_var_dsa) - << getOpenMPClauseName(DVar.CKind) << getOpenMPDirectiveName(DKind) - << getOpenMPClauseName(PredeterminedCKind); - if (DVar.RefExpr == nullptr) - DVar.CKind = PredeterminedCKind; - reportOriginalDsa(SemaRef, &DSA, LCDecl, DVar, /*IsLoopIterVar=*/true); - HasErrors = true; - } else if (LoopDeclRefExpr != nullptr) { - // Make the loop iteration variable private (for worksharing constructs), - // linear (for simd directives with the only one associated loop) or - // lastprivate (for simd directives with several collapsed or ordered - // loops). - if (DVar.CKind == OMPC_unknown) - DSA.addDSA(LCDecl, LoopDeclRefExpr, PredeterminedCKind); - } - assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars"); // Check test-expr. 
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp index b123a739a7ab..9b051e02d127 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaStmtAsm.cpp @@ -383,25 +383,19 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple, } else if (Info.requiresImmediateConstant() && !Info.allowsRegister()) { if (!InputExpr->isValueDependent()) { Expr::EvalResult EVResult; - if (!InputExpr->EvaluateAsRValue(EVResult, Context, true)) - return StmtError( - Diag(InputExpr->getBeginLoc(), diag::err_asm_immediate_expected) - << Info.getConstraintStr() << InputExpr->getSourceRange()); - - // For compatibility with GCC, we also allow pointers that would be - // integral constant expressions if they were cast to int. - llvm::APSInt IntResult; - if (!EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), - Context)) - return StmtError( - Diag(InputExpr->getBeginLoc(), diag::err_asm_immediate_expected) - << Info.getConstraintStr() << InputExpr->getSourceRange()); - - if (!Info.isValidAsmImmediate(IntResult)) - return StmtError(Diag(InputExpr->getBeginLoc(), - diag::err_invalid_asm_value_for_constraint) - << IntResult.toString(10) << Info.getConstraintStr() - << InputExpr->getSourceRange()); + if (InputExpr->EvaluateAsRValue(EVResult, Context, true)) { + // For compatibility with GCC, we also allow pointers that would be + // integral constant expressions if they were cast to int. 
+ llvm::APSInt IntResult; + if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(), + Context)) + if (!Info.isValidAsmImmediate(IntResult)) + return StmtError(Diag(InputExpr->getBeginLoc(), + diag::err_invalid_asm_value_for_constraint) + << IntResult.toString(10) + << Info.getConstraintStr() + << InputExpr->getSourceRange()); + } } } else { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp index 3212281cc34d..ec4b63a2e508 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplate.cpp @@ -362,13 +362,27 @@ bool Sema::LookupTemplateName(LookupResult &Found, // x->B::f, and we are looking into the type of the object. assert(!SS.isSet() && "ObjectType and scope specifier cannot coexist"); LookupCtx = computeDeclContext(ObjectType); - IsDependent = !LookupCtx; + IsDependent = !LookupCtx && ObjectType->isDependentType(); assert((IsDependent || !ObjectType->isIncompleteType() || ObjectType->castAs()->isBeingDefined()) && "Caller should have completed object type"); - // Template names cannot appear inside an Objective-C class or object type. - if (ObjectType->isObjCObjectOrInterfaceType()) { + // Template names cannot appear inside an Objective-C class or object type + // or a vector type. + // + // FIXME: This is wrong. For example: + // + // template using Vec = T __attribute__((ext_vector_type(4))); + // Vec vi; + // vi.Vec::~Vec(); + // + // ... should be accepted but we will not treat 'Vec' as a template name + // here. The right thing to do would be to check if the name is a valid + // vector component name, and look up a template name if not. And similarly + // for lookups into Objective-C class and object types, where the same + // problem can arise. 
+ if (ObjectType->isObjCObjectOrInterfaceType() || + ObjectType->isVectorType()) { Found.clear(); return false; } diff --git a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp index 6f1060b5f26d..600458a743ea 100644 --- a/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp +++ b/contrib/llvm/tools/clang/lib/StaticAnalyzer/Checkers/IteratorChecker.cpp @@ -406,13 +406,15 @@ void IteratorChecker::checkPreCall(const CallEvent &Call, } else if (isRandomIncrOrDecrOperator(Func->getOverloadedOperator())) { if (const auto *InstCall = dyn_cast(&Call)) { // Check for out-of-range incrementions and decrementions - if (Call.getNumArgs() >= 1) { + if (Call.getNumArgs() >= 1 && + Call.getArgExpr(0)->getType()->isIntegralOrEnumerationType()) { verifyRandomIncrOrDecr(C, Func->getOverloadedOperator(), InstCall->getCXXThisVal(), Call.getArgSVal(0)); } } else { - if (Call.getNumArgs() >= 2) { + if (Call.getNumArgs() >= 2 && + Call.getArgExpr(1)->getType()->isIntegralOrEnumerationType()) { verifyRandomIncrOrDecr(C, Func->getOverloadedOperator(), Call.getArgSVal(0), Call.getArgSVal(1)); } @@ -590,14 +592,16 @@ void IteratorChecker::checkPostCall(const CallEvent &Call, return; } else if (isRandomIncrOrDecrOperator(Func->getOverloadedOperator())) { if (const auto *InstCall = dyn_cast(&Call)) { - if (Call.getNumArgs() >= 1) { + if (Call.getNumArgs() >= 1 && + Call.getArgExpr(0)->getType()->isIntegralOrEnumerationType()) { handleRandomIncrOrDecr(C, Func->getOverloadedOperator(), Call.getReturnValue(), InstCall->getCXXThisVal(), Call.getArgSVal(0)); return; } } else { - if (Call.getNumArgs() >= 2) { + if (Call.getNumArgs() >= 2 && + Call.getArgExpr(1)->getType()->isIntegralOrEnumerationType()) { handleRandomIncrOrDecr(C, Func->getOverloadedOperator(), Call.getReturnValue(), Call.getArgSVal(0), Call.getArgSVal(1)); diff --git a/contrib/llvm/tools/lld/COFF/Driver.cpp 
b/contrib/llvm/tools/lld/COFF/Driver.cpp index d7af50b9318f..7214d12bde8a 100644 --- a/contrib/llvm/tools/lld/COFF/Driver.cpp +++ b/contrib/llvm/tools/lld/COFF/Driver.cpp @@ -184,8 +184,10 @@ void LinkerDriver::addBuffer(std::unique_ptr mb, if (wholeArchive) { std::unique_ptr file = CHECK(Archive::create(mbref), filename + ": failed to parse archive"); + Archive *archive = file.get(); + make>(std::move(file)); // take ownership - for (MemoryBufferRef m : getArchiveMembers(file.get())) + for (MemoryBufferRef m : getArchiveMembers(archive)) addArchiveBuffer(m, "", filename, 0); return; } diff --git a/contrib/llvm/tools/lld/COFF/Writer.cpp b/contrib/llvm/tools/lld/COFF/Writer.cpp index 36ef87de4263..cc75db0f519c 100644 --- a/contrib/llvm/tools/lld/COFF/Writer.cpp +++ b/contrib/llvm/tools/lld/COFF/Writer.cpp @@ -762,6 +762,28 @@ void Writer::locateImportTables() { } } +// Return whether a SectionChunk's suffix (the dollar and any trailing +// suffix) should be removed and sorted into the main suffixless +// PartialSection. +static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) { + // On MinGW, comdat groups are formed by putting the comdat group name + // after the '$' in the section name. For .eh_frame$, that must + // still be sorted before the .eh_frame trailer from crtend.o, thus just + // strip the section name trailer. For other sections, such as + // .tls$$ (where non-comdat .tls symbols are otherwise stored in + // ".tls$"), they must be strictly sorted after .tls. And for the + // hypothetical case of comdat .CRT$XCU, we definitely need to keep the + // suffix for sorting. Thus, to play it safe, only strip the suffix for + // the standard sections. 
+ if (!config->mingw) + return false; + if (!sc || !sc->isCOMDAT()) + return false; + return name.startswith(".text$") || name.startswith(".data$") || + name.startswith(".rdata$") || name.startswith(".pdata$") || + name.startswith(".xdata$") || name.startswith(".eh_frame$"); +} + // Create output section objects and add them to OutputSections. void Writer::createSections() { // First, create the builtin sections. @@ -807,10 +829,7 @@ void Writer::createSections() { continue; } StringRef name = c->getSectionName(); - // On MinGW, comdat groups are formed by putting the comdat group name - // after the '$' in the section name. Such a section name suffix shouldn't - // imply separate alphabetical sorting of those section chunks though. - if (config->mingw && sc && sc->isCOMDAT()) + if (shouldStripSectionSuffix(sc, name)) name = name.split('$').first; PartialSection *pSec = createPartialSection(name, c->getOutputCharacteristics()); @@ -1076,6 +1095,13 @@ Optional Writer::createSymbol(Defined *def) { } } + // Symbols that are runtime pseudo relocations don't point to the actual + // symbol data itself (as they are imported), but points to the IAT entry + // instead. Avoid emitting them to the symbol table, as they can confuse + // debuggers. + if (def->isRuntimePseudoReloc) + return None; + StringRef name = def->getName(); if (name.size() > COFF::NameSize) { sym.Name.Offset.Zeroes = 0; diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp index f6d0f190d84d..35b9b8928c9f 100644 --- a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp +++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp @@ -3177,11 +3177,23 @@ static bool isDuplicateArmExidxSec(InputSection *prev, InputSection *cur) { // The .ARM.exidx table must be sorted in ascending order of the address of the // functions the table describes. Optionally duplicate adjacent table entries -// can be removed. 
At the end of the function the ExecutableSections must be +// can be removed. At the end of the function the executableSections must be // sorted in ascending order of address, Sentinel is set to the InputSection // with the highest address and any InputSections that have mergeable // .ARM.exidx table entries are removed from it. void ARMExidxSyntheticSection::finalizeContents() { + if (script->hasSectionsCommand) { + // The executableSections and exidxSections that we use to derive the + // final contents of this SyntheticSection are populated before the + // linker script assigns InputSections to OutputSections. The linker script + // SECTIONS command may have a /DISCARD/ entry that removes executable + // InputSections and their dependent .ARM.exidx section that we recorded + // earlier. + auto isDiscarded = [](const InputSection *isec) { return !isec->isLive(); }; + llvm::erase_if(executableSections, isDiscarded); + llvm::erase_if(exidxSections, isDiscarded); + } + // Sort the executable sections that may or may not have associated // .ARM.exidx sections by order of ascending address. This requires the // relative positions of InputSections to be known. diff --git a/contrib/llvm/tools/lld/docs/ReleaseNotes.rst b/contrib/llvm/tools/lld/docs/ReleaseNotes.rst index 76207fec11ac..2d358bf8f246 100644 --- a/contrib/llvm/tools/lld/docs/ReleaseNotes.rst +++ b/contrib/llvm/tools/lld/docs/ReleaseNotes.rst @@ -28,6 +28,15 @@ ELF Improvements ``$ ld.lld --call-shared`` now prints ``unknown argument '--call-shared', did you mean '--call_shared'``. +* lld now supports replacing ``JAL`` with ``JALX`` instructions in case + of MIPS - microMIPS cross-mode jumps. + +* lld now creates LA25 thunks for MIPS R6 code. + +* Put MIPS-specific .reginfo, .MIPS.options, and .MIPS.abiflags sections + into corresponding PT_MIPS_REGINFO, PT_MIPS_OPTIONS, and PT_MIPS_ABIFLAGS + segments. + * ... 
COFF Improvements @@ -53,6 +62,14 @@ COFF Improvements * Several speed and memory usage improvements. +* Range extension thunks are now created for ARM64, if needed + +* lld-link now supports resource object files created by GNU windres and + MS cvtres, not only llvm-cvtres + +* The generated thunks for delayimports now share the majority of code + among thunks, significantly reducing the overhead of using delayimport + * ... MinGW Improvements @@ -62,6 +79,17 @@ MinGW Improvements terminators for the sections such as .eh_frame properly, fixing DWARF exception handling with libgcc and gcc's crtend.o. +* lld now also handles DWARF unwind info generated by GCC, when linking + with libgcc + +* Many more GNU ld options are now supported, which e.g. allows the lld + MinGW frontend to be called by GCC + +* PDB output can be requested without manually specifying the PDB file + name, with the new option ``-pdb=`` with an empty value to the option. + (The old existing syntax ``-pdb <filename>`` was more cumbersome to use + with an empty parameter value.) 
+ MachO Improvements ------------------ diff --git a/contrib/llvm/tools/lldb/source/Core/IOHandler.cpp b/contrib/llvm/tools/lldb/source/Core/IOHandler.cpp index b30308490cca..c3c722019faa 100644 --- a/contrib/llvm/tools/lldb/source/Core/IOHandler.cpp +++ b/contrib/llvm/tools/lldb/source/Core/IOHandler.cpp @@ -233,7 +233,7 @@ int IOHandlerDelegate::IOHandlerComplete( matches, descriptions); case Completion::Expression: { CompletionResult result; - CompletionRequest request(current_line, current_line - cursor, + CompletionRequest request(current_line, cursor - current_line, skip_first_n_matches, max_matches, result); CommandCompletions::InvokeCommonCompletionCallbacks( io_handler.GetDebugger().GetCommandInterpreter(), diff --git a/lib/clang/include/clang/Config/config.h b/lib/clang/include/clang/Config/config.h index f556b117d262..aea09a42bfda 100644 --- a/lib/clang/include/clang/Config/config.h +++ b/lib/clang/include/clang/Config/config.h @@ -62,7 +62,7 @@ #define CLANG_HAVE_RLIMITS 1 /* The LLVM product name and version */ -#define BACKEND_PACKAGE_STRING "LLVM 9.0.0svn" +#define BACKEND_PACKAGE_STRING "LLVM 9.0.0" /* Linker version detected at compile time. */ /* #undef HOST_LINK_VERSION */ diff --git a/lib/clang/include/lld/Common/Version.inc b/lib/clang/include/lld/Common/Version.inc index 60e65072bb94..765b88e2d1a7 100644 --- a/lib/clang/include/lld/Common/Version.inc +++ b/lib/clang/include/lld/Common/Version.inc @@ -6,5 +6,5 @@ #define LLD_VERSION_MINOR 0 // - -#define LLD_REVISION "366426-1300004" +#define LLD_REVISION "369369-1300004" #define LLD_REPOSITORY "FreeBSD" diff --git a/lib/clang/include/llvm/Config/config.h b/lib/clang/include/llvm/Config/config.h index f1d71fda57fa..05c9d0f5ec6a 100644 --- a/lib/clang/include/llvm/Config/config.h +++ b/lib/clang/include/llvm/Config/config.h @@ -322,10 +322,10 @@ #define PACKAGE_NAME "LLVM" /* Define to the full name and version of this package. 
*/ -#define PACKAGE_STRING "LLVM 9.0.0svn" +#define PACKAGE_STRING "LLVM 9.0.0" /* Define to the version of this package. */ -#define PACKAGE_VERSION "9.0.0svn" +#define PACKAGE_VERSION "9.0.0" /* Define to the vendor of this package. */ /* #undef PACKAGE_VENDOR */ diff --git a/lib/clang/include/llvm/Config/llvm-config.h b/lib/clang/include/llvm/Config/llvm-config.h index 5234fb4f3da9..4ec04d91c1d4 100644 --- a/lib/clang/include/llvm/Config/llvm-config.h +++ b/lib/clang/include/llvm/Config/llvm-config.h @@ -76,7 +76,7 @@ #define LLVM_VERSION_PATCH 0 /* LLVM version string */ -#define LLVM_VERSION_STRING "9.0.0svn" +#define LLVM_VERSION_STRING "9.0.0" /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h index b4c6a1782d89..ba131dafd9b5 100644 --- a/lib/clang/include/llvm/Support/VCSRevision.h +++ b/lib/clang/include/llvm/Support/VCSRevision.h @@ -1,3 +1,3 @@ /* $FreeBSD$ */ -#define LLVM_REVISION "366426" -#define LLVM_REPOSITORY "https://llvm.org/svn/llvm-project/llvm/trunk" +#define LLVM_REVISION "369369" +#define LLVM_REPOSITORY "https://llvm.org/svn/llvm-project/llvm/branches/release_90"