Merge llvm, clang, compiler-rt, libc++, libunwind, lld, lldb, and openmp
release_90 branch r369369, and update version numbers.
This commit is contained in:
parent
fd6107bfb7
commit
9c346a793e
@ -39,8 +39,8 @@ void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
|
||||
|
||||
// ---------------------- TSD ---------------- {{{1
|
||||
|
||||
#if SANITIZER_NETBSD || SANITIZER_FREEBSD
|
||||
// Thread Static Data cannot be used in early init on NetBSD and FreeBSD.
|
||||
#if SANITIZER_NETBSD && !ASAN_DYNAMIC
|
||||
// Thread Static Data cannot be used in early static ASan init on NetBSD.
|
||||
// Reuse the Asan TSD API for compatibility with existing code
|
||||
// with an alternative implementation.
|
||||
|
||||
|
@ -543,7 +543,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
|
||||
setFeature(FEATURE_BMI);
|
||||
if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
|
||||
setFeature(FEATURE_AVX2);
|
||||
if (HasLeaf7 && ((EBX >> 9) & 1))
|
||||
if (HasLeaf7 && ((EBX >> 8) & 1))
|
||||
setFeature(FEATURE_BMI2);
|
||||
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
|
||||
setFeature(FEATURE_AVX512F);
|
||||
|
@ -33,7 +33,7 @@ static int StrCmp(const char *s1, const char *s2) {
|
||||
}
|
||||
#endif
|
||||
|
||||
static void *GetFuncAddr(const char *name, uptr wrapper_addr) {
|
||||
static void *GetFuncAddr(const char *name) {
|
||||
#if SANITIZER_NETBSD
|
||||
// FIXME: Find a better way to handle renames
|
||||
if (StrCmp(name, "sigaction"))
|
||||
@ -47,18 +47,13 @@ static void *GetFuncAddr(const char *name, uptr wrapper_addr) {
|
||||
// want the address of the real definition, though, so look it up using
|
||||
// RTLD_DEFAULT.
|
||||
addr = dlsym(RTLD_DEFAULT, name);
|
||||
|
||||
// In case `name' is not loaded, dlsym ends up finding the actual wrapper.
|
||||
// We don't want to intercept the wrapper and have it point to itself.
|
||||
if ((uptr)addr == wrapper_addr)
|
||||
addr = nullptr;
|
||||
}
|
||||
return addr;
|
||||
}
|
||||
|
||||
bool InterceptFunction(const char *name, uptr *ptr_to_real, uptr func,
|
||||
uptr wrapper) {
|
||||
void *addr = GetFuncAddr(name, wrapper);
|
||||
void *addr = GetFuncAddr(name);
|
||||
*ptr_to_real = (uptr)addr;
|
||||
return addr && (func == wrapper);
|
||||
}
|
||||
|
@ -174,8 +174,8 @@ void InstallAtExitHandler() {
|
||||
|
||||
// ---------------------- TSD ---------------- {{{1
|
||||
|
||||
#if SANITIZER_NETBSD || SANITIZER_FREEBSD
|
||||
// Thread Static Data cannot be used in early init on NetBSD and FreeBSD.
|
||||
#if SANITIZER_NETBSD
|
||||
// Thread Static Data cannot be used in early init on NetBSD.
|
||||
// Reuse the MSan TSD API for compatibility with existing code
|
||||
// with an alternative implementation.
|
||||
|
||||
|
@ -24,7 +24,7 @@ struct ioctl_desc {
|
||||
const char *name;
|
||||
};
|
||||
|
||||
const unsigned ioctl_table_max = 1200;
|
||||
const unsigned ioctl_table_max = 1236;
|
||||
static ioctl_desc ioctl_table[ioctl_table_max];
|
||||
static unsigned ioctl_table_size = 0;
|
||||
|
||||
@ -645,7 +645,7 @@ static void ioctl_table_fill() {
|
||||
_(SPKRTUNE, NONE, 0);
|
||||
_(SPKRGETVOL, WRITE, sizeof(unsigned int));
|
||||
_(SPKRSETVOL, READ, sizeof(unsigned int));
|
||||
#if 0 /* WIP */
|
||||
#if defined(__x86_64__)
|
||||
/* Entries from file: dev/nvmm/nvmm_ioctl.h */
|
||||
_(NVMM_IOC_CAPABILITY, WRITE, struct_nvmm_ioc_capability_sz);
|
||||
_(NVMM_IOC_MACHINE_CREATE, READWRITE, struct_nvmm_ioc_machine_create_sz);
|
||||
@ -661,7 +661,11 @@ static void ioctl_table_fill() {
|
||||
_(NVMM_IOC_GPA_UNMAP, READ, struct_nvmm_ioc_gpa_unmap_sz);
|
||||
_(NVMM_IOC_HVA_MAP, READ, struct_nvmm_ioc_hva_map_sz);
|
||||
_(NVMM_IOC_HVA_UNMAP, READ, struct_nvmm_ioc_hva_unmap_sz);
|
||||
_(NVMM_IOC_CTL, READ, struct_nvmm_ioc_ctl_sz);
|
||||
#endif
|
||||
/* Entries from file: dev/spi/spi_io.h */
|
||||
_(SPI_IOCTL_CONFIGURE, READ, struct_spi_ioctl_configure_sz);
|
||||
_(SPI_IOCTL_TRANSFER, READ, struct_spi_ioctl_transfer_sz);
|
||||
/* Entries from file: fs/autofs/autofs_ioctl.h */
|
||||
_(AUTOFSREQUEST, WRITE, struct_autofs_daemon_request_sz);
|
||||
_(AUTOFSDONE, READ, struct_autofs_daemon_done_sz);
|
||||
@ -895,6 +899,9 @@ static void ioctl_table_fill() {
|
||||
_(AUDIO_GETBUFINFO, WRITE, struct_audio_info_sz);
|
||||
_(AUDIO_SETCHAN, READ, sizeof(int));
|
||||
_(AUDIO_GETCHAN, WRITE, sizeof(int));
|
||||
_(AUDIO_QUERYFORMAT, READWRITE, struct_audio_format_query_sz);
|
||||
_(AUDIO_GETFORMAT, WRITE, struct_audio_info_sz);
|
||||
_(AUDIO_SETFORMAT, READ, struct_audio_info_sz);
|
||||
_(AUDIO_MIXER_READ, READWRITE, struct_mixer_ctrl_sz);
|
||||
_(AUDIO_MIXER_WRITE, READWRITE, struct_mixer_ctrl_sz);
|
||||
_(AUDIO_MIXER_DEVINFO, READWRITE, struct_mixer_devinfo_sz);
|
||||
@ -985,6 +992,7 @@ static void ioctl_table_fill() {
|
||||
_(DIOCMWEDGES, WRITE, sizeof(int));
|
||||
_(DIOCGSECTORSIZE, WRITE, sizeof(unsigned int));
|
||||
_(DIOCGMEDIASIZE, WRITE, sizeof(uptr));
|
||||
_(DIOCRMWEDGES, WRITE, sizeof(int));
|
||||
/* Entries from file: sys/drvctlio.h */
|
||||
_(DRVDETACHDEV, READ, struct_devdetachargs_sz);
|
||||
_(DRVRESCANBUS, READ, struct_devrescanargs_sz);
|
||||
@ -1206,6 +1214,8 @@ static void ioctl_table_fill() {
|
||||
_(SIOCGETHERCAP, READWRITE, struct_eccapreq_sz);
|
||||
_(SIOCGIFINDEX, READWRITE, struct_ifreq_sz);
|
||||
_(SIOCSETHERCAP, READ, struct_eccapreq_sz);
|
||||
_(SIOCSIFDESCR, READ, struct_ifreq_sz);
|
||||
_(SIOCGIFDESCR, READWRITE, struct_ifreq_sz);
|
||||
_(SIOCGUMBINFO, READWRITE, struct_ifreq_sz);
|
||||
_(SIOCSUMBPARAM, READ, struct_ifreq_sz);
|
||||
_(SIOCGUMBPARAM, READWRITE, struct_ifreq_sz);
|
||||
@ -1335,6 +1345,21 @@ static void ioctl_table_fill() {
|
||||
_(WDOGIOC_TICKLE, NONE, 0);
|
||||
_(WDOGIOC_GTICKLER, WRITE, sizeof(int));
|
||||
_(WDOGIOC_GWDOGS, READWRITE, struct_wdog_conf_sz);
|
||||
/* Entries from file: sys/kcov.h */
|
||||
_(KCOV_IOC_SETBUFSIZE, READ, sizeof(u64));
|
||||
_(KCOV_IOC_ENABLE, READ, sizeof(int));
|
||||
_(KCOV_IOC_DISABLE, NONE, 0);
|
||||
/* Entries from file: sys/ipmi.h */
|
||||
_(IPMICTL_RECEIVE_MSG_TRUNC, READWRITE, struct_ipmi_recv_sz);
|
||||
_(IPMICTL_RECEIVE_MSG, READWRITE, struct_ipmi_recv_sz);
|
||||
_(IPMICTL_SEND_COMMAND, READ, struct_ipmi_req_sz);
|
||||
_(IPMICTL_REGISTER_FOR_CMD, READ, struct_ipmi_cmdspec_sz);
|
||||
_(IPMICTL_UNREGISTER_FOR_CMD, READ, struct_ipmi_cmdspec_sz);
|
||||
_(IPMICTL_SET_GETS_EVENTS_CMD, READ, sizeof(int));
|
||||
_(IPMICTL_SET_MY_ADDRESS_CMD, READ, sizeof(unsigned int));
|
||||
_(IPMICTL_GET_MY_ADDRESS_CMD, WRITE, sizeof(unsigned int));
|
||||
_(IPMICTL_SET_MY_LUN_CMD, READ, sizeof(unsigned int));
|
||||
_(IPMICTL_GET_MY_LUN_CMD, WRITE, sizeof(unsigned int));
|
||||
/* Entries from file: soundcard.h */
|
||||
_(SNDCTL_DSP_RESET, NONE, 0);
|
||||
_(SNDCTL_DSP_SYNC, NONE, 0);
|
||||
|
@ -779,7 +779,11 @@ int internal_sysctl(const int *name, unsigned int namelen, void *oldp,
|
||||
#if SANITIZER_FREEBSD
|
||||
int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp,
|
||||
const void *newp, uptr newlen) {
|
||||
return sysctlbyname(sname, oldp, (size_t *)oldlenp, newp, (size_t)newlen);
|
||||
static decltype(sysctlbyname) *real = nullptr;
|
||||
if (!real)
|
||||
real = (decltype(sysctlbyname) *)dlsym(RTLD_NEXT, "sysctlbyname");
|
||||
CHECK(real);
|
||||
return real(sname, oldp, (size_t *)oldlenp, newp, (size_t)newlen);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
@ -62,6 +62,8 @@
|
||||
#include <sys/event.h>
|
||||
#include <sys/filio.h>
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/ipmi.h>
|
||||
#include <sys/kcov.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/mount.h>
|
||||
@ -123,9 +125,6 @@
|
||||
#include <dev/isa/isvio.h>
|
||||
#include <dev/isa/wtreg.h>
|
||||
#include <dev/iscsi/iscsi_ioctl.h>
|
||||
#if 0
|
||||
#include <dev/nvmm/nvmm_ioctl.h>
|
||||
#endif
|
||||
#include <dev/ofw/openfirmio.h>
|
||||
#include <dev/pci/amrio.h>
|
||||
#include <dev/pci/mlyreg.h>
|
||||
@ -168,6 +167,7 @@
|
||||
#include <dev/raidframe/raidframeio.h>
|
||||
#include <dev/sbus/mbppio.h>
|
||||
#include <dev/scsipi/ses.h>
|
||||
#include <dev/spi/spi_io.h>
|
||||
#include <dev/spkrio.h>
|
||||
#include <dev/sun/disklabel.h>
|
||||
#include <dev/sun/fbio.h>
|
||||
@ -221,6 +221,10 @@
|
||||
#include <regex.h>
|
||||
#include <fstab.h>
|
||||
#include <stringlist.h>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#include <nvmm.h>
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
// Include these after system headers to avoid name clashes and ambiguities.
|
||||
@ -686,6 +690,26 @@ unsigned struct_usb_config_desc_sz = sizeof(usb_config_desc);
|
||||
unsigned struct_usb_ctl_report_desc_sz = sizeof(usb_ctl_report_desc);
|
||||
unsigned struct_usb_ctl_report_sz = sizeof(usb_ctl_report);
|
||||
unsigned struct_usb_ctl_request_sz = sizeof(usb_ctl_request);
|
||||
#if defined(__x86_64__)
|
||||
unsigned struct_nvmm_ioc_capability_sz = sizeof(nvmm_ioc_capability);
|
||||
unsigned struct_nvmm_ioc_machine_create_sz = sizeof(nvmm_ioc_machine_create);
|
||||
unsigned struct_nvmm_ioc_machine_destroy_sz = sizeof(nvmm_ioc_machine_destroy);
|
||||
unsigned struct_nvmm_ioc_machine_configure_sz =
|
||||
sizeof(nvmm_ioc_machine_configure);
|
||||
unsigned struct_nvmm_ioc_vcpu_create_sz = sizeof(nvmm_ioc_vcpu_create);
|
||||
unsigned struct_nvmm_ioc_vcpu_destroy_sz = sizeof(nvmm_ioc_vcpu_destroy);
|
||||
unsigned struct_nvmm_ioc_vcpu_setstate_sz = sizeof(nvmm_ioc_vcpu_destroy);
|
||||
unsigned struct_nvmm_ioc_vcpu_getstate_sz = sizeof(nvmm_ioc_vcpu_getstate);
|
||||
unsigned struct_nvmm_ioc_vcpu_inject_sz = sizeof(nvmm_ioc_vcpu_inject);
|
||||
unsigned struct_nvmm_ioc_vcpu_run_sz = sizeof(nvmm_ioc_vcpu_run);
|
||||
unsigned struct_nvmm_ioc_gpa_map_sz = sizeof(nvmm_ioc_gpa_map);
|
||||
unsigned struct_nvmm_ioc_gpa_unmap_sz = sizeof(nvmm_ioc_gpa_unmap);
|
||||
unsigned struct_nvmm_ioc_hva_map_sz = sizeof(nvmm_ioc_hva_map);
|
||||
unsigned struct_nvmm_ioc_hva_unmap_sz = sizeof(nvmm_ioc_hva_unmap);
|
||||
unsigned struct_nvmm_ioc_ctl_sz = sizeof(nvmm_ioc_ctl);
|
||||
#endif
|
||||
unsigned struct_spi_ioctl_configure_sz = sizeof(spi_ioctl_configure);
|
||||
unsigned struct_spi_ioctl_transfer_sz = sizeof(spi_ioctl_transfer);
|
||||
unsigned struct_autofs_daemon_request_sz = sizeof(autofs_daemon_request);
|
||||
unsigned struct_autofs_daemon_done_sz = sizeof(autofs_daemon_done);
|
||||
unsigned struct_sctp_connectx_addrs_sz = sizeof(sctp_connectx_addrs);
|
||||
@ -728,6 +752,9 @@ unsigned struct_vnd_user_sz = sizeof(vnd_user);
|
||||
unsigned struct_vt_stat_sz = sizeof(vt_stat);
|
||||
unsigned struct_wdog_conf_sz = sizeof(wdog_conf);
|
||||
unsigned struct_wdog_mode_sz = sizeof(wdog_mode);
|
||||
unsigned struct_ipmi_recv_sz = sizeof(ipmi_recv);
|
||||
unsigned struct_ipmi_req_sz = sizeof(ipmi_req);
|
||||
unsigned struct_ipmi_cmdspec_sz = sizeof(ipmi_cmdspec);
|
||||
unsigned struct_wfq_conf_sz = sizeof(wfq_conf);
|
||||
unsigned struct_wfq_getqid_sz = sizeof(wfq_getqid);
|
||||
unsigned struct_wfq_getstats_sz = sizeof(wfq_getstats);
|
||||
@ -813,6 +840,7 @@ unsigned struct_iscsi_wait_event_parameters_sz =
|
||||
unsigned struct_isp_stats_sz = sizeof(isp_stats_t);
|
||||
unsigned struct_lsenable_sz = sizeof(struct lsenable);
|
||||
unsigned struct_lsdisable_sz = sizeof(struct lsdisable);
|
||||
unsigned struct_audio_format_query_sz = sizeof(audio_format_query);
|
||||
unsigned struct_mixer_ctrl_sz = sizeof(struct mixer_ctrl);
|
||||
unsigned struct_mixer_devinfo_sz = sizeof(struct mixer_devinfo);
|
||||
unsigned struct_mpu_command_rec_sz = sizeof(mpu_command_rec);
|
||||
@ -1423,7 +1451,7 @@ unsigned IOCTL_SPKRTONE = SPKRTONE;
|
||||
unsigned IOCTL_SPKRTUNE = SPKRTUNE;
|
||||
unsigned IOCTL_SPKRGETVOL = SPKRGETVOL;
|
||||
unsigned IOCTL_SPKRSETVOL = SPKRSETVOL;
|
||||
#if 0 /* interfaces are WIP */
|
||||
#if defined(__x86_64__)
|
||||
unsigned IOCTL_NVMM_IOC_CAPABILITY = NVMM_IOC_CAPABILITY;
|
||||
unsigned IOCTL_NVMM_IOC_MACHINE_CREATE = NVMM_IOC_MACHINE_CREATE;
|
||||
unsigned IOCTL_NVMM_IOC_MACHINE_DESTROY = NVMM_IOC_MACHINE_DESTROY;
|
||||
@ -1438,7 +1466,10 @@ unsigned IOCTL_NVMM_IOC_GPA_MAP = NVMM_IOC_GPA_MAP;
|
||||
unsigned IOCTL_NVMM_IOC_GPA_UNMAP = NVMM_IOC_GPA_UNMAP;
|
||||
unsigned IOCTL_NVMM_IOC_HVA_MAP = NVMM_IOC_HVA_MAP;
|
||||
unsigned IOCTL_NVMM_IOC_HVA_UNMAP = NVMM_IOC_HVA_UNMAP;
|
||||
unsigned IOCTL_NVMM_IOC_CTL = NVMM_IOC_CTL;
|
||||
#endif
|
||||
unsigned IOCTL_SPI_IOCTL_CONFIGURE = SPI_IOCTL_CONFIGURE;
|
||||
unsigned IOCTL_SPI_IOCTL_TRANSFER = SPI_IOCTL_TRANSFER;
|
||||
unsigned IOCTL_AUTOFSREQUEST = AUTOFSREQUEST;
|
||||
unsigned IOCTL_AUTOFSDONE = AUTOFSDONE;
|
||||
unsigned IOCTL_BIOCGBLEN = BIOCGBLEN;
|
||||
@ -1656,6 +1687,9 @@ unsigned IOCTL_AUDIO_GETPROPS = AUDIO_GETPROPS;
|
||||
unsigned IOCTL_AUDIO_GETBUFINFO = AUDIO_GETBUFINFO;
|
||||
unsigned IOCTL_AUDIO_SETCHAN = AUDIO_SETCHAN;
|
||||
unsigned IOCTL_AUDIO_GETCHAN = AUDIO_GETCHAN;
|
||||
unsigned IOCTL_AUDIO_QUERYFORMAT = AUDIO_QUERYFORMAT;
|
||||
unsigned IOCTL_AUDIO_GETFORMAT = AUDIO_GETFORMAT;
|
||||
unsigned IOCTL_AUDIO_SETFORMAT = AUDIO_SETFORMAT;
|
||||
unsigned IOCTL_AUDIO_MIXER_READ = AUDIO_MIXER_READ;
|
||||
unsigned IOCTL_AUDIO_MIXER_WRITE = AUDIO_MIXER_WRITE;
|
||||
unsigned IOCTL_AUDIO_MIXER_DEVINFO = AUDIO_MIXER_DEVINFO;
|
||||
@ -1741,6 +1775,7 @@ unsigned IOCTL_DIOCTUR = DIOCTUR;
|
||||
unsigned IOCTL_DIOCMWEDGES = DIOCMWEDGES;
|
||||
unsigned IOCTL_DIOCGSECTORSIZE = DIOCGSECTORSIZE;
|
||||
unsigned IOCTL_DIOCGMEDIASIZE = DIOCGMEDIASIZE;
|
||||
unsigned IOCTL_DIOCRMWEDGES = DIOCRMWEDGES;
|
||||
unsigned IOCTL_DRVDETACHDEV = DRVDETACHDEV;
|
||||
unsigned IOCTL_DRVRESCANBUS = DRVRESCANBUS;
|
||||
unsigned IOCTL_DRVCTLCOMMAND = DRVCTLCOMMAND;
|
||||
@ -1945,6 +1980,8 @@ unsigned IOCTL_SIOCSLINKSTR = SIOCSLINKSTR;
|
||||
unsigned IOCTL_SIOCGETHERCAP = SIOCGETHERCAP;
|
||||
unsigned IOCTL_SIOCGIFINDEX = SIOCGIFINDEX;
|
||||
unsigned IOCTL_SIOCSETHERCAP = SIOCSETHERCAP;
|
||||
unsigned IOCTL_SIOCSIFDESCR = SIOCSIFDESCR;
|
||||
unsigned IOCTL_SIOCGIFDESCR = SIOCGIFDESCR;
|
||||
unsigned IOCTL_SIOCGUMBINFO = SIOCGUMBINFO;
|
||||
unsigned IOCTL_SIOCSUMBPARAM = SIOCSUMBPARAM;
|
||||
unsigned IOCTL_SIOCGUMBPARAM = SIOCGUMBPARAM;
|
||||
@ -2069,6 +2106,19 @@ unsigned IOCTL_WDOGIOC_WHICH = WDOGIOC_WHICH;
|
||||
unsigned IOCTL_WDOGIOC_TICKLE = WDOGIOC_TICKLE;
|
||||
unsigned IOCTL_WDOGIOC_GTICKLER = WDOGIOC_GTICKLER;
|
||||
unsigned IOCTL_WDOGIOC_GWDOGS = WDOGIOC_GWDOGS;
|
||||
unsigned IOCTL_KCOV_IOC_SETBUFSIZE = KCOV_IOC_SETBUFSIZE;
|
||||
unsigned IOCTL_KCOV_IOC_ENABLE = KCOV_IOC_ENABLE;
|
||||
unsigned IOCTL_KCOV_IOC_DISABLE = KCOV_IOC_DISABLE;
|
||||
unsigned IOCTL_IPMICTL_RECEIVE_MSG_TRUNC = IPMICTL_RECEIVE_MSG_TRUNC;
|
||||
unsigned IOCTL_IPMICTL_RECEIVE_MSG = IPMICTL_RECEIVE_MSG;
|
||||
unsigned IOCTL_IPMICTL_SEND_COMMAND = IPMICTL_SEND_COMMAND;
|
||||
unsigned IOCTL_IPMICTL_REGISTER_FOR_CMD = IPMICTL_REGISTER_FOR_CMD;
|
||||
unsigned IOCTL_IPMICTL_UNREGISTER_FOR_CMD = IPMICTL_UNREGISTER_FOR_CMD;
|
||||
unsigned IOCTL_IPMICTL_SET_GETS_EVENTS_CMD = IPMICTL_SET_GETS_EVENTS_CMD;
|
||||
unsigned IOCTL_IPMICTL_SET_MY_ADDRESS_CMD = IPMICTL_SET_MY_ADDRESS_CMD;
|
||||
unsigned IOCTL_IPMICTL_GET_MY_ADDRESS_CMD = IPMICTL_GET_MY_ADDRESS_CMD;
|
||||
unsigned IOCTL_IPMICTL_SET_MY_LUN_CMD = IPMICTL_SET_MY_LUN_CMD;
|
||||
unsigned IOCTL_IPMICTL_GET_MY_LUN_CMD = IPMICTL_GET_MY_LUN_CMD;
|
||||
unsigned IOCTL_SNDCTL_DSP_RESET = SNDCTL_DSP_RESET;
|
||||
unsigned IOCTL_SNDCTL_DSP_SYNC = SNDCTL_DSP_SYNC;
|
||||
unsigned IOCTL_SNDCTL_DSP_SPEED = SNDCTL_DSP_SPEED;
|
||||
|
@ -849,6 +849,25 @@ extern unsigned struct_usb_config_desc_sz;
|
||||
extern unsigned struct_usb_ctl_report_desc_sz;
|
||||
extern unsigned struct_usb_ctl_report_sz;
|
||||
extern unsigned struct_usb_ctl_request_sz;
|
||||
#if defined(__x86_64__)
|
||||
extern unsigned struct_nvmm_ioc_capability_sz;
|
||||
extern unsigned struct_nvmm_ioc_machine_create_sz;
|
||||
extern unsigned struct_nvmm_ioc_machine_destroy_sz;
|
||||
extern unsigned struct_nvmm_ioc_machine_configure_sz;
|
||||
extern unsigned struct_nvmm_ioc_vcpu_create_sz;
|
||||
extern unsigned struct_nvmm_ioc_vcpu_destroy_sz;
|
||||
extern unsigned struct_nvmm_ioc_vcpu_setstate_sz;
|
||||
extern unsigned struct_nvmm_ioc_vcpu_getstate_sz;
|
||||
extern unsigned struct_nvmm_ioc_vcpu_inject_sz;
|
||||
extern unsigned struct_nvmm_ioc_vcpu_run_sz;
|
||||
extern unsigned struct_nvmm_ioc_gpa_map_sz;
|
||||
extern unsigned struct_nvmm_ioc_gpa_unmap_sz;
|
||||
extern unsigned struct_nvmm_ioc_hva_map_sz;
|
||||
extern unsigned struct_nvmm_ioc_hva_unmap_sz;
|
||||
extern unsigned struct_nvmm_ioc_ctl_sz;
|
||||
#endif
|
||||
extern unsigned struct_spi_ioctl_configure_sz;
|
||||
extern unsigned struct_spi_ioctl_transfer_sz;
|
||||
extern unsigned struct_autofs_daemon_request_sz;
|
||||
extern unsigned struct_autofs_daemon_done_sz;
|
||||
extern unsigned struct_sctp_connectx_addrs_sz;
|
||||
@ -891,6 +910,9 @@ extern unsigned struct_vnd_user_sz;
|
||||
extern unsigned struct_vt_stat_sz;
|
||||
extern unsigned struct_wdog_conf_sz;
|
||||
extern unsigned struct_wdog_mode_sz;
|
||||
extern unsigned struct_ipmi_recv_sz;
|
||||
extern unsigned struct_ipmi_req_sz;
|
||||
extern unsigned struct_ipmi_cmdspec_sz;
|
||||
extern unsigned struct_wfq_conf_sz;
|
||||
extern unsigned struct_wfq_getqid_sz;
|
||||
extern unsigned struct_wfq_getstats_sz;
|
||||
@ -969,6 +991,7 @@ extern unsigned struct_iscsi_wait_event_parameters_sz;
|
||||
extern unsigned struct_isp_stats_sz;
|
||||
extern unsigned struct_lsenable_sz;
|
||||
extern unsigned struct_lsdisable_sz;
|
||||
extern unsigned struct_audio_format_query_sz;
|
||||
extern unsigned struct_mixer_ctrl_sz;
|
||||
extern unsigned struct_mixer_devinfo_sz;
|
||||
extern unsigned struct_mpu_command_rec_sz;
|
||||
@ -1575,7 +1598,7 @@ extern unsigned IOCTL_SPKRTONE;
|
||||
extern unsigned IOCTL_SPKRTUNE;
|
||||
extern unsigned IOCTL_SPKRGETVOL;
|
||||
extern unsigned IOCTL_SPKRSETVOL;
|
||||
#if 0 /* interfaces are WIP */
|
||||
#if defined(__x86_64__)
|
||||
extern unsigned IOCTL_NVMM_IOC_CAPABILITY;
|
||||
extern unsigned IOCTL_NVMM_IOC_MACHINE_CREATE;
|
||||
extern unsigned IOCTL_NVMM_IOC_MACHINE_DESTROY;
|
||||
@ -1590,6 +1613,7 @@ extern unsigned IOCTL_NVMM_IOC_GPA_MAP;
|
||||
extern unsigned IOCTL_NVMM_IOC_GPA_UNMAP;
|
||||
extern unsigned IOCTL_NVMM_IOC_HVA_MAP;
|
||||
extern unsigned IOCTL_NVMM_IOC_HVA_UNMAP;
|
||||
extern unsigned IOCTL_NVMM_IOC_CTL;
|
||||
#endif
|
||||
extern unsigned IOCTL_AUTOFSREQUEST;
|
||||
extern unsigned IOCTL_AUTOFSDONE;
|
||||
@ -1808,6 +1832,9 @@ extern unsigned IOCTL_AUDIO_GETPROPS;
|
||||
extern unsigned IOCTL_AUDIO_GETBUFINFO;
|
||||
extern unsigned IOCTL_AUDIO_SETCHAN;
|
||||
extern unsigned IOCTL_AUDIO_GETCHAN;
|
||||
extern unsigned IOCTL_AUDIO_QUERYFORMAT;
|
||||
extern unsigned IOCTL_AUDIO_GETFORMAT;
|
||||
extern unsigned IOCTL_AUDIO_SETFORMAT;
|
||||
extern unsigned IOCTL_AUDIO_MIXER_READ;
|
||||
extern unsigned IOCTL_AUDIO_MIXER_WRITE;
|
||||
extern unsigned IOCTL_AUDIO_MIXER_DEVINFO;
|
||||
@ -1893,6 +1920,7 @@ extern unsigned IOCTL_DIOCTUR;
|
||||
extern unsigned IOCTL_DIOCMWEDGES;
|
||||
extern unsigned IOCTL_DIOCGSECTORSIZE;
|
||||
extern unsigned IOCTL_DIOCGMEDIASIZE;
|
||||
extern unsigned IOCTL_DIOCRMWEDGES;
|
||||
extern unsigned IOCTL_DRVDETACHDEV;
|
||||
extern unsigned IOCTL_DRVRESCANBUS;
|
||||
extern unsigned IOCTL_DRVCTLCOMMAND;
|
||||
@ -1994,6 +2022,8 @@ extern unsigned IOCTL_SEQUENCER_TMR_TEMPO;
|
||||
extern unsigned IOCTL_SEQUENCER_TMR_SOURCE;
|
||||
extern unsigned IOCTL_SEQUENCER_TMR_METRONOME;
|
||||
extern unsigned IOCTL_SEQUENCER_TMR_SELECT;
|
||||
extern unsigned IOCTL_SPI_IOCTL_CONFIGURE;
|
||||
extern unsigned IOCTL_SPI_IOCTL_TRANSFER;
|
||||
extern unsigned IOCTL_MTIOCTOP;
|
||||
extern unsigned IOCTL_MTIOCGET;
|
||||
extern unsigned IOCTL_MTIOCIEOT;
|
||||
@ -2097,6 +2127,8 @@ extern unsigned IOCTL_SIOCSLINKSTR;
|
||||
extern unsigned IOCTL_SIOCGETHERCAP;
|
||||
extern unsigned IOCTL_SIOCGIFINDEX;
|
||||
extern unsigned IOCTL_SIOCSETHERCAP;
|
||||
extern unsigned IOCTL_SIOCSIFDESCR;
|
||||
extern unsigned IOCTL_SIOCGIFDESCR;
|
||||
extern unsigned IOCTL_SIOCGUMBINFO;
|
||||
extern unsigned IOCTL_SIOCSUMBPARAM;
|
||||
extern unsigned IOCTL_SIOCGUMBPARAM;
|
||||
@ -2221,6 +2253,19 @@ extern unsigned IOCTL_WDOGIOC_WHICH;
|
||||
extern unsigned IOCTL_WDOGIOC_TICKLE;
|
||||
extern unsigned IOCTL_WDOGIOC_GTICKLER;
|
||||
extern unsigned IOCTL_WDOGIOC_GWDOGS;
|
||||
extern unsigned IOCTL_KCOV_IOC_SETBUFSIZE;
|
||||
extern unsigned IOCTL_KCOV_IOC_ENABLE;
|
||||
extern unsigned IOCTL_KCOV_IOC_DISABLE;
|
||||
extern unsigned IOCTL_IPMICTL_RECEIVE_MSG_TRUNC;
|
||||
extern unsigned IOCTL_IPMICTL_RECEIVE_MSG;
|
||||
extern unsigned IOCTL_IPMICTL_SEND_COMMAND;
|
||||
extern unsigned IOCTL_IPMICTL_REGISTER_FOR_CMD;
|
||||
extern unsigned IOCTL_IPMICTL_UNREGISTER_FOR_CMD;
|
||||
extern unsigned IOCTL_IPMICTL_SET_GETS_EVENTS_CMD;
|
||||
extern unsigned IOCTL_IPMICTL_SET_MY_ADDRESS_CMD;
|
||||
extern unsigned IOCTL_IPMICTL_GET_MY_ADDRESS_CMD;
|
||||
extern unsigned IOCTL_IPMICTL_SET_MY_LUN_CMD;
|
||||
extern unsigned IOCTL_IPMICTL_GET_MY_LUN_CMD;
|
||||
extern unsigned IOCTL_SNDCTL_DSP_RESET;
|
||||
extern unsigned IOCTL_SNDCTL_DSP_SYNC;
|
||||
extern unsigned IOCTL_SNDCTL_DSP_SPEED;
|
||||
|
@ -149,6 +149,7 @@ static void BackgroundThread(void *arg) {
|
||||
// We don't use ScopedIgnoreInterceptors, because we want ignores to be
|
||||
// enabled even when the thread function exits (e.g. during pthread thread
|
||||
// shutdown code).
|
||||
cur_thread_init();
|
||||
cur_thread()->ignore_interceptors++;
|
||||
const u64 kMs2Ns = 1000 * 1000;
|
||||
|
||||
|
@ -949,7 +949,7 @@ template <typename DerivedT> class AAResultBase {
|
||||
|
||||
/// A pointer to the AAResults object that this AAResult is
|
||||
/// aggregated within. May be null if not aggregated.
|
||||
AAResults *AAR;
|
||||
AAResults *AAR = nullptr;
|
||||
|
||||
/// Helper to dispatch calls back through the derived type.
|
||||
DerivedT &derived() { return static_cast<DerivedT &>(*this); }
|
||||
|
@ -269,7 +269,13 @@ class SelectionDAG {
|
||||
|
||||
using CallSiteInfo = MachineFunction::CallSiteInfo;
|
||||
using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
|
||||
DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo;
|
||||
|
||||
struct CallSiteDbgInfo {
|
||||
CallSiteInfo CSInfo;
|
||||
MDNode *HeapAllocSite = nullptr;
|
||||
};
|
||||
|
||||
DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo;
|
||||
|
||||
uint16_t NextPersistentId = 0;
|
||||
|
||||
@ -1664,16 +1670,28 @@ class SelectionDAG {
|
||||
}
|
||||
|
||||
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
|
||||
SDCallSiteInfo[CallNode] = std::move(CallInfo);
|
||||
SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo);
|
||||
}
|
||||
|
||||
CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
|
||||
auto I = SDCallSiteInfo.find(CallNode);
|
||||
if (I != SDCallSiteInfo.end())
|
||||
return std::move(I->second);
|
||||
auto I = SDCallSiteDbgInfo.find(CallNode);
|
||||
if (I != SDCallSiteDbgInfo.end())
|
||||
return std::move(I->second).CSInfo;
|
||||
return CallSiteInfo();
|
||||
}
|
||||
|
||||
/// Record MD as the HeapAllocSite metadata associated with the SDNode.
void addHeapAllocSite(const SDNode *Node, MDNode *MD) {
|
||||
SDCallSiteDbgInfo[Node].HeapAllocSite = MD;
|
||||
}
|
||||
|
||||
/// Return the HeapAllocSite type associated with the SDNode, if it exists.
|
||||
MDNode *getHeapAllocSite(const SDNode *Node) {
|
||||
auto It = SDCallSiteDbgInfo.find(Node);
|
||||
if (It == SDCallSiteDbgInfo.end())
|
||||
return nullptr;
|
||||
return It->second.HeapAllocSite;
|
||||
}
|
||||
|
||||
private:
|
||||
void InsertNode(SDNode *N);
|
||||
bool RemoveNodeFromCSEMaps(SDNode *N);
|
||||
|
@ -3665,6 +3665,7 @@ class TargetLowering : public TargetLoweringBase {
|
||||
C_Register, // Constraint represents specific register(s).
|
||||
C_RegisterClass, // Constraint represents any of register(s) in class.
|
||||
C_Memory, // Memory constraint.
|
||||
C_Immediate, // Requires an immediate.
|
||||
C_Other, // Something else.
|
||||
C_Unknown // Unsupported constraint.
|
||||
};
|
||||
|
@ -16,6 +16,7 @@
|
||||
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ExecutionEngine/JITSymbol.h"
|
||||
#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
|
||||
#include <memory>
|
||||
|
||||
namespace llvm {
|
||||
|
@ -112,6 +112,9 @@ namespace llvm {
|
||||
/// number of section symbols with the same name).
|
||||
StringMap<bool, BumpPtrAllocator &> UsedNames;
|
||||
|
||||
/// Keeps track of labels that are used in inline assembly.
|
||||
SymbolTable InlineAsmUsedLabelNames;
|
||||
|
||||
/// The next ID to dole out to an unnamed assembler temporary symbol with
|
||||
/// a given prefix.
|
||||
StringMap<unsigned> NextID;
|
||||
@ -377,6 +380,16 @@ namespace llvm {
|
||||
/// APIs.
|
||||
const SymbolTable &getSymbols() const { return Symbols; }
|
||||
|
||||
/// getInlineAsmLabel - Return the symbol if the name is a label referenced in
|
||||
/// inline assembly.
|
||||
MCSymbol *getInlineAsmLabel(StringRef Name) const {
|
||||
return InlineAsmUsedLabelNames.lookup(Name);
|
||||
}
|
||||
|
||||
/// registerInlineAsmLabel - Records that the name is a label referenced in
|
||||
/// inline assembly.
|
||||
void registerInlineAsmLabel(MCSymbol *Sym);
|
||||
|
||||
/// @}
|
||||
|
||||
/// \name Section Management
|
||||
|
@ -50,35 +50,35 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
|
||||
#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
|
||||
#endif
|
||||
// FIXME: This would be nicer were it tablegen
|
||||
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
|
||||
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
|
||||
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
|
||||
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
|
||||
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
|
||||
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
|
||||
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
|
||||
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
|
||||
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
|
||||
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
|
||||
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
|
||||
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
|
||||
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
|
||||
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
|
||||
AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm")
|
||||
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
|
||||
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
|
||||
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
|
||||
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
|
||||
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
|
||||
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
|
||||
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
|
||||
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
|
||||
AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
|
||||
AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
|
||||
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
|
||||
AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
|
||||
AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
|
||||
AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
|
||||
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
|
||||
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
|
||||
AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
|
||||
AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
|
||||
AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
|
||||
AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
|
||||
AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
|
||||
AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
|
||||
AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm")
|
||||
AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
|
||||
AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
|
||||
AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
|
||||
AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
|
||||
AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
|
||||
AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
|
||||
#undef AARCH64_ARCH_EXT_NAME
|
||||
|
||||
#ifndef AARCH64_CPU_NAME
|
||||
|
@ -53,7 +53,7 @@ enum ArchExtKind : unsigned {
|
||||
AEK_SVE2AES = 1 << 24,
|
||||
AEK_SVE2SM4 = 1 << 25,
|
||||
AEK_SVE2SHA3 = 1 << 26,
|
||||
AEK_BITPERM = 1 << 27,
|
||||
AEK_SVE2BITPERM = 1 << 27,
|
||||
};
|
||||
|
||||
enum class ArchKind {
|
||||
|
@ -39,19 +39,13 @@ enum ArchExtKind : unsigned {
|
||||
AEK_DSP = 1 << 10,
|
||||
AEK_FP16 = 1 << 11,
|
||||
AEK_RAS = 1 << 12,
|
||||
AEK_SVE = 1 << 13,
|
||||
AEK_DOTPROD = 1 << 14,
|
||||
AEK_SHA2 = 1 << 15,
|
||||
AEK_AES = 1 << 16,
|
||||
AEK_FP16FML = 1 << 17,
|
||||
AEK_SB = 1 << 18,
|
||||
AEK_SVE2 = 1 << 19,
|
||||
AEK_SVE2AES = 1 << 20,
|
||||
AEK_SVE2SM4 = 1 << 21,
|
||||
AEK_SVE2SHA3 = 1 << 22,
|
||||
AEK_BITPERM = 1 << 23,
|
||||
AEK_FP_DP = 1 << 24,
|
||||
AEK_LOB = 1 << 25,
|
||||
AEK_DOTPROD = 1 << 13,
|
||||
AEK_SHA2 = 1 << 14,
|
||||
AEK_AES = 1 << 15,
|
||||
AEK_FP16FML = 1 << 16,
|
||||
AEK_SB = 1 << 17,
|
||||
AEK_FP_DP = 1 << 18,
|
||||
AEK_LOB = 1 << 19,
|
||||
// Unsupported extensions.
|
||||
AEK_OS = 0x8000000,
|
||||
AEK_IWMMXT = 0x10000000,
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseMapInfo.h"
|
||||
#include "llvm/IR/ValueHandle.h"
|
||||
#include <cstdint>
|
||||
|
||||
namespace llvm {
|
||||
@ -28,8 +29,8 @@ class Value;
|
||||
|
||||
struct DivRemMapKey {
|
||||
bool SignedOp;
|
||||
Value *Dividend;
|
||||
Value *Divisor;
|
||||
AssertingVH<Value> Dividend;
|
||||
AssertingVH<Value> Divisor;
|
||||
|
||||
DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
|
||||
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
|
||||
@ -50,8 +51,10 @@ template <> struct DenseMapInfo<DivRemMapKey> {
|
||||
}
|
||||
|
||||
static unsigned getHashValue(const DivRemMapKey &Val) {
|
||||
return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
|
||||
reinterpret_cast<uintptr_t>(Val.Divisor)) ^
|
||||
return (unsigned)(reinterpret_cast<uintptr_t>(
|
||||
static_cast<Value *>(Val.Dividend)) ^
|
||||
reinterpret_cast<uintptr_t>(
|
||||
static_cast<Value *>(Val.Divisor))) ^
|
||||
(unsigned)Val.SignedOp;
|
||||
}
|
||||
};
|
||||
|
@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
|
||||
const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
|
||||
MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
|
||||
Sym->print(OS, AP->MAI);
|
||||
MMI->getContext().registerInlineAsmLabel(Sym);
|
||||
} else if (MI->getOperand(OpNo).isMBB()) {
|
||||
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
|
||||
Sym->print(OS, AP->MAI);
|
||||
|
@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
|
||||
TheUse = InsertedShift;
|
||||
}
|
||||
|
||||
// If we removed all uses, nuke the shift.
|
||||
// If we removed all uses, or there are none, nuke the shift.
|
||||
if (ShiftI->use_empty()) {
|
||||
salvageDebugInfo(*ShiftI);
|
||||
ShiftI->eraseFromParent();
|
||||
MadeChange = true;
|
||||
}
|
||||
|
||||
return MadeChange;
|
||||
|
@ -691,9 +691,17 @@ void LiveDebugValues::insertTransferDebugPair(
|
||||
"No register supplied when handling a restore of a debug value");
|
||||
MachineFunction *MF = MI.getMF();
|
||||
DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
|
||||
|
||||
const DIExpression *NewExpr;
|
||||
if (auto Fragment = DebugInstr->getDebugExpression()->getFragmentInfo())
|
||||
NewExpr = *DIExpression::createFragmentExpression(DIB.createExpression(),
|
||||
Fragment->OffsetInBits, Fragment->SizeInBits);
|
||||
else
|
||||
NewExpr = DIB.createExpression();
|
||||
|
||||
NewDebugInstr =
|
||||
BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
|
||||
NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
|
||||
NewReg, DebugInstr->getDebugVariable(), NewExpr);
|
||||
VarLoc VL(*NewDebugInstr, LS);
|
||||
ProcessVarLoc(VL, NewDebugInstr);
|
||||
LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
|
||||
@ -848,9 +856,14 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
|
||||
<< "\n");
|
||||
}
|
||||
// Check if the register or spill location is the location of a debug value.
|
||||
// FIXME: Don't create a spill transfer if there is a complex expression,
|
||||
// because we currently cannot recover the original expression on restore.
|
||||
for (unsigned ID : OpenRanges.getVarLocs()) {
|
||||
const MachineInstr *DebugInstr = &VarLocIDs[ID].MI;
|
||||
|
||||
if (TKind == TransferKind::TransferSpill &&
|
||||
VarLocIDs[ID].isDescribedByReg() == Reg) {
|
||||
VarLocIDs[ID].isDescribedByReg() == Reg &&
|
||||
!DebugInstr->getDebugExpression()->isComplex()) {
|
||||
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
|
||||
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
|
||||
} else if (TKind == TransferKind::TransferRestore &&
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
@ -66,6 +67,7 @@ namespace {
|
||||
AliasAnalysis *AA;
|
||||
MachineDominatorTree *DT;
|
||||
MachineRegisterInfo *MRI;
|
||||
MachineBlockFrequencyInfo *MBFI;
|
||||
|
||||
public:
|
||||
static char ID; // Pass identification
|
||||
@ -83,6 +85,8 @@ namespace {
|
||||
AU.addPreservedID(MachineLoopInfoID);
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
AU.addRequired<MachineBlockFrequencyInfo>();
|
||||
AU.addPreserved<MachineBlockFrequencyInfo>();
|
||||
}
|
||||
|
||||
void releaseMemory() override {
|
||||
@ -133,6 +137,11 @@ namespace {
|
||||
bool isPRECandidate(MachineInstr *MI);
|
||||
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
|
||||
bool PerformSimplePRE(MachineDominatorTree *DT);
|
||||
/// Heuristics to see if it's beneficial to move common computations of MBB
|
||||
/// and MBB1 to CandidateBB.
|
||||
bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
|
||||
MachineBasicBlock *MBB,
|
||||
MachineBasicBlock *MBB1);
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
|
||||
if (!CMBB->isLegalToHoistInto())
|
||||
continue;
|
||||
|
||||
if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
|
||||
continue;
|
||||
|
||||
// Two instrs are partial redundant if their basic blocks are reachable
|
||||
// from one to another but one doesn't dominate another.
|
||||
if (CMBB != MBB1) {
|
||||
@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
|
||||
return Changed;
|
||||
}
|
||||
|
||||
// Heuristic deciding whether the computation shared by MBB and MBB1 should be
// hoisted into CandidateBB.
//
// Returns true unconditionally when hasMinSize() holds for the function that
// contains CandidateBB. Otherwise returns true only when the block frequency
// of CandidateBB (as reported by MBFI) is not greater than the sum of the
// block frequencies of MBB and MBB1.
//
// Reads the DT and MBFI members; both must already be set when this is called.
bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
|
||||
MachineBasicBlock *MBB,
|
||||
MachineBasicBlock *MBB1) {
|
||||
// With hasMinSize() the frequency comparison below is bypassed entirely.
if (CandidateBB->getParent()->getFunction().hasMinSize())
|
||||
return true;
|
||||
// Precondition, checked only through assert: CandidateBB dominates both
// blocks whose computation would be merged into it.
assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
|
||||
assert(DT->dominates(CandidateBB, MBB1) &&
|
||||
"CandidateBB should dominate MBB1");
|
||||
// Accept the hoist only if CandidateBB is no more frequent than MBB and MBB1
// taken together.
return MBFI->getBlockFreq(CandidateBB) <=
|
||||
MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
|
||||
}
|
||||
|
||||
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
|
||||
if (skipFunction(MF.getFunction()))
|
||||
return false;
|
||||
@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
|
||||
MRI = &MF.getRegInfo();
|
||||
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
|
||||
DT = &getAnalysis<MachineDominatorTree>();
|
||||
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
|
||||
LookAheadLimit = TII->getMachineCSELookAheadLimit();
|
||||
bool ChangedPRE, ChangedCSE;
|
||||
ChangedPRE = PerformSimplePRE(DT);
|
||||
|
@ -121,7 +121,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
|
||||
BBCallbacks.back().setMap(this);
|
||||
Entry.Index = BBCallbacks.size() - 1;
|
||||
Entry.Fn = BB->getParent();
|
||||
Entry.Symbols.push_back(Context.createTempSymbol());
|
||||
Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken()));
|
||||
return Entry.Symbols;
|
||||
}
|
||||
|
||||
|
@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
|
||||
// Remember the source order of the inserted instruction.
|
||||
if (HasDbg)
|
||||
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
|
||||
|
||||
if (MDNode *MD = DAG->getHeapAllocSite(N)) {
|
||||
if (NewInsn && NewInsn->isCall())
|
||||
MF.addCodeViewHeapAllocSite(NewInsn, MD);
|
||||
}
|
||||
|
||||
GluedNodes.pop_back();
|
||||
}
|
||||
auto NewInsn =
|
||||
@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
|
||||
if (HasDbg)
|
||||
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
|
||||
NewInsn);
|
||||
if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
|
||||
if (NewInsn && NewInsn->isCall())
|
||||
MF.addCodeViewHeapAllocSite(NewInsn, MD);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert all the dbg_values which have not already been inserted in source
|
||||
|
@ -1084,6 +1084,7 @@ void SelectionDAG::clear() {
|
||||
ExternalSymbols.clear();
|
||||
TargetExternalSymbols.clear();
|
||||
MCSymbols.clear();
|
||||
SDCallSiteDbgInfo.clear();
|
||||
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
|
||||
static_cast<CondCodeSDNode*>(nullptr));
|
||||
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
|
||||
|
@ -8021,6 +8021,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
// Compute the constraint code and ConstraintType to use.
|
||||
TLI.ComputeConstraintToUse(T, SDValue());
|
||||
|
||||
if (T.ConstraintType == TargetLowering::C_Immediate &&
|
||||
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
|
||||
// We've delayed emitting a diagnostic like the "n" constraint because
|
||||
// inlining could cause an integer showing up.
|
||||
return emitInlineAsmError(
|
||||
CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
|
||||
"integer constant expression");
|
||||
|
||||
ExtraInfo.update(T);
|
||||
}
|
||||
|
||||
@ -8105,7 +8113,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
switch (OpInfo.Type) {
|
||||
case InlineAsm::isOutput:
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
|
||||
(OpInfo.ConstraintType == TargetLowering::C_Other &&
|
||||
((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
|
||||
OpInfo.isIndirect)) {
|
||||
unsigned ConstraintID =
|
||||
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
|
||||
@ -8119,13 +8128,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
MVT::i32));
|
||||
AsmNodeOperands.push_back(OpInfo.CallOperand);
|
||||
break;
|
||||
} else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
|
||||
} else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
|
||||
!OpInfo.isIndirect) ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Register ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
|
||||
// Otherwise, this outputs to a register (directly for C_Register /
|
||||
// C_RegisterClass, and a target-defined fashion for C_Other). Find a
|
||||
// register that we can use.
|
||||
// C_RegisterClass, and a target-defined fashion for
|
||||
// C_Immediate/C_Other). Find a register that we can use.
|
||||
if (OpInfo.AssignedRegs.Regs.empty()) {
|
||||
emitInlineAsmError(
|
||||
CS, "couldn't allocate output register for constraint '" +
|
||||
@ -8205,15 +8215,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
}
|
||||
|
||||
// Treat indirect 'X' constraint as memory.
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Other &&
|
||||
if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Other) &&
|
||||
OpInfo.isIndirect)
|
||||
OpInfo.ConstraintType = TargetLowering::C_Memory;
|
||||
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Other) {
|
||||
std::vector<SDValue> Ops;
|
||||
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
|
||||
Ops, DAG);
|
||||
if (Ops.empty()) {
|
||||
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
|
||||
if (isa<ConstantSDNode>(InOperandVal)) {
|
||||
emitInlineAsmError(CS, "value out of range for constraint '" +
|
||||
Twine(OpInfo.ConstraintCode) + "'");
|
||||
return;
|
||||
}
|
||||
|
||||
emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
|
||||
Twine(OpInfo.ConstraintCode) + "'");
|
||||
return;
|
||||
@ -8250,7 +8269,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
}
|
||||
|
||||
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
|
||||
OpInfo.ConstraintType == TargetLowering::C_Register ||
|
||||
OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
|
||||
"Unknown constraint type!");
|
||||
|
||||
// TODO: Support this.
|
||||
@ -8356,6 +8376,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
|
||||
Val = OpInfo.AssignedRegs.getCopyFromRegs(
|
||||
DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
|
||||
break;
|
||||
case TargetLowering::C_Immediate:
|
||||
case TargetLowering::C_Other:
|
||||
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
|
||||
OpInfo, DAG);
|
||||
|
@ -3567,15 +3567,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
if (S == 1) {
|
||||
switch (Constraint[0]) {
|
||||
default: break;
|
||||
case 'r': return C_RegisterClass;
|
||||
case 'r':
|
||||
return C_RegisterClass;
|
||||
case 'm': // memory
|
||||
case 'o': // offsetable
|
||||
case 'V': // not offsetable
|
||||
return C_Memory;
|
||||
case 'i': // Simple Integer or Relocatable Constant
|
||||
case 'n': // Simple Integer
|
||||
case 'E': // Floating Point Constant
|
||||
case 'F': // Floating Point Constant
|
||||
return C_Immediate;
|
||||
case 'i': // Simple Integer or Relocatable Constant
|
||||
case 's': // Relocatable Constant
|
||||
case 'p': // Address.
|
||||
case 'X': // Allow ANY value.
|
||||
@ -3950,6 +3952,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
|
||||
/// Return an integer indicating how general CT is.
|
||||
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
|
||||
switch (CT) {
|
||||
case TargetLowering::C_Immediate:
|
||||
case TargetLowering::C_Other:
|
||||
case TargetLowering::C_Unknown:
|
||||
return 0;
|
||||
@ -4069,11 +4072,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
|
||||
TargetLowering::ConstraintType CType =
|
||||
TLI.getConstraintType(OpInfo.Codes[i]);
|
||||
|
||||
// If this is an 'other' constraint, see if the operand is valid for it.
|
||||
// For example, on X86 we might have an 'rI' constraint. If the operand
|
||||
// is an integer in the range [0..31] we want to use I (saving a load
|
||||
// of a register), otherwise we must use 'r'.
|
||||
if (CType == TargetLowering::C_Other && Op.getNode()) {
|
||||
// If this is an 'other' or 'immediate' constraint, see if the operand is
|
||||
// valid for it. For example, on X86 we might have an 'rI' constraint. If
|
||||
// the operand is an integer in the range [0..31] we want to use I (saving a
|
||||
// load of a register), otherwise we must use 'r'.
|
||||
if ((CType == TargetLowering::C_Other ||
|
||||
CType == TargetLowering::C_Immediate) && Op.getNode()) {
|
||||
assert(OpInfo.Codes[i].size() == 1 &&
|
||||
"Unhandled multi-letter 'other' constraint");
|
||||
std::vector<SDValue> ResultOps;
|
||||
|
@ -61,6 +61,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
|
||||
bool DoAutoReset)
|
||||
: SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi),
|
||||
Symbols(Allocator), UsedNames(Allocator),
|
||||
InlineAsmUsedLabelNames(Allocator),
|
||||
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
|
||||
AutoReset(DoAutoReset) {
|
||||
SecureLogFile = AsSecureLogFileName;
|
||||
@ -90,6 +91,7 @@ void MCContext::reset() {
|
||||
XCOFFAllocator.DestroyAll();
|
||||
|
||||
MCSubtargetAllocator.DestroyAll();
|
||||
InlineAsmUsedLabelNames.clear();
|
||||
UsedNames.clear();
|
||||
Symbols.clear();
|
||||
Allocator.Reset();
|
||||
@ -272,6 +274,10 @@ void MCContext::setSymbolValue(MCStreamer &Streamer,
|
||||
Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this));
|
||||
}
|
||||
|
||||
// Records Sym in InlineAsmUsedLabelNames under the symbol's own name.
// Sym is dereferenced to obtain that name, so it must not be null.
void MCContext::registerInlineAsmLabel(MCSymbol *Sym) {
|
||||
// Stored via operator[]; presumably a later registration under the same name
// replaces the earlier one -- confirm against the container's type.
InlineAsmUsedLabelNames[Sym->getName()] = Sym;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Section Management
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1142,7 +1142,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
|
||||
}
|
||||
}
|
||||
|
||||
MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
|
||||
MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
|
||||
if (!Sym)
|
||||
Sym = getContext().getOrCreateSymbol(SymbolName);
|
||||
|
||||
// If this is an absolute variable reference, substitute it now to preserve
|
||||
// semantics in the face of reassignment.
|
||||
|
@ -90,9 +90,9 @@ static bool supportsBPF(uint64_t Type) {
|
||||
static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) {
|
||||
switch (R.getType()) {
|
||||
case ELF::R_BPF_64_32:
|
||||
return S & 0xFFFFFFFF;
|
||||
return (S + A) & 0xFFFFFFFF;
|
||||
case ELF::R_BPF_64_64:
|
||||
return S;
|
||||
return S + A;
|
||||
default:
|
||||
llvm_unreachable("Invalid relocation type");
|
||||
}
|
||||
|
@ -96,8 +96,8 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
|
||||
Features.push_back("+sve2-sm4");
|
||||
if (Extensions & AEK_SVE2SHA3)
|
||||
Features.push_back("+sve2-sha3");
|
||||
if (Extensions & AEK_BITPERM)
|
||||
Features.push_back("+bitperm");
|
||||
if (Extensions & AEK_SVE2BITPERM)
|
||||
Features.push_back("+sve2-bitperm");
|
||||
if (Extensions & AEK_RCPC)
|
||||
Features.push_back("+rcpc");
|
||||
|
||||
|
@ -1200,7 +1200,7 @@ namespace fs {
|
||||
/// implementation.
|
||||
std::error_code copy_file(const Twine &From, const Twine &To) {
|
||||
uint32_t Flag = COPYFILE_DATA;
|
||||
#if __has_builtin(__builtin_available)
|
||||
#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE)
|
||||
if (__builtin_available(macos 10.12, *)) {
|
||||
bool IsSymlink;
|
||||
if (std::error_code Error = is_symlink_file(From, IsSymlink))
|
||||
|
@ -115,7 +115,7 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
|
||||
def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
|
||||
"Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
|
||||
|
||||
def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
|
||||
def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true",
|
||||
"Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
|
||||
|
||||
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
|
||||
|
@ -606,6 +606,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
|
||||
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
|
||||
|
||||
MaxLoadsPerMemcmpOptSize = 4;
|
||||
MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
|
||||
? MaxLoadsPerMemcmpOptSize : 8;
|
||||
|
||||
setStackPointerRegisterToSaveRestore(AArch64::SP);
|
||||
|
||||
setSchedulingPreference(Sched::Hybrid);
|
||||
@ -5661,8 +5665,6 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
switch (Constraint[0]) {
|
||||
default:
|
||||
break;
|
||||
case 'z':
|
||||
return C_Other;
|
||||
case 'x':
|
||||
case 'w':
|
||||
return C_RegisterClass;
|
||||
@ -5670,6 +5672,16 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
// currently handle addresses it is the same as 'r'.
|
||||
case 'Q':
|
||||
return C_Memory;
|
||||
case 'I':
|
||||
case 'J':
|
||||
case 'K':
|
||||
case 'L':
|
||||
case 'M':
|
||||
case 'N':
|
||||
case 'Y':
|
||||
case 'Z':
|
||||
return C_Immediate;
|
||||
case 'z':
|
||||
case 'S': // A symbolic address
|
||||
return C_Other;
|
||||
}
|
||||
|
@ -116,7 +116,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
|
||||
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
|
||||
AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
|
||||
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
|
||||
AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
|
||||
AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">;
|
||||
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
|
||||
AssemblerPredicate<"FeatureRCPC", "rcpc">;
|
||||
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
|
||||
|
@ -1164,6 +1164,13 @@ let Predicates = [HasSVE2] in {
|
||||
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
|
||||
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
|
||||
|
||||
// SVE2 predicated shifts
|
||||
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
|
||||
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
|
||||
defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
|
||||
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
|
||||
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
|
||||
|
||||
// SVE2 integer add/subtract long
|
||||
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
|
||||
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
|
||||
@ -1199,14 +1206,14 @@ let Predicates = [HasSVE2] in {
|
||||
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
|
||||
|
||||
// SVE2 bitwise shift and insert
|
||||
defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
|
||||
defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;
|
||||
defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">;
|
||||
defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">;
|
||||
|
||||
// SVE2 bitwise shift right and accumulate
|
||||
defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
|
||||
defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
|
||||
defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
|
||||
defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;
|
||||
defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">;
|
||||
defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">;
|
||||
defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">;
|
||||
defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
|
||||
|
||||
// SVE2 complex integer add
|
||||
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
|
||||
@ -1228,41 +1235,47 @@ let Predicates = [HasSVE2] in {
|
||||
defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
|
||||
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
|
||||
|
||||
// SVE2 bitwise shift right narrow
|
||||
defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">;
|
||||
defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">;
|
||||
defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">;
|
||||
defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">;
|
||||
defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">;
|
||||
defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">;
|
||||
defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">;
|
||||
defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">;
|
||||
defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">;
|
||||
defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">;
|
||||
defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">;
|
||||
defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">;
|
||||
defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">;
|
||||
defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">;
|
||||
defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">;
|
||||
defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">;
|
||||
// SVE2 bitwise shift right narrow (bottom)
|
||||
defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
|
||||
defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
|
||||
defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
|
||||
defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
|
||||
defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
|
||||
defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
|
||||
defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
|
||||
defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
|
||||
|
||||
// SVE2 integer add/subtract narrow high part
|
||||
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">;
|
||||
defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">;
|
||||
defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">;
|
||||
defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">;
|
||||
defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">;
|
||||
defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">;
|
||||
defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">;
|
||||
defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">;
|
||||
// SVE2 bitwise shift right narrow (top)
|
||||
defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
|
||||
defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
|
||||
defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
|
||||
defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
|
||||
defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
|
||||
defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
|
||||
defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
|
||||
defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
|
||||
|
||||
// SVE2 saturating extract narrow
|
||||
defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">;
|
||||
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">;
|
||||
defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">;
|
||||
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">;
|
||||
defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
|
||||
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
|
||||
// SVE2 integer add/subtract narrow high part (bottom)
|
||||
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">;
|
||||
defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">;
|
||||
defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">;
|
||||
defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">;
|
||||
|
||||
// SVE2 integer add/subtract narrow high part (top)
|
||||
defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">;
|
||||
defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">;
|
||||
defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">;
|
||||
defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">;
|
||||
|
||||
// SVE2 saturating extract narrow (bottom)
|
||||
defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">;
|
||||
defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">;
|
||||
defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">;
|
||||
|
||||
// SVE2 saturating extract narrow (top)
|
||||
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">;
|
||||
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">;
|
||||
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">;
|
||||
|
||||
// SVE2 character match
|
||||
defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
|
||||
@ -1289,10 +1302,14 @@ let Predicates = [HasSVE2] in {
|
||||
// SVE2 histogram generation (vector)
|
||||
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
|
||||
|
||||
// SVE2 floating-point base 2 logarithm as integer
|
||||
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
|
||||
|
||||
// SVE2 floating-point convert precision
|
||||
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
|
||||
defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">;
|
||||
defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">;
|
||||
def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
|
||||
|
||||
// SVE2 floating-point pairwise operations
|
||||
defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
|
||||
@ -1321,58 +1338,45 @@ let Predicates = [HasSVE2] in {
|
||||
def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
|
||||
def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
|
||||
|
||||
// sve_int_rotate_imm
|
||||
// SVE2 bitwise xor and rotate right by immediate
|
||||
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
|
||||
|
||||
// SVE2 extract vector (immediate offset, constructive)
|
||||
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
|
||||
|
||||
// SVE floating-point convert precision
|
||||
def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
|
||||
// SVE2 non-temporal gather loads
|
||||
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
|
||||
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
|
||||
defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
|
||||
defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
|
||||
defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
|
||||
|
||||
// SVE floating-point convert to integer
|
||||
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
|
||||
|
||||
// Non-temporal contiguous loads (vector + register)
|
||||
defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
|
||||
defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
|
||||
defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
|
||||
defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
|
||||
defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
|
||||
|
||||
defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
|
||||
defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
|
||||
defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
|
||||
defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
|
||||
defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
|
||||
defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
|
||||
defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
|
||||
defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
|
||||
defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
|
||||
defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
|
||||
defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
|
||||
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
|
||||
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
|
||||
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
|
||||
|
||||
// SVE2 vector splice (constructive)
|
||||
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
|
||||
|
||||
// Predicated shifts
|
||||
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
|
||||
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
|
||||
defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
|
||||
defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
|
||||
defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
|
||||
// SVE2 non-temporal scatter stores
|
||||
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
|
||||
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
|
||||
defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
|
||||
|
||||
// Non-temporal contiguous stores (vector + register)
|
||||
defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
|
||||
defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
|
||||
defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
|
||||
defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
|
||||
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
|
||||
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
|
||||
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
|
||||
|
||||
defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
|
||||
defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
|
||||
defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
|
||||
defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
|
||||
|
||||
// SVE table lookup (three sources)
|
||||
// SVE2 table lookup (three sources)
|
||||
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
|
||||
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
|
||||
|
||||
// SVE integer compare scalar count and limit
|
||||
// SVE2 integer compare scalar count and limit
|
||||
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
|
||||
defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
|
||||
defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
|
||||
@ -1383,7 +1387,7 @@ let Predicates = [HasSVE2] in {
|
||||
defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
|
||||
defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
|
||||
|
||||
// SVE pointer conflict compare
|
||||
// SVE2 pointer conflict compare
|
||||
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
|
||||
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
|
||||
}
|
||||
|
@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
|
||||
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
|
||||
}
|
||||
|
||||
/// Build the MemCmpExpansionOptions reported for AArch64.
///
/// \param OptSize   forwarded to TLI->getMaxExpandSizeMemcmp() to obtain the
///                  load budget.
/// \param IsZeroCmp not consulted by this implementation.
/// \returns an options struct with overlapping loads enabled unless the
///          subtarget requires strict alignment, the load budget used both as
///          the overall maximum and as the per-block count, and the load
///          sizes {8, 4, 2, 1}.
AArch64TTIImpl::TTI::MemCmpExpansionOptions
|
||||
AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
|
||||
TTI::MemCmpExpansionOptions Options;
|
||||
// Overlapping loads are only allowed when strict alignment is not required.
Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
|
||||
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
|
||||
// The per-block load count is set to the same value as the overall maximum.
Options.NumLoadsPerBlock = Options.MaxNumLoads;
|
||||
// TODO: Though vector loads usually perform well on AArch64, in some targets
|
||||
// they may wake up the FP unit, which raises the power consumption. Perhaps
|
||||
// they could be used with no holds barred (-O3).
|
||||
// Only these four sizes are offered (presumably byte widths of scalar loads,
// given the TODO above about vector loads -- confirm).
Options.LoadSizes = {8, 4, 2, 1};
|
||||
return Options;
|
||||
}
|
||||
|
||||
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
|
||||
unsigned Alignment, unsigned AddressSpace,
|
||||
const Instruction *I) {
|
||||
|
@ -130,6 +130,9 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
|
||||
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
|
||||
const Instruction *I = nullptr);
|
||||
|
||||
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
|
||||
bool IsZeroCmp) const;
|
||||
|
||||
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
|
||||
unsigned AddressSpace, const Instruction *I = nullptr);
|
||||
|
||||
|
@ -2840,7 +2840,7 @@ static const struct Extension {
|
||||
{"sve2-aes", {AArch64::FeatureSVE2AES}},
|
||||
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
|
||||
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
|
||||
{"bitperm", {AArch64::FeatureSVE2BitPerm}},
|
||||
{"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}},
|
||||
// FIXME: Unsupported extensions
|
||||
{"pan", {}},
|
||||
{"lor", {}},
|
||||
|
@ -403,12 +403,12 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm> {
|
||||
}
|
||||
|
||||
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
|
||||
ZPRRegOp zprty>
|
||||
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg),
|
||||
asm, "\t$Zdn, $Pg",
|
||||
ZPRRegOp zprty, PPRRegOp pprty>
|
||||
: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm),
|
||||
asm, "\t$Zdn, $Pm",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
bits<4> Pg;
|
||||
bits<4> Pm;
|
||||
bits<5> Zdn;
|
||||
let Inst{31-24} = 0b00100101;
|
||||
let Inst{23-22} = sz8_64;
|
||||
@ -416,7 +416,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
|
||||
let Inst{18-16} = opc{4-2};
|
||||
let Inst{15-11} = 0b10000;
|
||||
let Inst{10-9} = opc{1-0};
|
||||
let Inst{8-5} = Pg;
|
||||
let Inst{8-5} = Pm;
|
||||
let Inst{4-0} = Zdn;
|
||||
|
||||
let Constraints = "$Zdn = $_Zdn";
|
||||
@ -425,9 +425,16 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
|
||||
}
|
||||
|
||||
multiclass sve_int_count_v<bits<5> opc, string asm> {
|
||||
def _H : sve_int_count_v<0b01, opc, asm, ZPR16>;
|
||||
def _S : sve_int_count_v<0b10, opc, asm, ZPR32>;
|
||||
def _D : sve_int_count_v<0b11, opc, asm, ZPR64>;
|
||||
def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>;
|
||||
def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>;
|
||||
def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>;
|
||||
|
||||
def : InstAlias<asm # "\t$Zdn, $Pm",
|
||||
(!cast<Instruction>(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>;
|
||||
def : InstAlias<asm # "\t$Zdn, $Pm",
|
||||
(!cast<Instruction>(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>;
|
||||
def : InstAlias<asm # "\t$Zdn, $Pm",
|
||||
(!cast<Instruction>(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>;
|
||||
}
|
||||
|
||||
class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
|
||||
@ -744,7 +751,7 @@ multiclass sve2_int_perm_tbl<string asm> {
|
||||
}
|
||||
|
||||
class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
|
||||
: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
|
||||
: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm),
|
||||
asm, "\t$Zd, $Zn, $Zm",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
@ -758,6 +765,8 @@ class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
|
||||
let Inst{15-10} = 0b001011;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_int_perm_tbx<string asm> {
|
||||
@ -1489,7 +1498,7 @@ multiclass sve_fp_fcadd<string asm> {
|
||||
|
||||
class sve2_fp_convert_precision<bits<4> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
|
||||
asm, "\t$Zd, $Pg/m, $Zn",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
@ -1504,6 +1513,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
|
||||
let Inst{12-10} = Pg;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_fp_convert_down_narrow<string asm> {
|
||||
@ -2399,21 +2410,40 @@ multiclass sve2_misc_bitwise<bits<4> opc, string asm> {
|
||||
def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
|
||||
}
|
||||
|
||||
multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
|
||||
let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in {
|
||||
def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>;
|
||||
def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>;
|
||||
def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>;
|
||||
def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>;
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiates the _H, _S and _D variants of an sve2_misc instruction whose
// 4-bit opcode is { 0b00, opc }. In each variant the second register class is
// half the element width of the first (ZPR16/ZPR8, ZPR32/ZPR16, ZPR64/ZPR32);
// no _B variant is defined.
// NOTE(review): sve2_misc is defined outside this view; presumably the first
// register class is the destination and the second the sources -- confirm.
multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
|
||||
def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
|
||||
def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
|
||||
def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
|
||||
}
|
||||
|
||||
// Instruction format for the SVE2 interleaved bitwise XOR family.
//
//   sz      - written to Inst{23-22}.
//   opc     - written to Inst{10}.
//   asm     - mnemonic; operands are printed as "$Zd, $Zn, $Zm".
//   zprty1  - register class of the destination $Zd and of the tied input
//             $_Zd.
//   zprty2  - register class of the sources $Zn and $Zm.
//
// $Zd is tied to $_Zd through Constraints, so the previous contents of the
// destination register are an input of the instruction. The record is marked
// DestructiveInstType = Destructive with ElementSize = ElementSizeNone.
class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
|
||||
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<5> Zn;
|
||||
bits<5> Zm;
|
||||
let Inst{31-24} = 0b01000101;
|
||||
let Inst{23-22} = sz;
|
||||
let Inst{21} = 0b0;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-11} = 0b10010;
|
||||
let Inst{10} = opc;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
// Tie the output register to the $_Zd input operand.
let Constraints = "$Zd = $_Zd";
|
||||
let DestructiveInstType = Destructive;
|
||||
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
// Instantiates the _B, _H, _S and _D variants of the
// sve2_bitwise_xor_interleaved instruction class. The size field runs from
// 0b00 to 0b11 and each variant uses the same register class (ZPR8 .. ZPR64)
// for both the destination and the sources.
multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
|
||||
def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>;
|
||||
def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>;
|
||||
def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>;
|
||||
def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>;
|
||||
}
|
||||
|
||||
class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2,
|
||||
Operand immtype>
|
||||
@ -2451,9 +2481,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
|
||||
// SVE2 Accumulate Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
|
||||
ZPRRegOp zprty, Operand immtype>
|
||||
: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
|
||||
class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
|
||||
ZPRRegOp zprty, Operand immtype>
|
||||
: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm),
|
||||
asm, "\t$Zd, $Zn, $imm",
|
||||
"", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
@ -2468,38 +2498,40 @@ class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
|
||||
let Inst{10} = opc;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {
|
||||
def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
|
||||
def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
|
||||
multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
|
||||
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
|
||||
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
|
||||
def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
|
||||
def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
|
||||
let Inst{22} = imm{5};
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {
|
||||
def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
|
||||
def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
|
||||
multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
|
||||
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
|
||||
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
|
||||
def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
|
||||
def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
|
||||
let Inst{22} = imm{5};
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
}
|
||||
|
||||
class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty, Operand immtype>
|
||||
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty, Operand immtype>
|
||||
: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
|
||||
asm, "\t$Zda, $Zn, $imm",
|
||||
"", []>, Sched<[]> {
|
||||
@ -2521,15 +2553,15 @@ class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm
|
||||
let ElementSize = ElementSizeNone;
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
|
||||
def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
|
||||
multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
|
||||
def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
|
||||
def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
|
||||
def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
|
||||
let Inst{22} = imm{5};
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
@ -2607,9 +2639,9 @@ multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
|
||||
// SVE2 Narrowing Group
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
|
||||
string asm, ZPRRegOp zprty1,
|
||||
ZPRRegOp zprty2, Operand immtype>
|
||||
class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
|
||||
string asm, ZPRRegOp zprty1,
|
||||
ZPRRegOp zprty2, Operand immtype>
|
||||
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
|
||||
asm, "\t$Zd, $Zn, $imm",
|
||||
"", []>, Sched<[]> {
|
||||
@ -2622,26 +2654,63 @@ class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
|
||||
let Inst{20-19} = tsz8_64{1-0};
|
||||
let Inst{18-16} = imm{2-0}; // imm3
|
||||
let Inst{15-14} = 0b00;
|
||||
let Inst{13-10} = opc;
|
||||
let Inst{13-11} = opc;
|
||||
let Inst{10} = 0b0;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
}
|
||||
|
||||
multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> {
|
||||
def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16,
|
||||
vecshiftR8>;
|
||||
def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32,
|
||||
vecshiftR16> {
|
||||
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
|
||||
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
|
||||
vecshiftR8>;
|
||||
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
|
||||
vecshiftR16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64,
|
||||
vecshiftR32> {
|
||||
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
|
||||
vecshiftR32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
}
|
||||
|
||||
class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
// Instruction format for the "top" form of the SVE2 narrowing shift by
// immediate (Inst{10} is fixed to 1 here).
//
//   tsz8_64 - split across Inst{22} (bit 2) and Inst{20-19} (bits 1-0).
//   opc     - written to Inst{13-11}.
//   asm     - mnemonic; operands are printed as "$Zd, $Zn, $imm".
//   zprty1  - register class of the destination $Zd and of the tied input
//             $_Zd.
//   zprty2  - register class of the source $Zn.
//   immtype - operand type of the shift amount $imm.
//
// $Zd is tied to $_Zd through Constraints, so the previous contents of the
// destination register are an input of the instruction. Only imm{2-0} is
// encoded by this class; the instantiating multiclass places the higher
// immediate bits into Inst{20-19} where the size pattern leaves them open.
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
|
||||
string asm, ZPRRegOp zprty1,
|
||||
ZPRRegOp zprty2, Operand immtype>
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm),
|
||||
asm, "\t$Zd, $Zn, $imm",
|
||||
"", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<5> Zn;
|
||||
bits<5> imm;
|
||||
let Inst{31-23} = 0b010001010;
|
||||
let Inst{22} = tsz8_64{2};
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-19} = tsz8_64{1-0};
|
||||
let Inst{18-16} = imm{2-0}; // imm3
|
||||
let Inst{15-14} = 0b00;
|
||||
let Inst{13-11} = opc;
|
||||
let Inst{10} = 0b1;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
// Tie the output register to the $_Zd input operand.
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
// Instantiates the _B, _H and _S variants of
// sve2_int_bin_shift_imm_narrow_top with right-shift immediate operands
// (vecshiftR8/16/32). The destination class is half the element width of the
// source class (ZPR8/ZPR16, ZPR16/ZPR32, ZPR32/ZPR64).
//
// The '?' entries in the size pattern are left unset and then overwritten
// with the upper immediate bits: _H stores imm{3} in Inst{19}, _S stores
// imm{4-3} in Inst{20-19}.
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
|
||||
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
|
||||
vecshiftR8>;
|
||||
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
|
||||
vecshiftR16> {
|
||||
let Inst{19} = imm{3};
|
||||
}
|
||||
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
|
||||
vecshiftR32> {
|
||||
let Inst{20-19} = imm{4-3};
|
||||
}
|
||||
}
|
||||
|
||||
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
|
||||
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
@ -2652,19 +2721,46 @@ class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = 0b011;
|
||||
let Inst{12-10} = opc; // S, R, T
|
||||
let Inst{12-11} = opc; // S, R
|
||||
let Inst{10} = 0b0; // Top
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
}
|
||||
|
||||
multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> {
|
||||
def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>;
|
||||
multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>;
|
||||
}
|
||||
|
||||
class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
// Instruction format for the "top" form of the SVE2 add/subtract narrow-high
// instructions (Inst{10}, commented "Top", is fixed to 1 here).
//
//   sz      - written to Inst{23-22}.
//   opc     - written to Inst{12-11} (commented "S, R" below).
//   asm     - mnemonic; operands are printed as "$Zd, $Zn, $Zm".
//   zprty1  - register class of the destination $Zd and of the tied input
//             $_Zd.
//   zprty2  - register class of the sources $Zn and $Zm.
//
// $Zd is tied to $_Zd through Constraints, so the previous contents of the
// destination register are an input of the instruction.
class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
|
||||
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<5> Zn;
|
||||
bits<5> Zm;
|
||||
let Inst{31-24} = 0b01000101;
|
||||
let Inst{23-22} = sz;
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-16} = Zm;
|
||||
let Inst{15-13} = 0b011;
|
||||
let Inst{12-11} = opc; // S, R
|
||||
let Inst{10} = 0b1; // Top
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
// Tie the output register to the $_Zd input operand.
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
// Instantiates the _B, _H and _S variants of the
// sve2_int_addsub_narrow_high_top instruction class with size fields 0b01,
// 0b10 and 0b11. The destination class is half the element width of the
// source class (ZPR8/ZPR16, ZPR16/ZPR32, ZPR32/ZPR64).
multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>;
|
||||
}
|
||||
|
||||
class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
|
||||
asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
@ -2674,15 +2770,41 @@ class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-19} = tsz8_64{1-0};
|
||||
let Inst{18-13} = 0b000010;
|
||||
let Inst{12-10} = opc;
|
||||
let Inst{12-11} = opc;
|
||||
let Inst{10} = 0b0;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
}
|
||||
|
||||
multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> {
|
||||
def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>;
|
||||
multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>;
|
||||
}
|
||||
|
||||
// Instruction format for the "top" form of the SVE2 saturating extract narrow
// instructions (Inst{10} is fixed to 1 here).
//
//   tsz8_64 - split across Inst{22} (bit 2) and Inst{20-19} (bits 1-0).
//   opc     - written to Inst{12-11}.
//   asm     - mnemonic; operands are printed as "$Zd, $Zn".
//   zprty1  - register class of the destination $Zd and of the tied input
//             $_Zd.
//   zprty2  - register class of the source $Zn.
//
// $Zd is tied to $_Zd through Constraints, so the previous contents of the
// destination register are an input of the instruction.
class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
|
||||
ZPRRegOp zprty1, ZPRRegOp zprty2>
|
||||
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn),
|
||||
asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
|
||||
bits<5> Zd;
|
||||
bits<5> Zn;
|
||||
let Inst{31-23} = 0b010001010;
|
||||
let Inst{22} = tsz8_64{2};
|
||||
let Inst{21} = 0b1;
|
||||
let Inst{20-19} = tsz8_64{1-0};
|
||||
let Inst{18-13} = 0b000010;
|
||||
let Inst{12-11} = opc;
|
||||
let Inst{10} = 0b1;
|
||||
let Inst{9-5} = Zn;
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
// Tie the output register to the $_Zd input operand.
let Constraints = "$Zd = $_Zd";
|
||||
}
|
||||
|
||||
// Instantiates the _B, _H and _S variants of the
// sve2_int_sat_extract_narrow_top instruction class with size patterns 0b001,
// 0b010 and 0b100. The destination class is half the element width of the
// source class (ZPR8/ZPR16, ZPR16/ZPR32, ZPR32/ZPR64).
multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm> {
|
||||
def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>;
|
||||
def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>;
|
||||
def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3886,9 +4008,9 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
|
||||
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
|
||||
}
|
||||
|
||||
class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
|
||||
RegisterOperand VecList>
|
||||
: I<(outs VecList:$Zt), iops,
|
||||
class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
|
||||
RegisterOperand listty, ZPRRegOp zprty>
|
||||
: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
|
||||
asm, "\t$Zt, $Pg, [$Zn, $Rm]",
|
||||
"",
|
||||
[]>, Sched<[]> {
|
||||
@ -3908,17 +4030,14 @@ class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
|
||||
let mayStore = 1;
|
||||
}
|
||||
|
||||
multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm,
|
||||
multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm,
|
||||
RegisterOperand listty, ZPRRegOp zprty> {
|
||||
def _REAL : sve2_mem_cstnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
|
||||
asm, listty>;
|
||||
def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>;
|
||||
|
||||
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
|
||||
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
|
||||
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
|
||||
}
|
||||
@ -5094,7 +5213,7 @@ multiclass sve_mem_p_fill<string asm> {
|
||||
(!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
|
||||
}
|
||||
|
||||
class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
|
||||
class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
|
||||
RegisterOperand VecList>
|
||||
: I<(outs VecList:$Zt), iops,
|
||||
asm, "\t$Zt, $Pg/z, [$Zn, $Rm]",
|
||||
@ -5119,17 +5238,15 @@ class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
|
||||
let mayLoad = 1;
|
||||
}
|
||||
|
||||
multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm,
|
||||
multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm,
|
||||
RegisterOperand listty, ZPRRegOp zprty> {
|
||||
def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
|
||||
def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
|
||||
asm, listty>;
|
||||
|
||||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
|
||||
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
|
||||
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
|
||||
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
|
||||
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
|
||||
}
|
||||
|
@ -14369,7 +14369,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
|
||||
/// constraint it is for this target.
|
||||
ARMTargetLowering::ConstraintType
|
||||
ARMTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
if (Constraint.size() == 1) {
|
||||
unsigned S = Constraint.size();
|
||||
if (S == 1) {
|
||||
switch (Constraint[0]) {
|
||||
default: break;
|
||||
case 'l': return C_RegisterClass;
|
||||
@ -14377,12 +14378,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'h': return C_RegisterClass;
|
||||
case 'x': return C_RegisterClass;
|
||||
case 't': return C_RegisterClass;
|
||||
case 'j': return C_Other; // Constant for movw.
|
||||
// An address with a single base register. Due to the way we
|
||||
// currently handle addresses it is the same as an 'r' memory constraint.
|
||||
case 'j': return C_Immediate; // Constant for movw.
|
||||
// An address with a single base register. Due to the way we
|
||||
// currently handle addresses it is the same as an 'r' memory constraint.
|
||||
case 'Q': return C_Memory;
|
||||
}
|
||||
} else if (Constraint.size() == 2) {
|
||||
} else if (S == 2) {
|
||||
switch (Constraint[0]) {
|
||||
default: break;
|
||||
case 'T': return C_RegisterClass;
|
||||
|
@ -592,6 +592,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
|
||||
[(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
|
||||
Sched<[WriteBrTbl]> {
|
||||
let Size = 2;
|
||||
let isNotDuplicable = 1;
|
||||
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
|
||||
}
|
||||
}
|
||||
@ -1465,7 +1466,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
|
||||
// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
|
||||
// and make use of the same compressed jump table format as Thumb-2.
|
||||
let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
|
||||
isIndirectBranch = 1 in {
|
||||
isIndirectBranch = 1, isNotDuplicable = 1 in {
|
||||
def tTBB_JT : tPseudoInst<(outs),
|
||||
(ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
|
||||
IIC_Br, []>, Sched<[WriteBr]>;
|
||||
|
@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
if (Constraint.size() == 1) {
|
||||
// See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
|
||||
switch (Constraint[0]) {
|
||||
default:
|
||||
break;
|
||||
case 'a': // Simple upper registers
|
||||
case 'b': // Base pointer registers pairs
|
||||
case 'd': // Upper register
|
||||
@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'O': // Integer constant (Range: 8, 16, 24)
|
||||
case 'P': // Integer constant (Range: 1)
|
||||
case 'R': // Integer constant (Range: -6 to 5)x
|
||||
return C_Other;
|
||||
default:
|
||||
break;
|
||||
return C_Immediate;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -116,9 +116,8 @@ class BPFAbstractMemberAccess final : public ModulePass {
|
||||
void replaceWithGEP(std::vector<CallInst *> &CallList,
|
||||
uint32_t NumOfZerosIndex, uint32_t DIIndex);
|
||||
|
||||
Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr,
|
||||
std::string &AccessKey, uint32_t Kind,
|
||||
MDNode *&TypeMeta);
|
||||
Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey,
|
||||
uint32_t Kind, MDNode *&TypeMeta);
|
||||
bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex);
|
||||
bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind);
|
||||
};
|
||||
@ -340,8 +339,7 @@ bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue,
|
||||
/// Compute the base of the whole preserve_*_access_index chains, i.e., the base
|
||||
/// pointer of the first preserve_*_access_index call, and construct the access
|
||||
/// string, which will be the name of a global variable.
|
||||
Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
|
||||
std::string &AccessStr,
|
||||
Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
|
||||
std::string &AccessKey,
|
||||
uint32_t Kind,
|
||||
MDNode *&TypeMeta) {
|
||||
@ -392,16 +390,16 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
|
||||
if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2)
|
||||
return nullptr;
|
||||
|
||||
// Construct the type string AccessStr.
|
||||
// Construct the type string AccessKey.
|
||||
for (unsigned I = 0; I < AccessIndices.size(); ++I)
|
||||
AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr;
|
||||
AccessKey = std::to_string(AccessIndices[I]) + ":" + AccessKey;
|
||||
|
||||
if (TypeNameIndex == AccessIndices.size() - 1)
|
||||
AccessStr = "0:" + AccessStr;
|
||||
AccessKey = "0:" + AccessKey;
|
||||
|
||||
// Access key is the type name + access string, uniquely identifying
|
||||
// one kernel memory access.
|
||||
AccessKey = LastTypeName + ":" + AccessStr;
|
||||
AccessKey = LastTypeName + ":" + AccessKey;
|
||||
|
||||
return Base;
|
||||
}
|
||||
@ -410,10 +408,10 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
|
||||
/// transformation to a chain of relocable GEPs.
|
||||
bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
|
||||
uint32_t Kind) {
|
||||
std::string AccessStr, AccessKey;
|
||||
std::string AccessKey;
|
||||
MDNode *TypeMeta = nullptr;
|
||||
Value *Base =
|
||||
computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta);
|
||||
computeBaseAndAccessKey(Call, AccessKey, Kind, TypeMeta);
|
||||
if (!Base)
|
||||
return false;
|
||||
|
||||
@ -432,7 +430,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
|
||||
|
||||
if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
|
||||
GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false,
|
||||
GlobalVariable::ExternalLinkage, NULL, AccessStr);
|
||||
GlobalVariable::ExternalLinkage, NULL, AccessKey);
|
||||
GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
|
||||
// Set the metadata (debuginfo types) for the global.
|
||||
if (TypeMeta)
|
||||
|
@ -30,6 +30,18 @@ static const char *BTFKindStr[] = {
|
||||
#include "BTF.def"
|
||||
};
|
||||
|
||||
/// Peel typedef and const/volatile/restrict wrappers off a debug-info type.
///
/// Starting from \p Ty, repeatedly steps to the base type for as long as the
/// current type is a DIDerivedType tagged DW_TAG_typedef, DW_TAG_const_type,
/// DW_TAG_volatile_type or DW_TAG_restrict_type.
///
/// \returns the first type reached that is either not a DIDerivedType or is
///          a derived type with any other tag; \p Ty itself if it already
///          satisfies that.
///
/// NOTE(review): the result of getBaseType() is fed straight back into
/// dyn_cast without a null check -- confirm a stripped wrapper can never have
/// a null base type here.
static const DIType * stripQualifiers(const DIType *Ty) {
|
||||
while (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
|
||||
unsigned Tag = DTy->getTag();
|
||||
// Stop at the first derived type that is not a typedef or cvr-qualifier.
if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
|
||||
Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type)
|
||||
break;
|
||||
Ty = DTy->getBaseType();
|
||||
}
|
||||
|
||||
return Ty;
|
||||
}
|
||||
|
||||
/// Emit a BTF common type.
|
||||
void BTFTypeBase::emitType(MCStreamer &OS) {
|
||||
OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
|
||||
@ -184,9 +196,9 @@ void BTFTypeEnum::emitType(MCStreamer &OS) {
|
||||
}
|
||||
}
|
||||
|
||||
BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize,
|
||||
uint32_t NumElems)
|
||||
: ElemSize(ElemSize) {
|
||||
BTFTypeArray::BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId,
|
||||
uint32_t ElemSize, uint32_t NumElems)
|
||||
: ElemTyNoQual(Ty), ElemSize(ElemSize) {
|
||||
Kind = BTF::BTF_KIND_ARRAY;
|
||||
BTFType.NameOff = 0;
|
||||
BTFType.Info = Kind << 24;
|
||||
@ -207,6 +219,9 @@ void BTFTypeArray::completeType(BTFDebug &BDebug) {
|
||||
// created during initial type traversal. Just
|
||||
// retrieve that type id.
|
||||
ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
|
||||
|
||||
ElemTypeNoQual = ElemTyNoQual ? BDebug.getTypeId(ElemTyNoQual)
|
||||
: ArrayInfo.ElemType;
|
||||
}
|
||||
|
||||
void BTFTypeArray::emitType(MCStreamer &OS) {
|
||||
@ -218,7 +233,7 @@ void BTFTypeArray::emitType(MCStreamer &OS) {
|
||||
|
||||
void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset,
|
||||
uint32_t &ElementTypeId) {
|
||||
ElementTypeId = ArrayInfo.ElemType;
|
||||
ElementTypeId = ElemTypeNoQual;
|
||||
LocOffset = Loc * ElemSize;
|
||||
}
|
||||
|
||||
@ -251,7 +266,9 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
|
||||
} else {
|
||||
BTFMember.Offset = DDTy->getOffsetInBits();
|
||||
}
|
||||
BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType());
|
||||
const auto *BaseTy = DDTy->getBaseType();
|
||||
BTFMember.Type = BDebug.getTypeId(BaseTy);
|
||||
MemberTypeNoQual.push_back(BDebug.getTypeId(stripQualifiers(BaseTy)));
|
||||
Members.push_back(BTFMember);
|
||||
}
|
||||
}
|
||||
@ -270,7 +287,7 @@ std::string BTFTypeStruct::getName() { return STy->getName(); }
|
||||
|
||||
void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset,
|
||||
uint32_t &MemberType) {
|
||||
MemberType = Members[Loc].Type;
|
||||
MemberType = MemberTypeNoQual[Loc];
|
||||
MemberOffset =
|
||||
HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset;
|
||||
}
|
||||
@ -492,10 +509,13 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
|
||||
uint32_t ElemTypeId, ElemSize;
|
||||
const DIType *ElemType = CTy->getBaseType();
|
||||
visitTypeEntry(ElemType, ElemTypeId, false, false);
|
||||
|
||||
// Strip qualifiers from element type to get accurate element size.
|
||||
ElemType = stripQualifiers(ElemType);
|
||||
ElemSize = ElemType->getSizeInBits() >> 3;
|
||||
|
||||
if (!CTy->getSizeInBits()) {
|
||||
auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemTypeId, 0, 0);
|
||||
auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemType, ElemTypeId, 0, 0);
|
||||
ArrayTypes.push_back(TypeEntry.get());
|
||||
ElemTypeId = addType(std::move(TypeEntry), CTy);
|
||||
} else {
|
||||
@ -507,9 +527,11 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
|
||||
const DISubrange *SR = cast<DISubrange>(Element);
|
||||
auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
|
||||
int64_t Count = CI->getSExtValue();
|
||||
const DIType *ArrayElemTy = (I == 0) ? ElemType : nullptr;
|
||||
|
||||
auto TypeEntry =
|
||||
llvm::make_unique<BTFTypeArray>(ElemTypeId, ElemSize, Count);
|
||||
llvm::make_unique<BTFTypeArray>(ArrayElemTy, ElemTypeId,
|
||||
ElemSize, Count);
|
||||
ArrayTypes.push_back(TypeEntry.get());
|
||||
if (I == 0)
|
||||
ElemTypeId = addType(std::move(TypeEntry), CTy);
|
||||
@ -1006,19 +1028,20 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
|
||||
unsigned RootId = populateStructType(RootTy);
|
||||
setTypeFromId(RootId, &PrevStructType, &PrevArrayType);
|
||||
unsigned RootTySize = PrevStructType->getStructSize();
|
||||
StringRef IndexPattern = AccessPattern.substr(AccessPattern.find_first_of(':') + 1);
|
||||
|
||||
BTFOffsetReloc OffsetReloc;
|
||||
OffsetReloc.Label = ORSym;
|
||||
OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back());
|
||||
OffsetReloc.OffsetNameOff = addString(IndexPattern.drop_back());
|
||||
OffsetReloc.TypeID = RootId;
|
||||
|
||||
uint32_t Start = 0, End = 0, Offset = 0;
|
||||
bool FirstAccess = true;
|
||||
for (auto C : AccessPattern) {
|
||||
for (auto C : IndexPattern) {
|
||||
if (C != ':') {
|
||||
End++;
|
||||
} else {
|
||||
std::string SubStr = AccessPattern.substr(Start, End - Start);
|
||||
std::string SubStr = IndexPattern.substr(Start, End - Start);
|
||||
int Loc = std::stoi(SubStr);
|
||||
|
||||
if (FirstAccess) {
|
||||
@ -1038,12 +1061,15 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
|
||||
Offset += LocOffset;
|
||||
PrevArrayType = nullptr;
|
||||
setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType);
|
||||
} else {
|
||||
llvm_unreachable("Internal Error: BTF offset relocation type traversal error");
|
||||
}
|
||||
|
||||
Start = End + 1;
|
||||
End = Start;
|
||||
}
|
||||
}
|
||||
AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset;
|
||||
AccessOffsets[AccessPattern.str()] = Offset;
|
||||
OffsetRelocTable[SecNameOff].push_back(OffsetReloc);
|
||||
}
|
||||
|
||||
@ -1227,7 +1253,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
|
||||
MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
|
||||
DIType *Ty = dyn_cast<DIType>(MDN);
|
||||
std::string TypeName = Ty->getName();
|
||||
int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()];
|
||||
int64_t Imm = AccessOffsets[GVar->getName().str()];
|
||||
|
||||
// Emit "mov ri, <imm>" for abstract member accesses.
|
||||
OutMI.setOpcode(BPF::MOV_ri);
|
||||
|
@ -104,11 +104,14 @@ class BTFTypeEnum : public BTFTypeBase {
|
||||
|
||||
/// Handle array type.
|
||||
class BTFTypeArray : public BTFTypeBase {
|
||||
const DIType *ElemTyNoQual;
|
||||
uint32_t ElemSize;
|
||||
struct BTF::BTFArray ArrayInfo;
|
||||
uint32_t ElemTypeNoQual;
|
||||
|
||||
public:
|
||||
BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems);
|
||||
BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId,
|
||||
uint32_t ElemSize, uint32_t NumElems);
|
||||
uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
|
||||
void completeType(BTFDebug &BDebug);
|
||||
void emitType(MCStreamer &OS);
|
||||
@ -120,6 +123,7 @@ class BTFTypeStruct : public BTFTypeBase {
|
||||
const DICompositeType *STy;
|
||||
bool HasBitField;
|
||||
std::vector<struct BTF::BTFMember> Members;
|
||||
std::vector<uint32_t> MemberTypeNoQual;
|
||||
|
||||
public:
|
||||
BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField,
|
||||
|
@ -1208,6 +1208,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
|
||||
Res = V;
|
||||
} else
|
||||
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
|
||||
|
||||
MCBinaryExpr::Opcode Opcode;
|
||||
switch (getLexer().getKind()) {
|
||||
default:
|
||||
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
|
||||
return MatchOperand_Success;
|
||||
case AsmToken::Plus:
|
||||
Opcode = MCBinaryExpr::Add;
|
||||
break;
|
||||
case AsmToken::Minus:
|
||||
Opcode = MCBinaryExpr::Sub;
|
||||
break;
|
||||
}
|
||||
|
||||
const MCExpr *Expr;
|
||||
if (getParser().parseExpression(Expr))
|
||||
return MatchOperand_ParseFail;
|
||||
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
|
||||
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
|
||||
return MatchOperand_Success;
|
||||
}
|
||||
|
@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
|
||||
uint64_t FrameSize = MFI.getStackSize();
|
||||
|
||||
// Get the alignment.
|
||||
uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment()
|
||||
: getStackAlignment();
|
||||
unsigned StackAlign = getStackAlignment();
|
||||
if (RI->needsStackRealignment(MF)) {
|
||||
unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment());
|
||||
FrameSize += (MaxStackAlign - StackAlign);
|
||||
StackAlign = MaxStackAlign;
|
||||
}
|
||||
|
||||
// Set Max Call Frame Size
|
||||
uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign);
|
||||
MFI.setMaxCallFrameSize(MaxCallSize);
|
||||
|
||||
// Make sure the frame is aligned.
|
||||
FrameSize = alignTo(FrameSize, StackAlign);
|
||||
@ -101,6 +109,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
const RISCVInstrInfo *TII = STI.getInstrInfo();
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
|
||||
if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) {
|
||||
report_fatal_error(
|
||||
"RISC-V backend can't currently handle functions that need stack "
|
||||
"realignment and have variable sized objects");
|
||||
}
|
||||
|
||||
unsigned FPReg = getFPReg(STI);
|
||||
unsigned SPReg = getSPReg(STI);
|
||||
|
||||
@ -158,6 +172,29 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
nullptr, RI->getDwarfRegNum(FPReg, true), 0));
|
||||
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
|
||||
.addCFIIndex(CFIIndex);
|
||||
|
||||
// Realign Stack
|
||||
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
|
||||
if (RI->needsStackRealignment(MF)) {
|
||||
unsigned MaxAlignment = MFI.getMaxAlignment();
|
||||
|
||||
const RISCVInstrInfo *TII = STI.getInstrInfo();
|
||||
if (isInt<12>(-(int)MaxAlignment)) {
|
||||
BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(-(int)MaxAlignment);
|
||||
} else {
|
||||
unsigned ShiftAmount = countTrailingZeros(MaxAlignment);
|
||||
unsigned VR =
|
||||
MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
|
||||
.addReg(SPReg)
|
||||
.addImm(ShiftAmount);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg)
|
||||
.addReg(VR)
|
||||
.addImm(ShiftAmount);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -257,6 +294,13 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
|
||||
if (FI >= MinCSFI && FI <= MaxCSFI) {
|
||||
FrameReg = RISCV::X2;
|
||||
Offset += MF.getFrameInfo().getStackSize();
|
||||
} else if (RI->needsStackRealignment(MF)) {
|
||||
assert(!MFI.hasVarSizedObjects() &&
|
||||
"Unexpected combination of stack realignment and varsized objects");
|
||||
// If the stack was realigned, the frame pointer is set in order to allow
|
||||
// SP to be restored, but we still access stack objects using SP.
|
||||
FrameReg = RISCV::X2;
|
||||
Offset += MF.getFrameInfo().getStackSize();
|
||||
} else {
|
||||
FrameReg = RI->getFrameRegister(MF);
|
||||
if (hasFP(MF))
|
||||
|
@ -1007,12 +1007,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
|
||||
// We can materialise `c1 << c2` into an add immediate, so it's "free",
|
||||
// and the combine should happen, to potentially allow further combines
|
||||
// later.
|
||||
if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
|
||||
if (ShiftedC1Int.getMinSignedBits() <= 64 &&
|
||||
isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
|
||||
return true;
|
||||
|
||||
// We can materialise `c1` in an add immediate, so it's "free", and the
|
||||
// combine should be prevented.
|
||||
if (isLegalAddImmediate(C1Int.getSExtValue()))
|
||||
if (C1Int.getMinSignedBits() <= 64 &&
|
||||
isLegalAddImmediate(C1Int.getSExtValue()))
|
||||
return false;
|
||||
|
||||
// Neither constant will fit into an immediate, so find materialisation
|
||||
@ -2397,6 +2399,25 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// getConstraintType - Given a constraint letter, return the type of
|
||||
/// constraint it is for this target.
|
||||
RISCVTargetLowering::ConstraintType
|
||||
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
if (Constraint.size() == 1) {
|
||||
switch (Constraint[0]) {
|
||||
default:
|
||||
break;
|
||||
case 'f':
|
||||
return C_RegisterClass;
|
||||
case 'I':
|
||||
case 'J':
|
||||
case 'K':
|
||||
return C_Immediate;
|
||||
}
|
||||
}
|
||||
return TargetLowering::getConstraintType(Constraint);
|
||||
}
|
||||
|
||||
std::pair<unsigned, const TargetRegisterClass *>
|
||||
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
StringRef Constraint,
|
||||
@ -2407,6 +2428,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
switch (Constraint[0]) {
|
||||
case 'r':
|
||||
return std::make_pair(0U, &RISCV::GPRRegClass);
|
||||
case 'f':
|
||||
if (Subtarget.hasStdExtF() && VT == MVT::f32)
|
||||
return std::make_pair(0U, &RISCV::FPR32RegClass);
|
||||
if (Subtarget.hasStdExtD() && VT == MVT::f64)
|
||||
return std::make_pair(0U, &RISCV::FPR64RegClass);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -92,6 +92,7 @@ class RISCVTargetLowering : public TargetLowering {
|
||||
// This method returns the name of a target specific DAG node.
|
||||
const char *getTargetNodeName(unsigned Opcode) const override;
|
||||
|
||||
ConstraintType getConstraintType(StringRef Constraint) const override;
|
||||
std::pair<unsigned, const TargetRegisterClass *>
|
||||
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
|
||||
StringRef Constraint, MVT VT) const override;
|
||||
|
@ -3183,7 +3183,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'e':
|
||||
return C_RegisterClass;
|
||||
case 'I': // SIMM13
|
||||
return C_Other;
|
||||
return C_Immediate;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -956,7 +956,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'K': // Signed 16-bit constant
|
||||
case 'L': // Signed 20-bit displacement (on all targets we support)
|
||||
case 'M': // 0x7fffffff
|
||||
return C_Other;
|
||||
return C_Immediate;
|
||||
|
||||
default:
|
||||
break;
|
||||
|
@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
|
||||
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
|
||||
"Support 64-bit instructions">;
|
||||
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
|
||||
"64-bit with cmpxchg16b">;
|
||||
"64-bit with cmpxchg16b",
|
||||
[FeatureCMPXCHG8B]>;
|
||||
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
|
||||
"SHLD instruction is slow">;
|
||||
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
|
||||
|
@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
|
||||
Complexity += 2;
|
||||
}
|
||||
|
||||
// Heuristic: try harder to form an LEA from ADD if the operands set flags.
|
||||
// Unlike ADD, LEA does not affect flags, so we will be less likely to require
|
||||
// duplicating flag-producing instructions later in the pipeline.
|
||||
if (N.getOpcode() == ISD::ADD) {
|
||||
auto isMathWithFlags = [](SDValue V) {
|
||||
switch (V.getOpcode()) {
|
||||
case X86ISD::ADD:
|
||||
case X86ISD::SUB:
|
||||
case X86ISD::ADC:
|
||||
case X86ISD::SBB:
|
||||
/* TODO: These opcodes can be added safely, but we may want to justify
|
||||
their inclusion for different reasons (better for reg-alloc).
|
||||
case X86ISD::SMUL:
|
||||
case X86ISD::UMUL:
|
||||
case X86ISD::OR:
|
||||
case X86ISD::XOR:
|
||||
case X86ISD::AND:
|
||||
*/
|
||||
// Value 1 is the flag output of the node - verify it's not dead.
|
||||
return !SDValue(V.getNode(), 1).use_empty();
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
};
|
||||
// TODO: This could be an 'or' rather than 'and' to make the transform more
|
||||
// likely to happen. We might want to factor in whether there's a
|
||||
// load folding opportunity for the math op that disappears with LEA.
|
||||
if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
|
||||
Complexity++;
|
||||
}
|
||||
|
||||
if (AM.Disp)
|
||||
Complexity++;
|
||||
|
||||
@ -3302,8 +3333,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
|
||||
SDValue ImplDef = SDValue(
|
||||
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
|
||||
insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
|
||||
NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
|
||||
NBits);
|
||||
|
||||
SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
|
||||
insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
|
||||
NBits = SDValue(
|
||||
CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
|
||||
NBits, SRIdxVal), 0);
|
||||
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
|
||||
|
||||
if (Subtarget->hasBMI2()) {
|
||||
|
@ -4069,6 +4069,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
InFlag = Chain.getValue(1);
|
||||
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
|
||||
|
||||
// Save heapallocsite metadata.
|
||||
if (CLI.CS)
|
||||
if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
|
||||
DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
|
||||
|
||||
// Create the CALLSEQ_END node.
|
||||
unsigned NumBytesForCalleeToPop;
|
||||
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
|
||||
@ -5500,6 +5505,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
|
||||
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
|
||||
Idx == (VT.getVectorNumElements() / 2) &&
|
||||
Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
|
||||
Src.getOperand(1).getValueType() == SubVT &&
|
||||
isNullConstant(Src.getOperand(2))) {
|
||||
Ops.push_back(Src.getOperand(1));
|
||||
Ops.push_back(Sub);
|
||||
@ -34062,25 +34068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
case X86ISD::SUBV_BROADCAST: {
|
||||
// Reduce size of broadcast if we don't need the upper half.
|
||||
unsigned HalfElts = NumElts / 2;
|
||||
if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
|
||||
SDValue Src = Op.getOperand(0);
|
||||
MVT SrcVT = Src.getSimpleValueType();
|
||||
|
||||
SDValue Half = Src;
|
||||
if (SrcVT.getVectorNumElements() != HalfElts) {
|
||||
MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
|
||||
Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
|
||||
}
|
||||
|
||||
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
|
||||
TLO.DAG, SDLoc(Op),
|
||||
Half.getValueSizeInBits()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case X86ISD::VPERMV: {
|
||||
SDValue Mask = Op.getOperand(0);
|
||||
APInt MaskUndef, MaskZero;
|
||||
@ -34134,6 +34121,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
|
||||
SDValue Insert =
|
||||
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
return TLO.CombineTo(Op, Insert);
|
||||
}
|
||||
// Subvector broadcast.
|
||||
case X86ISD::SUBV_BROADCAST: {
|
||||
SDLoc DL(Op);
|
||||
SDValue Src = Op.getOperand(0);
|
||||
if (Src.getValueSizeInBits() > ExtSizeInBits)
|
||||
Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
|
||||
else if (Src.getValueSizeInBits() < ExtSizeInBits) {
|
||||
MVT SrcSVT = Src.getSimpleValueType().getScalarType();
|
||||
MVT SrcVT =
|
||||
MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
|
||||
Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
|
||||
}
|
||||
return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
|
||||
TLO.DAG, DL, ExtSizeInBits));
|
||||
}
|
||||
// Byte shifts by immediate.
|
||||
case X86ISD::VSHLDQ:
|
||||
@ -43839,6 +43841,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
|
||||
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
|
||||
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 &&
|
||||
isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() &&
|
||||
Vec.getOperand(1).getValueSizeInBits() == SubVecVT.getSizeInBits() &&
|
||||
Vec.hasOneUse()) {
|
||||
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
|
||||
Vec.getOperand(1), Vec.getOperand(2));
|
||||
@ -44660,10 +44663,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
|
||||
case 'I':
|
||||
case 'J':
|
||||
case 'K':
|
||||
case 'L':
|
||||
case 'M':
|
||||
case 'N':
|
||||
case 'G':
|
||||
case 'L':
|
||||
case 'M':
|
||||
return C_Immediate;
|
||||
case 'C':
|
||||
case 'e':
|
||||
case 'Z':
|
||||
|
@ -3288,26 +3288,35 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
|
||||
|
||||
// Look for an 'and' of two (opposite) logical shifts.
|
||||
// Pick the single-use shift as XShift.
|
||||
Value *XShift, *YShift;
|
||||
Instruction *XShift, *YShift;
|
||||
if (!match(I.getOperand(0),
|
||||
m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
|
||||
m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
|
||||
m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
|
||||
m_CombineAnd(m_AnyLogicalShift, m_Instruction(YShift)))))
|
||||
return nullptr;
|
||||
|
||||
// If YShift is a single-use 'lshr', swap the shifts around.
|
||||
if (match(YShift, m_OneUse(m_AnyLShr)))
|
||||
// If YShift is a 'lshr', swap the shifts around.
|
||||
if (match(YShift, m_AnyLShr))
|
||||
std::swap(XShift, YShift);
|
||||
|
||||
// The shifts must be in opposite directions.
|
||||
Instruction::BinaryOps XShiftOpcode =
|
||||
cast<BinaryOperator>(XShift)->getOpcode();
|
||||
if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
|
||||
auto XShiftOpcode = XShift->getOpcode();
|
||||
if (XShiftOpcode == YShift->getOpcode())
|
||||
return nullptr; // Do not care about same-direction shifts here.
|
||||
|
||||
Value *X, *XShAmt, *Y, *YShAmt;
|
||||
match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
|
||||
match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
|
||||
|
||||
// If one of the values being shifted is a constant, then we will end with
|
||||
// and+icmp, and shift instr will be constant-folded. If they are not,
|
||||
// however, we will need to ensure that we won't increase instruction count.
|
||||
if (!isa<Constant>(X) && !isa<Constant>(Y)) {
|
||||
// At least one of the hands of the 'and' should be one-use shift.
|
||||
if (!match(I.getOperand(0),
|
||||
m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Can we fold (XShAmt+YShAmt) ?
|
||||
Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
|
||||
SQ.getWithInstruction(&I));
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "llvm/Support/DebugCounter.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "div-rem-pairs"
|
||||
@ -32,24 +33,44 @@ STATISTIC(NumDecomposed, "Number of instructions decomposed");
|
||||
DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
|
||||
"Controls transformations in div-rem-pairs pass");
|
||||
|
||||
/// Find matching pairs of integer div/rem ops (they have the same numerator,
|
||||
/// denominator, and signedness). If they exist in different basic blocks, bring
|
||||
/// them together by hoisting or replace the common division operation that is
|
||||
/// implicit in the remainder:
|
||||
/// X % Y <--> X - ((X / Y) * Y).
|
||||
///
|
||||
/// We can largely ignore the normal safety and cost constraints on speculation
|
||||
/// of these ops when we find a matching pair. This is because we are already
|
||||
/// guaranteed that any exceptions and most cost are already incurred by the
|
||||
/// first member of the pair.
|
||||
///
|
||||
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
|
||||
/// SimplifyCFG, but it's split off on its own because it's different enough
|
||||
/// that it doesn't quite match the stated objectives of those passes.
|
||||
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
const DominatorTree &DT) {
|
||||
bool Changed = false;
|
||||
/// A thin wrapper to store two values that we matched as div-rem pair.
|
||||
/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
|
||||
struct DivRemPairWorklistEntry {
|
||||
/// The actual udiv/sdiv instruction. Source of truth.
|
||||
AssertingVH<Instruction> DivInst;
|
||||
|
||||
/// The instruction that we have matched as a remainder instruction.
|
||||
/// Should only be used as Value, don't introspect it.
|
||||
AssertingVH<Instruction> RemInst;
|
||||
|
||||
DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
|
||||
: DivInst(DivInst_), RemInst(RemInst_) {
|
||||
assert((DivInst->getOpcode() == Instruction::UDiv ||
|
||||
DivInst->getOpcode() == Instruction::SDiv) &&
|
||||
"Not a division.");
|
||||
assert(DivInst->getType() == RemInst->getType() && "Types should match.");
|
||||
// We can't check anything else about remainder instruction,
|
||||
// it's not strictly required to be a urem/srem.
|
||||
}
|
||||
|
||||
/// The type for this pair, identical for both the div and rem.
|
||||
Type *getType() const { return DivInst->getType(); }
|
||||
|
||||
/// Is this pair signed or unsigned?
|
||||
bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
|
||||
|
||||
/// In this pair, what are the dividend and divisor?
|
||||
Value *getDividend() const { return DivInst->getOperand(0); }
|
||||
Value *getDivisor() const { return DivInst->getOperand(1); }
|
||||
};
|
||||
using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
|
||||
|
||||
/// Find matching pairs of integer div/rem ops (they have the same numerator,
|
||||
/// denominator, and signedness). Place those pairs into a worklist for further
|
||||
/// processing. This indirection is needed because we have to use AssertingVH<>
|
||||
/// because we will be doing RAUW, and if one of the rem instructions we change
|
||||
/// happens to be an input to another div/rem in the maps, we'd have problems.
|
||||
static DivRemWorklistTy getWorklist(Function &F) {
|
||||
// Insert all divide and remainder instructions into maps keyed by their
|
||||
// operands and opcode (signed or unsigned).
|
||||
DenseMap<DivRemMapKey, Instruction *> DivMap;
|
||||
@ -69,6 +90,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
}
|
||||
}
|
||||
|
||||
// We'll accumulate the matching pairs of div-rem instructions here.
|
||||
DivRemWorklistTy Worklist;
|
||||
|
||||
// We can iterate over either map because we are only looking for matched
|
||||
// pairs. Choose remainders for efficiency because they are usually even more
|
||||
// rare than division.
|
||||
@ -78,12 +102,45 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
if (!DivInst)
|
||||
continue;
|
||||
|
||||
// We have a matching pair of div/rem instructions. If one dominates the
|
||||
// other, hoist and/or replace one.
|
||||
// We have a matching pair of div/rem instructions.
|
||||
NumPairs++;
|
||||
Instruction *RemInst = RemPair.second;
|
||||
bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
|
||||
bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
|
||||
|
||||
// Place it in the worklist.
|
||||
Worklist.emplace_back(DivInst, RemInst);
|
||||
}
|
||||
|
||||
return Worklist;
|
||||
}
|
||||
|
||||
/// Find matching pairs of integer div/rem ops (they have the same numerator,
|
||||
/// denominator, and signedness). If they exist in different basic blocks, bring
|
||||
/// them together by hoisting or replace the common division operation that is
|
||||
/// implicit in the remainder:
|
||||
/// X % Y <--> X - ((X / Y) * Y).
|
||||
///
|
||||
/// We can largely ignore the normal safety and cost constraints on speculation
|
||||
/// of these ops when we find a matching pair. This is because we are already
|
||||
/// guaranteed that any exceptions and most cost are already incurred by the
|
||||
/// first member of the pair.
|
||||
///
|
||||
/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
|
||||
/// SimplifyCFG, but it's split off on its own because it's different enough
|
||||
/// that it doesn't quite match the stated objectives of those passes.
|
||||
static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
const DominatorTree &DT) {
|
||||
bool Changed = false;
|
||||
|
||||
// Get the matching pairs of div-rem instructions. We want this extra
|
||||
// indirection to avoid dealing with having to RAUW the keys of the maps.
|
||||
DivRemWorklistTy Worklist = getWorklist(F);
|
||||
|
||||
// Process each entry in the worklist.
|
||||
for (DivRemPairWorklistEntry &E : Worklist) {
|
||||
bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
|
||||
|
||||
auto &DivInst = E.DivInst;
|
||||
auto &RemInst = E.RemInst;
|
||||
|
||||
// If the target supports div+rem and the instructions are in the same block
|
||||
// already, there's nothing to do. The backend should handle this. If the
|
||||
@ -110,8 +167,8 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
// The target does not have a single div/rem operation. Decompose the
|
||||
// remainder calculation as:
|
||||
// X % Y --> X - ((X / Y) * Y).
|
||||
Value *X = RemInst->getOperand(0);
|
||||
Value *Y = RemInst->getOperand(1);
|
||||
Value *X = E.getDividend();
|
||||
Value *Y = E.getDivisor();
|
||||
Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
|
||||
Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
|
||||
|
||||
@ -152,8 +209,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
|
||||
|
||||
// Now kill the explicit remainder. We have replaced it with:
|
||||
// (sub X, (mul (div X, Y), Y)
|
||||
RemInst->replaceAllUsesWith(Sub);
|
||||
RemInst->eraseFromParent();
|
||||
Sub->setName(RemInst->getName() + ".decomposed");
|
||||
Instruction *OrigRemInst = RemInst;
|
||||
// Update AssertingVH<> with new instruction so it doesn't assert.
|
||||
RemInst = Sub;
|
||||
// And replace the original instruction with the new one.
|
||||
OrigRemInst->replaceAllUsesWith(Sub);
|
||||
OrigRemInst->eraseFromParent();
|
||||
NumDecomposed++;
|
||||
}
|
||||
Changed = true;
|
||||
@ -188,7 +250,7 @@ struct DivRemPairsLegacyPass : public FunctionPass {
|
||||
return optimizeDivRem(F, TTI, DT);
|
||||
}
|
||||
};
|
||||
}
|
||||
} // namespace
|
||||
|
||||
char DivRemPairsLegacyPass::ID = 0;
|
||||
INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
|
||||
|
@ -777,8 +777,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
|
||||
// speculation if the predecessor is an invoke. This doesn't seem
|
||||
// fundamental and we should probably be splitting critical edges
|
||||
// differently.
|
||||
if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
|
||||
isa<InvokeInst>(PredBB->getTerminator())) {
|
||||
const auto *TermInst = PredBB->getTerminator();
|
||||
if (isa<IndirectBrInst>(TermInst) ||
|
||||
isa<InvokeInst>(TermInst) ||
|
||||
isa<CallBrInst>(TermInst)) {
|
||||
LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
|
||||
<< PredBB->getName() << "\n");
|
||||
return false;
|
||||
|
@ -185,15 +185,20 @@ class CXXMemberCallExpr final : public CallExpr {
|
||||
static CXXMemberCallExpr *CreateEmpty(const ASTContext &Ctx, unsigned NumArgs,
|
||||
EmptyShell Empty);
|
||||
|
||||
/// Retrieves the implicit object argument for the member call.
|
||||
/// Retrieve the implicit object argument for the member call.
|
||||
///
|
||||
/// For example, in "x.f(5)", this returns the sub-expression "x".
|
||||
Expr *getImplicitObjectArgument() const;
|
||||
|
||||
/// Retrieves the declaration of the called method.
|
||||
/// Retrieve the type of the object argument.
|
||||
///
|
||||
/// Note that this always returns a non-pointer type.
|
||||
QualType getObjectType() const;
|
||||
|
||||
/// Retrieve the declaration of the called method.
|
||||
CXXMethodDecl *getMethodDecl() const;
|
||||
|
||||
/// Retrieves the CXXRecordDecl for the underlying type of
|
||||
/// Retrieve the CXXRecordDecl for the underlying type of
|
||||
/// the implicit object argument.
|
||||
///
|
||||
/// Note that this may not be the same declaration as that of the class
|
||||
|
@ -598,6 +598,10 @@ def ext_implicit_lib_function_decl : ExtWarn<
|
||||
def note_include_header_or_declare : Note<
|
||||
"include the header <%0> or explicitly provide a declaration for '%1'">;
|
||||
def note_previous_builtin_declaration : Note<"%0 is a builtin with type %1">;
|
||||
def warn_implicit_decl_no_jmp_buf
|
||||
: Warning<"declaration of built-in function '%0' requires the declaration"
|
||||
" of the 'jmp_buf' type, commonly provided in the header <setjmp.h>.">,
|
||||
InGroup<DiagGroup<"incomplete-setjmp-declaration">>;
|
||||
def warn_implicit_decl_requires_sysheader : Warning<
|
||||
"declaration of built-in function '%1' requires inclusion of the header <%0>">,
|
||||
InGroup<BuiltinRequiresHeader>;
|
||||
|
@ -1249,15 +1249,9 @@ class TargetInfo : public virtual TransferrableTargetInfo,
|
||||
bool isBigEndian() const { return BigEndian; }
|
||||
bool isLittleEndian() const { return !BigEndian; }
|
||||
|
||||
enum CallingConvMethodType {
|
||||
CCMT_Unknown,
|
||||
CCMT_Member,
|
||||
CCMT_NonMember
|
||||
};
|
||||
|
||||
/// Gets the default calling convention for the given target and
|
||||
/// declaration context.
|
||||
virtual CallingConv getDefaultCallingConv(CallingConvMethodType MT) const {
|
||||
virtual CallingConv getDefaultCallingConv() const {
|
||||
// Not all targets will specify an explicit calling convention that we can
|
||||
// express. This will always do the right thing, even though it's not
|
||||
// an explicit calling convention.
|
||||
|
@ -518,7 +518,7 @@ def cl_mad_enable : Flag<["-"], "cl-mad-enable">, Group<opencl_Group>, Flags<[CC
|
||||
def cl_no_signed_zeros : Flag<["-"], "cl-no-signed-zeros">, Group<opencl_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"OpenCL only. Allow use of less precise no signed zeros computations in the generated binary.">;
|
||||
def cl_std_EQ : Joined<["-"], "cl-std=">, Group<opencl_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,c++">;
|
||||
HelpText<"OpenCL language standard to compile for.">, Values<"cl,CL,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,clc++,CLC++">;
|
||||
def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group>, Flags<[CC1Option]>,
|
||||
HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
|
||||
def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
|
||||
|
@ -174,6 +174,7 @@ LANGSTANDARD_ALIAS_DEPR(opencl10, "CL")
|
||||
LANGSTANDARD_ALIAS_DEPR(opencl11, "CL1.1")
|
||||
LANGSTANDARD_ALIAS_DEPR(opencl12, "CL1.2")
|
||||
LANGSTANDARD_ALIAS_DEPR(opencl20, "CL2.0")
|
||||
LANGSTANDARD_ALIAS_DEPR(openclcpp, "CLC++")
|
||||
|
||||
// CUDA
|
||||
LANGSTANDARD(cuda, "cuda", CUDA, "NVIDIA CUDA(tm)",
|
||||
|
@ -11165,6 +11165,7 @@ class Sema {
|
||||
// Emitting members of dllexported classes is delayed until the class
|
||||
// (including field initializers) is fully parsed.
|
||||
SmallVector<CXXRecordDecl*, 4> DelayedDllExportClasses;
|
||||
SmallVector<CXXMethodDecl*, 4> DelayedDllExportMemberFunctions;
|
||||
|
||||
private:
|
||||
class SavePendingParsedClassStateRAII {
|
||||
|
@ -10035,7 +10035,7 @@ CallingConv ASTContext::getDefaultCallingConvention(bool IsVariadic,
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Target->getDefaultCallingConv(TargetInfo::CCMT_Unknown);
|
||||
return Target->getDefaultCallingConv();
|
||||
}
|
||||
|
||||
bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const {
|
||||
|
@ -651,6 +651,13 @@ Expr *CXXMemberCallExpr::getImplicitObjectArgument() const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
QualType CXXMemberCallExpr::getObjectType() const {
|
||||
QualType Ty = getImplicitObjectArgument()->getType();
|
||||
if (Ty->isPointerType())
|
||||
Ty = Ty->getPointeeType();
|
||||
return Ty;
|
||||
}
|
||||
|
||||
CXXMethodDecl *CXXMemberCallExpr::getMethodDecl() const {
|
||||
if (const auto *MemExpr = dyn_cast<MemberExpr>(getCallee()->IgnoreParens()))
|
||||
return cast<CXXMethodDecl>(MemExpr->getMemberDecl());
|
||||
|
@ -177,7 +177,7 @@ class ItaniumCXXABI : public CXXABI {
|
||||
if (!isVariadic && T.isWindowsGNUEnvironment() &&
|
||||
T.getArch() == llvm::Triple::x86)
|
||||
return CC_X86ThisCall;
|
||||
return CC_C;
|
||||
return Context.getTargetInfo().getDefaultCallingConv();
|
||||
}
|
||||
|
||||
// We cheat and just check that the class has a vtable pointer, and that it's
|
||||
|
@ -82,7 +82,7 @@ class MicrosoftCXXABI : public CXXABI {
|
||||
if (!isVariadic &&
|
||||
Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86)
|
||||
return CC_X86ThisCall;
|
||||
return CC_C;
|
||||
return Context.getTargetInfo().getDefaultCallingConv();
|
||||
}
|
||||
|
||||
bool isNearlyEmpty(const CXXRecordDecl *RD) const override {
|
||||
|
@ -196,9 +196,6 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
|
||||
Builder.defineMacro("__ARM_NEON_FP", "0xE");
|
||||
}
|
||||
|
||||
if (FPU & SveMode)
|
||||
Builder.defineMacro("__ARM_FEATURE_SVE", "1");
|
||||
|
||||
if (HasCRC)
|
||||
Builder.defineMacro("__ARM_FEATURE_CRC32", "1");
|
||||
|
||||
@ -351,10 +348,19 @@ const char *const AArch64TargetInfo::GCCRegNames[] = {
|
||||
"d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22",
|
||||
"d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
|
||||
|
||||
// Vector registers
|
||||
// Neon vector registers
|
||||
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11",
|
||||
"v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22",
|
||||
"v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
|
||||
"v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
|
||||
|
||||
// SVE vector registers
|
||||
"z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", "z8", "z9", "z10",
|
||||
"z11", "z12", "z13", "z14", "z15", "z16", "z17", "z18", "z19", "z20", "z21",
|
||||
"z22", "z23", "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31",
|
||||
|
||||
// SVE predicate registers
|
||||
"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "p10",
|
||||
"p11", "p12", "p13", "p14", "p15"
|
||||
};
|
||||
|
||||
ArrayRef<const char *> AArch64TargetInfo::getGCCRegNames() const {
|
||||
|
@ -618,8 +618,11 @@ class LLVM_LIBRARY_VISIBILITY SolarisTargetInfo : public OSTargetInfo<Target> {
|
||||
Builder.defineMacro("_XOPEN_SOURCE", "600");
|
||||
else
|
||||
Builder.defineMacro("_XOPEN_SOURCE", "500");
|
||||
if (Opts.CPlusPlus)
|
||||
if (Opts.CPlusPlus) {
|
||||
Builder.defineMacro("__C99FEATURES__");
|
||||
Builder.defineMacro("_FILE_OFFSET_BITS", "64");
|
||||
}
|
||||
// GCC restricts the next two to C++.
|
||||
Builder.defineMacro("_LARGEFILE_SOURCE");
|
||||
Builder.defineMacro("_LARGEFILE64_SOURCE");
|
||||
Builder.defineMacro("__EXTENSIONS__");
|
||||
|
@ -56,6 +56,10 @@ bool RISCVTargetInfo::validateAsmConstraint(
|
||||
// A 5-bit unsigned immediate for CSR access instructions.
|
||||
Info.setRequiresImmediate(0, 31);
|
||||
return true;
|
||||
case 'f':
|
||||
// A floating-point register.
|
||||
Info.setAllowsRegister();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -65,9 +69,18 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
|
||||
Builder.defineMacro("__riscv");
|
||||
bool Is64Bit = getTriple().getArch() == llvm::Triple::riscv64;
|
||||
Builder.defineMacro("__riscv_xlen", Is64Bit ? "64" : "32");
|
||||
// TODO: modify when more code models and ABIs are supported.
|
||||
// TODO: modify when more code models are supported.
|
||||
Builder.defineMacro("__riscv_cmodel_medlow");
|
||||
Builder.defineMacro("__riscv_float_abi_soft");
|
||||
|
||||
StringRef ABIName = getABI();
|
||||
if (ABIName == "ilp32f" || ABIName == "lp64f")
|
||||
Builder.defineMacro("__riscv_float_abi_single");
|
||||
else if (ABIName == "ilp32d" || ABIName == "lp64d")
|
||||
Builder.defineMacro("__riscv_float_abi_double");
|
||||
else if (ABIName == "ilp32e")
|
||||
Builder.defineMacro("__riscv_abi_rve");
|
||||
else
|
||||
Builder.defineMacro("__riscv_float_abi_soft");
|
||||
|
||||
if (HasM) {
|
||||
Builder.defineMacro("__riscv_mul");
|
||||
|
@ -87,8 +87,7 @@ class LLVM_LIBRARY_VISIBILITY RISCV32TargetInfo : public RISCVTargetInfo {
|
||||
}
|
||||
|
||||
bool setABI(const std::string &Name) override {
|
||||
// TODO: support ilp32f and ilp32d ABIs.
|
||||
if (Name == "ilp32") {
|
||||
if (Name == "ilp32" || Name == "ilp32f" || Name == "ilp32d") {
|
||||
ABI = Name;
|
||||
return true;
|
||||
}
|
||||
@ -105,8 +104,7 @@ class LLVM_LIBRARY_VISIBILITY RISCV64TargetInfo : public RISCVTargetInfo {
|
||||
}
|
||||
|
||||
bool setABI(const std::string &Name) override {
|
||||
// TODO: support lp64f and lp64d ABIs.
|
||||
if (Name == "lp64") {
|
||||
if (Name == "lp64" || Name == "lp64f" || Name == "lp64d") {
|
||||
ABI = Name;
|
||||
return true;
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo {
|
||||
: CCCR_Warning;
|
||||
}
|
||||
|
||||
CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override {
|
||||
CallingConv getDefaultCallingConv() const override {
|
||||
return CC_SpirFunction;
|
||||
}
|
||||
|
||||
|
@ -320,8 +320,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
|
||||
}
|
||||
}
|
||||
|
||||
CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override {
|
||||
return MT == CCMT_Member ? CC_X86ThisCall : CC_C;
|
||||
CallingConv getDefaultCallingConv() const override {
|
||||
return CC_C;
|
||||
}
|
||||
|
||||
bool hasSjLjLowering() const override { return true; }
|
||||
@ -659,7 +659,7 @@ class LLVM_LIBRARY_VISIBILITY X86_64TargetInfo : public X86TargetInfo {
|
||||
}
|
||||
}
|
||||
|
||||
CallingConv getDefaultCallingConv(CallingConvMethodType MT) const override {
|
||||
CallingConv getDefaultCallingConv() const override {
|
||||
return CC_C;
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,7 @@ std::string getClangRepositoryPath() {
|
||||
|
||||
// If the CLANG_REPOSITORY is empty, try to use the SVN keyword. This helps us
|
||||
// pick up a tag in an SVN export, for example.
|
||||
StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/trunk/lib/Basic/Version.cpp $");
|
||||
StringRef SVNRepository("$URL: https://llvm.org/svn/llvm-project/cfe/branches/release_90/lib/Basic/Version.cpp $");
|
||||
if (URL.empty()) {
|
||||
URL = SVNRepository.slice(SVNRepository.find(':'),
|
||||
SVNRepository.find("/lib/Basic"));
|
||||
|
@ -8011,6 +8011,151 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
||||
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
||||
"vgetq_lane");
|
||||
}
|
||||
case AArch64::BI_BitScanForward:
|
||||
case AArch64::BI_BitScanForward64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
|
||||
case AArch64::BI_BitScanReverse:
|
||||
case AArch64::BI_BitScanReverse64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
|
||||
case AArch64::BI_InterlockedAnd64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
|
||||
case AArch64::BI_InterlockedExchange64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
|
||||
case AArch64::BI_InterlockedExchangeAdd64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
|
||||
case AArch64::BI_InterlockedExchangeSub64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
|
||||
case AArch64::BI_InterlockedOr64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
|
||||
case AArch64::BI_InterlockedXor64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
|
||||
case AArch64::BI_InterlockedDecrement64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
|
||||
case AArch64::BI_InterlockedIncrement64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
|
||||
case AArch64::BI_InterlockedExchangeAdd8_acq:
|
||||
case AArch64::BI_InterlockedExchangeAdd16_acq:
|
||||
case AArch64::BI_InterlockedExchangeAdd_acq:
|
||||
case AArch64::BI_InterlockedExchangeAdd64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
|
||||
case AArch64::BI_InterlockedExchangeAdd8_rel:
|
||||
case AArch64::BI_InterlockedExchangeAdd16_rel:
|
||||
case AArch64::BI_InterlockedExchangeAdd_rel:
|
||||
case AArch64::BI_InterlockedExchangeAdd64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
|
||||
case AArch64::BI_InterlockedExchangeAdd8_nf:
|
||||
case AArch64::BI_InterlockedExchangeAdd16_nf:
|
||||
case AArch64::BI_InterlockedExchangeAdd_nf:
|
||||
case AArch64::BI_InterlockedExchangeAdd64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
|
||||
case AArch64::BI_InterlockedExchange8_acq:
|
||||
case AArch64::BI_InterlockedExchange16_acq:
|
||||
case AArch64::BI_InterlockedExchange_acq:
|
||||
case AArch64::BI_InterlockedExchange64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
|
||||
case AArch64::BI_InterlockedExchange8_rel:
|
||||
case AArch64::BI_InterlockedExchange16_rel:
|
||||
case AArch64::BI_InterlockedExchange_rel:
|
||||
case AArch64::BI_InterlockedExchange64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
|
||||
case AArch64::BI_InterlockedExchange8_nf:
|
||||
case AArch64::BI_InterlockedExchange16_nf:
|
||||
case AArch64::BI_InterlockedExchange_nf:
|
||||
case AArch64::BI_InterlockedExchange64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
|
||||
case AArch64::BI_InterlockedCompareExchange8_acq:
|
||||
case AArch64::BI_InterlockedCompareExchange16_acq:
|
||||
case AArch64::BI_InterlockedCompareExchange_acq:
|
||||
case AArch64::BI_InterlockedCompareExchange64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
|
||||
case AArch64::BI_InterlockedCompareExchange8_rel:
|
||||
case AArch64::BI_InterlockedCompareExchange16_rel:
|
||||
case AArch64::BI_InterlockedCompareExchange_rel:
|
||||
case AArch64::BI_InterlockedCompareExchange64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
|
||||
case AArch64::BI_InterlockedCompareExchange8_nf:
|
||||
case AArch64::BI_InterlockedCompareExchange16_nf:
|
||||
case AArch64::BI_InterlockedCompareExchange_nf:
|
||||
case AArch64::BI_InterlockedCompareExchange64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
|
||||
case AArch64::BI_InterlockedOr8_acq:
|
||||
case AArch64::BI_InterlockedOr16_acq:
|
||||
case AArch64::BI_InterlockedOr_acq:
|
||||
case AArch64::BI_InterlockedOr64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
|
||||
case AArch64::BI_InterlockedOr8_rel:
|
||||
case AArch64::BI_InterlockedOr16_rel:
|
||||
case AArch64::BI_InterlockedOr_rel:
|
||||
case AArch64::BI_InterlockedOr64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
|
||||
case AArch64::BI_InterlockedOr8_nf:
|
||||
case AArch64::BI_InterlockedOr16_nf:
|
||||
case AArch64::BI_InterlockedOr_nf:
|
||||
case AArch64::BI_InterlockedOr64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
|
||||
case AArch64::BI_InterlockedXor8_acq:
|
||||
case AArch64::BI_InterlockedXor16_acq:
|
||||
case AArch64::BI_InterlockedXor_acq:
|
||||
case AArch64::BI_InterlockedXor64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
|
||||
case AArch64::BI_InterlockedXor8_rel:
|
||||
case AArch64::BI_InterlockedXor16_rel:
|
||||
case AArch64::BI_InterlockedXor_rel:
|
||||
case AArch64::BI_InterlockedXor64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
|
||||
case AArch64::BI_InterlockedXor8_nf:
|
||||
case AArch64::BI_InterlockedXor16_nf:
|
||||
case AArch64::BI_InterlockedXor_nf:
|
||||
case AArch64::BI_InterlockedXor64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
|
||||
case AArch64::BI_InterlockedAnd8_acq:
|
||||
case AArch64::BI_InterlockedAnd16_acq:
|
||||
case AArch64::BI_InterlockedAnd_acq:
|
||||
case AArch64::BI_InterlockedAnd64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
|
||||
case AArch64::BI_InterlockedAnd8_rel:
|
||||
case AArch64::BI_InterlockedAnd16_rel:
|
||||
case AArch64::BI_InterlockedAnd_rel:
|
||||
case AArch64::BI_InterlockedAnd64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
|
||||
case AArch64::BI_InterlockedAnd8_nf:
|
||||
case AArch64::BI_InterlockedAnd16_nf:
|
||||
case AArch64::BI_InterlockedAnd_nf:
|
||||
case AArch64::BI_InterlockedAnd64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
|
||||
case AArch64::BI_InterlockedIncrement16_acq:
|
||||
case AArch64::BI_InterlockedIncrement_acq:
|
||||
case AArch64::BI_InterlockedIncrement64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
|
||||
case AArch64::BI_InterlockedIncrement16_rel:
|
||||
case AArch64::BI_InterlockedIncrement_rel:
|
||||
case AArch64::BI_InterlockedIncrement64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
|
||||
case AArch64::BI_InterlockedIncrement16_nf:
|
||||
case AArch64::BI_InterlockedIncrement_nf:
|
||||
case AArch64::BI_InterlockedIncrement64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
|
||||
case AArch64::BI_InterlockedDecrement16_acq:
|
||||
case AArch64::BI_InterlockedDecrement_acq:
|
||||
case AArch64::BI_InterlockedDecrement64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
|
||||
case AArch64::BI_InterlockedDecrement16_rel:
|
||||
case AArch64::BI_InterlockedDecrement_rel:
|
||||
case AArch64::BI_InterlockedDecrement64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
|
||||
case AArch64::BI_InterlockedDecrement16_nf:
|
||||
case AArch64::BI_InterlockedDecrement_nf:
|
||||
case AArch64::BI_InterlockedDecrement64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
|
||||
|
||||
case AArch64::BI_InterlockedAdd: {
|
||||
Value *Arg0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Arg1 = EmitScalarExpr(E->getArg(1));
|
||||
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
|
||||
AtomicRMWInst::Add, Arg0, Arg1,
|
||||
llvm::AtomicOrdering::SequentiallyConsistent);
|
||||
return Builder.CreateAdd(RMWI, Arg1);
|
||||
}
|
||||
}
|
||||
|
||||
llvm::VectorType *VTy = GetNeonType(this, Type);
|
||||
@ -9128,151 +9273,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
||||
Int = Intrinsic::aarch64_neon_suqadd;
|
||||
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
|
||||
}
|
||||
case AArch64::BI_BitScanForward:
|
||||
case AArch64::BI_BitScanForward64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
|
||||
case AArch64::BI_BitScanReverse:
|
||||
case AArch64::BI_BitScanReverse64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
|
||||
case AArch64::BI_InterlockedAnd64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
|
||||
case AArch64::BI_InterlockedExchange64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
|
||||
case AArch64::BI_InterlockedExchangeAdd64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
|
||||
case AArch64::BI_InterlockedExchangeSub64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
|
||||
case AArch64::BI_InterlockedOr64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
|
||||
case AArch64::BI_InterlockedXor64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
|
||||
case AArch64::BI_InterlockedDecrement64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
|
||||
case AArch64::BI_InterlockedIncrement64:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
|
||||
case AArch64::BI_InterlockedExchangeAdd8_acq:
|
||||
case AArch64::BI_InterlockedExchangeAdd16_acq:
|
||||
case AArch64::BI_InterlockedExchangeAdd_acq:
|
||||
case AArch64::BI_InterlockedExchangeAdd64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
|
||||
case AArch64::BI_InterlockedExchangeAdd8_rel:
|
||||
case AArch64::BI_InterlockedExchangeAdd16_rel:
|
||||
case AArch64::BI_InterlockedExchangeAdd_rel:
|
||||
case AArch64::BI_InterlockedExchangeAdd64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
|
||||
case AArch64::BI_InterlockedExchangeAdd8_nf:
|
||||
case AArch64::BI_InterlockedExchangeAdd16_nf:
|
||||
case AArch64::BI_InterlockedExchangeAdd_nf:
|
||||
case AArch64::BI_InterlockedExchangeAdd64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
|
||||
case AArch64::BI_InterlockedExchange8_acq:
|
||||
case AArch64::BI_InterlockedExchange16_acq:
|
||||
case AArch64::BI_InterlockedExchange_acq:
|
||||
case AArch64::BI_InterlockedExchange64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
|
||||
case AArch64::BI_InterlockedExchange8_rel:
|
||||
case AArch64::BI_InterlockedExchange16_rel:
|
||||
case AArch64::BI_InterlockedExchange_rel:
|
||||
case AArch64::BI_InterlockedExchange64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
|
||||
case AArch64::BI_InterlockedExchange8_nf:
|
||||
case AArch64::BI_InterlockedExchange16_nf:
|
||||
case AArch64::BI_InterlockedExchange_nf:
|
||||
case AArch64::BI_InterlockedExchange64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
|
||||
case AArch64::BI_InterlockedCompareExchange8_acq:
|
||||
case AArch64::BI_InterlockedCompareExchange16_acq:
|
||||
case AArch64::BI_InterlockedCompareExchange_acq:
|
||||
case AArch64::BI_InterlockedCompareExchange64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
|
||||
case AArch64::BI_InterlockedCompareExchange8_rel:
|
||||
case AArch64::BI_InterlockedCompareExchange16_rel:
|
||||
case AArch64::BI_InterlockedCompareExchange_rel:
|
||||
case AArch64::BI_InterlockedCompareExchange64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
|
||||
case AArch64::BI_InterlockedCompareExchange8_nf:
|
||||
case AArch64::BI_InterlockedCompareExchange16_nf:
|
||||
case AArch64::BI_InterlockedCompareExchange_nf:
|
||||
case AArch64::BI_InterlockedCompareExchange64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
|
||||
case AArch64::BI_InterlockedOr8_acq:
|
||||
case AArch64::BI_InterlockedOr16_acq:
|
||||
case AArch64::BI_InterlockedOr_acq:
|
||||
case AArch64::BI_InterlockedOr64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
|
||||
case AArch64::BI_InterlockedOr8_rel:
|
||||
case AArch64::BI_InterlockedOr16_rel:
|
||||
case AArch64::BI_InterlockedOr_rel:
|
||||
case AArch64::BI_InterlockedOr64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
|
||||
case AArch64::BI_InterlockedOr8_nf:
|
||||
case AArch64::BI_InterlockedOr16_nf:
|
||||
case AArch64::BI_InterlockedOr_nf:
|
||||
case AArch64::BI_InterlockedOr64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
|
||||
case AArch64::BI_InterlockedXor8_acq:
|
||||
case AArch64::BI_InterlockedXor16_acq:
|
||||
case AArch64::BI_InterlockedXor_acq:
|
||||
case AArch64::BI_InterlockedXor64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
|
||||
case AArch64::BI_InterlockedXor8_rel:
|
||||
case AArch64::BI_InterlockedXor16_rel:
|
||||
case AArch64::BI_InterlockedXor_rel:
|
||||
case AArch64::BI_InterlockedXor64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
|
||||
case AArch64::BI_InterlockedXor8_nf:
|
||||
case AArch64::BI_InterlockedXor16_nf:
|
||||
case AArch64::BI_InterlockedXor_nf:
|
||||
case AArch64::BI_InterlockedXor64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
|
||||
case AArch64::BI_InterlockedAnd8_acq:
|
||||
case AArch64::BI_InterlockedAnd16_acq:
|
||||
case AArch64::BI_InterlockedAnd_acq:
|
||||
case AArch64::BI_InterlockedAnd64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
|
||||
case AArch64::BI_InterlockedAnd8_rel:
|
||||
case AArch64::BI_InterlockedAnd16_rel:
|
||||
case AArch64::BI_InterlockedAnd_rel:
|
||||
case AArch64::BI_InterlockedAnd64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
|
||||
case AArch64::BI_InterlockedAnd8_nf:
|
||||
case AArch64::BI_InterlockedAnd16_nf:
|
||||
case AArch64::BI_InterlockedAnd_nf:
|
||||
case AArch64::BI_InterlockedAnd64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
|
||||
case AArch64::BI_InterlockedIncrement16_acq:
|
||||
case AArch64::BI_InterlockedIncrement_acq:
|
||||
case AArch64::BI_InterlockedIncrement64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
|
||||
case AArch64::BI_InterlockedIncrement16_rel:
|
||||
case AArch64::BI_InterlockedIncrement_rel:
|
||||
case AArch64::BI_InterlockedIncrement64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
|
||||
case AArch64::BI_InterlockedIncrement16_nf:
|
||||
case AArch64::BI_InterlockedIncrement_nf:
|
||||
case AArch64::BI_InterlockedIncrement64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
|
||||
case AArch64::BI_InterlockedDecrement16_acq:
|
||||
case AArch64::BI_InterlockedDecrement_acq:
|
||||
case AArch64::BI_InterlockedDecrement64_acq:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
|
||||
case AArch64::BI_InterlockedDecrement16_rel:
|
||||
case AArch64::BI_InterlockedDecrement_rel:
|
||||
case AArch64::BI_InterlockedDecrement64_rel:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
|
||||
case AArch64::BI_InterlockedDecrement16_nf:
|
||||
case AArch64::BI_InterlockedDecrement_nf:
|
||||
case AArch64::BI_InterlockedDecrement64_nf:
|
||||
return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
|
||||
|
||||
case AArch64::BI_InterlockedAdd: {
|
||||
Value *Arg0 = EmitScalarExpr(E->getArg(0));
|
||||
Value *Arg1 = EmitScalarExpr(E->getArg(1));
|
||||
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
|
||||
AtomicRMWInst::Add, Arg0, Arg1,
|
||||
llvm::AtomicOrdering::SequentiallyConsistent);
|
||||
return Builder.CreateAdd(RMWI, Arg1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1495,6 +1495,13 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
|
||||
// initializers throws an exception.
|
||||
SmallVector<EHScopeStack::stable_iterator, 16> cleanups;
|
||||
llvm::Instruction *cleanupDominator = nullptr;
|
||||
auto addCleanup = [&](const EHScopeStack::stable_iterator &cleanup) {
|
||||
cleanups.push_back(cleanup);
|
||||
if (!cleanupDominator) // create placeholder once needed
|
||||
cleanupDominator = CGF.Builder.CreateAlignedLoad(
|
||||
CGF.Int8Ty, llvm::Constant::getNullValue(CGF.Int8PtrTy),
|
||||
CharUnits::One());
|
||||
};
|
||||
|
||||
unsigned curInitIndex = 0;
|
||||
|
||||
@ -1519,7 +1526,7 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
|
||||
if (QualType::DestructionKind dtorKind =
|
||||
Base.getType().isDestructedType()) {
|
||||
CGF.pushDestroy(dtorKind, V, Base.getType());
|
||||
cleanups.push_back(CGF.EHStack.stable_begin());
|
||||
addCleanup(CGF.EHStack.stable_begin());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1596,15 +1603,9 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
|
||||
= field->getType().isDestructedType()) {
|
||||
assert(LV.isSimple());
|
||||
if (CGF.needsEHCleanup(dtorKind)) {
|
||||
if (!cleanupDominator)
|
||||
cleanupDominator = CGF.Builder.CreateAlignedLoad(
|
||||
CGF.Int8Ty,
|
||||
llvm::Constant::getNullValue(CGF.Int8PtrTy),
|
||||
CharUnits::One()); // placeholder
|
||||
|
||||
CGF.pushDestroy(EHCleanup, LV.getAddress(), field->getType(),
|
||||
CGF.getDestroyer(dtorKind), false);
|
||||
cleanups.push_back(CGF.EHStack.stable_begin());
|
||||
addCleanup(CGF.EHStack.stable_begin());
|
||||
pushedCleanup = true;
|
||||
}
|
||||
}
|
||||
@ -1620,6 +1621,8 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
|
||||
|
||||
// Deactivate all the partial cleanups in reverse order, which
|
||||
// generally means popping them.
|
||||
assert((cleanupDominator || cleanups.empty()) &&
|
||||
"Missing cleanupDominator before deactivating cleanup blocks");
|
||||
for (unsigned i = cleanups.size(); i != 0; --i)
|
||||
CGF.DeactivateCleanupBlock(cleanups[i-1], cleanupDominator);
|
||||
|
||||
|
@ -1846,11 +1846,9 @@ llvm::Value* CodeGenFunction::EmitAsmInput(
|
||||
InputExpr->EvaluateAsRValue(EVResult, getContext(), true);
|
||||
|
||||
llvm::APSInt IntResult;
|
||||
if (!EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
|
||||
getContext()))
|
||||
llvm_unreachable("Invalid immediate constant!");
|
||||
|
||||
return llvm::ConstantInt::get(getLLVMContext(), IntResult);
|
||||
if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
|
||||
getContext()))
|
||||
return llvm::ConstantInt::get(getLLVMContext(), IntResult);
|
||||
}
|
||||
|
||||
Expr::EvalResult Result;
|
||||
|
@ -1755,10 +1755,11 @@ llvm::Value *ItaniumCXXABI::EmitVirtualDestructorCall(
|
||||
CGCallee Callee = CGCallee::forVirtual(CE, GD, This, Ty);
|
||||
|
||||
QualType ThisTy;
|
||||
if (CE)
|
||||
ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
|
||||
else
|
||||
if (CE) {
|
||||
ThisTy = CE->getObjectType();
|
||||
} else {
|
||||
ThisTy = D->getDestroyedType();
|
||||
}
|
||||
|
||||
CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy, nullptr,
|
||||
QualType(), nullptr);
|
||||
|
@ -1921,10 +1921,11 @@ llvm::Value *MicrosoftCXXABI::EmitVirtualDestructorCall(
|
||||
DtorType == Dtor_Deleting);
|
||||
|
||||
QualType ThisTy;
|
||||
if (CE)
|
||||
ThisTy = CE->getImplicitObjectArgument()->getType()->getPointeeType();
|
||||
else
|
||||
if (CE) {
|
||||
ThisTy = CE->getObjectType();
|
||||
} else {
|
||||
ThisTy = D->getDestroyedType();
|
||||
}
|
||||
|
||||
This = adjustThisArgumentForVirtualFunctionCall(CGF, GD, This, true);
|
||||
RValue RV = CGF.EmitCXXDestructorCall(GD, Callee, This.getPointer(), ThisTy,
|
||||
|
@ -9188,25 +9188,45 @@ static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
|
||||
namespace {
|
||||
class RISCVABIInfo : public DefaultABIInfo {
|
||||
private:
|
||||
unsigned XLen; // Size of the integer ('x') registers in bits.
|
||||
// Size of the integer ('x') registers in bits.
|
||||
unsigned XLen;
|
||||
// Size of the floating point ('f') registers in bits. Note that the target
|
||||
// ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target
|
||||
// with soft float ABI has FLen==0).
|
||||
unsigned FLen;
|
||||
static const int NumArgGPRs = 8;
|
||||
static const int NumArgFPRs = 8;
|
||||
bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
|
||||
llvm::Type *&Field1Ty,
|
||||
CharUnits &Field1Off,
|
||||
llvm::Type *&Field2Ty,
|
||||
CharUnits &Field2Off) const;
|
||||
|
||||
public:
|
||||
RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
|
||||
: DefaultABIInfo(CGT), XLen(XLen) {}
|
||||
RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen)
|
||||
: DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {}
|
||||
|
||||
// DefaultABIInfo's classifyReturnType and classifyArgumentType are
|
||||
// non-virtual, but computeInfo is virtual, so we overload it.
|
||||
void computeInfo(CGFunctionInfo &FI) const override;
|
||||
|
||||
ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed,
|
||||
int &ArgGPRsLeft) const;
|
||||
ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft,
|
||||
int &ArgFPRsLeft) const;
|
||||
ABIArgInfo classifyReturnType(QualType RetTy) const;
|
||||
|
||||
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
|
||||
QualType Ty) const override;
|
||||
|
||||
ABIArgInfo extendType(QualType Ty) const;
|
||||
|
||||
bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
|
||||
CharUnits &Field1Off, llvm::Type *&Field2Ty,
|
||||
CharUnits &Field2Off, int &NeededArgGPRs,
|
||||
int &NeededArgFPRs) const;
|
||||
ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty,
|
||||
CharUnits Field1Off,
|
||||
llvm::Type *Field2Ty,
|
||||
CharUnits Field2Off) const;
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
@ -9228,18 +9248,215 @@ void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
|
||||
// different for variadic arguments, we must also track whether we are
|
||||
// examining a vararg or not.
|
||||
int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
|
||||
int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
|
||||
int NumFixedArgs = FI.getNumRequiredArgs();
|
||||
|
||||
int ArgNum = 0;
|
||||
for (auto &ArgInfo : FI.arguments()) {
|
||||
bool IsFixed = ArgNum < NumFixedArgs;
|
||||
ArgInfo.info = classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft);
|
||||
ArgInfo.info =
|
||||
classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft);
|
||||
ArgNum++;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the struct is a potential candidate for the floating point
|
||||
// calling convention. If this function returns true, the caller is
|
||||
// responsible for checking that if there is only a single field then that
|
||||
// field is a float.
|
||||
bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
|
||||
llvm::Type *&Field1Ty,
|
||||
CharUnits &Field1Off,
|
||||
llvm::Type *&Field2Ty,
|
||||
CharUnits &Field2Off) const {
|
||||
bool IsInt = Ty->isIntegralOrEnumerationType();
|
||||
bool IsFloat = Ty->isRealFloatingType();
|
||||
|
||||
if (IsInt || IsFloat) {
|
||||
uint64_t Size = getContext().getTypeSize(Ty);
|
||||
if (IsInt && Size > XLen)
|
||||
return false;
|
||||
// Can't be eligible if larger than the FP registers. Half precision isn't
|
||||
// currently supported on RISC-V and the ABI hasn't been confirmed, so
|
||||
// default to the integer ABI in that case.
|
||||
if (IsFloat && (Size > FLen || Size < 32))
|
||||
return false;
|
||||
// Can't be eligible if an integer type was already found (int+int pairs
|
||||
// are not eligible).
|
||||
if (IsInt && Field1Ty && Field1Ty->isIntegerTy())
|
||||
return false;
|
||||
if (!Field1Ty) {
|
||||
Field1Ty = CGT.ConvertType(Ty);
|
||||
Field1Off = CurOff;
|
||||
return true;
|
||||
}
|
||||
if (!Field2Ty) {
|
||||
Field2Ty = CGT.ConvertType(Ty);
|
||||
Field2Off = CurOff;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (auto CTy = Ty->getAs<ComplexType>()) {
|
||||
if (Field1Ty)
|
||||
return false;
|
||||
QualType EltTy = CTy->getElementType();
|
||||
if (getContext().getTypeSize(EltTy) > FLen)
|
||||
return false;
|
||||
Field1Ty = CGT.ConvertType(EltTy);
|
||||
Field1Off = CurOff;
|
||||
assert(CurOff.isZero() && "Unexpected offset for first field");
|
||||
Field2Ty = Field1Ty;
|
||||
Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
|
||||
uint64_t ArraySize = ATy->getSize().getZExtValue();
|
||||
QualType EltTy = ATy->getElementType();
|
||||
CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
|
||||
for (uint64_t i = 0; i < ArraySize; ++i) {
|
||||
bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty,
|
||||
Field1Off, Field2Ty, Field2Off);
|
||||
if (!Ret)
|
||||
return false;
|
||||
CurOff += EltSize;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (const auto *RTy = Ty->getAs<RecordType>()) {
|
||||
// Structures with either a non-trivial destructor or a non-trivial
|
||||
// copy constructor are not eligible for the FP calling convention.
|
||||
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, CGT.getCXXABI()))
|
||||
return false;
|
||||
if (isEmptyRecord(getContext(), Ty, true))
|
||||
return true;
|
||||
const RecordDecl *RD = RTy->getDecl();
|
||||
// Unions aren't eligible unless they're empty (which is caught above).
|
||||
if (RD->isUnion())
|
||||
return false;
|
||||
int ZeroWidthBitFieldCount = 0;
|
||||
for (const FieldDecl *FD : RD->fields()) {
|
||||
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
|
||||
uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex());
|
||||
QualType QTy = FD->getType();
|
||||
if (FD->isBitField()) {
|
||||
unsigned BitWidth = FD->getBitWidthValue(getContext());
|
||||
// Allow a bitfield with a type greater than XLen as long as the
|
||||
// bitwidth is XLen or less.
|
||||
if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen)
|
||||
QTy = getContext().getIntTypeForBitwidth(XLen, false);
|
||||
if (BitWidth == 0) {
|
||||
ZeroWidthBitFieldCount++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
bool Ret = detectFPCCEligibleStructHelper(
|
||||
QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits),
|
||||
Field1Ty, Field1Off, Field2Ty, Field2Off);
|
||||
if (!Ret)
|
||||
return false;
|
||||
|
||||
// As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp
|
||||
// or int+fp structs, but are ignored for a struct with an fp field and
|
||||
// any number of zero-width bitfields.
|
||||
if (Field2Ty && ZeroWidthBitFieldCount > 0)
|
||||
return false;
|
||||
}
|
||||
return Field1Ty != nullptr;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Determine if a struct is eligible for passing according to the floating
|
||||
// point calling convention (i.e., when flattened it contains a single fp
|
||||
// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and
|
||||
// NeededArgGPRs are incremented appropriately.
|
||||
bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
|
||||
CharUnits &Field1Off,
|
||||
llvm::Type *&Field2Ty,
|
||||
CharUnits &Field2Off,
|
||||
int &NeededArgGPRs,
|
||||
int &NeededArgFPRs) const {
|
||||
Field1Ty = nullptr;
|
||||
Field2Ty = nullptr;
|
||||
NeededArgGPRs = 0;
|
||||
NeededArgFPRs = 0;
|
||||
bool IsCandidate = detectFPCCEligibleStructHelper(
|
||||
Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off);
|
||||
// Not really a candidate if we have a single int but no float.
|
||||
if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
|
||||
return IsCandidate = false;
|
||||
if (!IsCandidate)
|
||||
return false;
|
||||
if (Field1Ty && Field1Ty->isFloatingPointTy())
|
||||
NeededArgFPRs++;
|
||||
else if (Field1Ty)
|
||||
NeededArgGPRs++;
|
||||
if (Field2Ty && Field2Ty->isFloatingPointTy())
|
||||
NeededArgFPRs++;
|
||||
else if (Field2Ty)
|
||||
NeededArgGPRs++;
|
||||
return IsCandidate;
|
||||
}
|
||||
|
||||
// Call getCoerceAndExpand for the two-element flattened struct described by
|
||||
// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an
|
||||
// appropriate coerceToType and unpaddedCoerceToType.
|
||||
ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct(
|
||||
llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty,
|
||||
CharUnits Field2Off) const {
|
||||
SmallVector<llvm::Type *, 3> CoerceElts;
|
||||
SmallVector<llvm::Type *, 2> UnpaddedCoerceElts;
|
||||
if (!Field1Off.isZero())
|
||||
CoerceElts.push_back(llvm::ArrayType::get(
|
||||
llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity()));
|
||||
|
||||
CoerceElts.push_back(Field1Ty);
|
||||
UnpaddedCoerceElts.push_back(Field1Ty);
|
||||
|
||||
if (!Field2Ty) {
|
||||
return ABIArgInfo::getCoerceAndExpand(
|
||||
llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()),
|
||||
UnpaddedCoerceElts[0]);
|
||||
}
|
||||
|
||||
CharUnits Field2Align =
|
||||
CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty));
|
||||
CharUnits Field1Size =
|
||||
CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
|
||||
CharUnits Field2OffNoPadNoPack = Field1Size.alignTo(Field2Align);
|
||||
|
||||
CharUnits Padding = CharUnits::Zero();
|
||||
if (Field2Off > Field2OffNoPadNoPack)
|
||||
Padding = Field2Off - Field2OffNoPadNoPack;
|
||||
else if (Field2Off != Field2Align && Field2Off > Field1Size)
|
||||
Padding = Field2Off - Field1Size;
|
||||
|
||||
bool IsPacked = !Field2Off.isMultipleOf(Field2Align);
|
||||
|
||||
if (!Padding.isZero())
|
||||
CoerceElts.push_back(llvm::ArrayType::get(
|
||||
llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity()));
|
||||
|
||||
CoerceElts.push_back(Field2Ty);
|
||||
UnpaddedCoerceElts.push_back(Field2Ty);
|
||||
|
||||
auto CoerceToType =
|
||||
llvm::StructType::get(getVMContext(), CoerceElts, IsPacked);
|
||||
auto UnpaddedCoerceToType =
|
||||
llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked);
|
||||
|
||||
return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
|
||||
}
|
||||
|
||||
ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
|
||||
int &ArgGPRsLeft) const {
|
||||
int &ArgGPRsLeft,
|
||||
int &ArgFPRsLeft) const {
|
||||
assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
|
||||
Ty = useFirstFieldIfTransparentUnion(Ty);
|
||||
|
||||
@ -9257,6 +9474,42 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
|
||||
return ABIArgInfo::getIgnore();
|
||||
|
||||
uint64_t Size = getContext().getTypeSize(Ty);
|
||||
|
||||
// Pass floating point values via FPRs if possible.
|
||||
if (IsFixed && Ty->isFloatingType() && FLen >= Size && ArgFPRsLeft) {
|
||||
ArgFPRsLeft--;
|
||||
return ABIArgInfo::getDirect();
|
||||
}
|
||||
|
||||
// Complex types for the hard float ABI must be passed direct rather than
|
||||
// using CoerceAndExpand.
|
||||
if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) {
|
||||
QualType EltTy = Ty->getAs<ComplexType>()->getElementType();
|
||||
if (getContext().getTypeSize(EltTy) <= FLen) {
|
||||
ArgFPRsLeft -= 2;
|
||||
return ABIArgInfo::getDirect();
|
||||
}
|
||||
}
|
||||
|
||||
if (IsFixed && FLen && Ty->isStructureOrClassType()) {
|
||||
llvm::Type *Field1Ty = nullptr;
|
||||
llvm::Type *Field2Ty = nullptr;
|
||||
CharUnits Field1Off = CharUnits::Zero();
|
||||
CharUnits Field2Off = CharUnits::Zero();
|
||||
int NeededArgGPRs;
|
||||
int NeededArgFPRs;
|
||||
bool IsCandidate =
|
||||
detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off,
|
||||
NeededArgGPRs, NeededArgFPRs);
|
||||
if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft &&
|
||||
NeededArgFPRs <= ArgFPRsLeft) {
|
||||
ArgGPRsLeft -= NeededArgGPRs;
|
||||
ArgFPRsLeft -= NeededArgFPRs;
|
||||
return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty,
|
||||
Field2Off);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t NeededAlign = getContext().getTypeAlign(Ty);
|
||||
bool MustUseStack = false;
|
||||
// Determine the number of GPRs needed to pass the current argument
|
||||
@ -9315,10 +9568,12 @@ ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const {
|
||||
return ABIArgInfo::getIgnore();
|
||||
|
||||
int ArgGPRsLeft = 2;
|
||||
int ArgFPRsLeft = FLen ? 2 : 0;
|
||||
|
||||
// The rules for return and argument types are the same, so defer to
|
||||
// classifyArgumentType.
|
||||
return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft);
|
||||
return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft,
|
||||
ArgFPRsLeft);
|
||||
}
|
||||
|
||||
Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
|
||||
@ -9353,8 +9608,9 @@ ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
|
||||
namespace {
|
||||
class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
|
||||
public:
|
||||
RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen)
|
||||
: TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen)) {}
|
||||
RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen,
|
||||
unsigned FLen)
|
||||
: TargetCodeGenInfo(new RISCVABIInfo(CGT, XLen, FLen)) {}
|
||||
|
||||
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
|
||||
CodeGen::CodeGenModule &CGM) const override {
|
||||
@ -9493,9 +9749,16 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
|
||||
return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
|
||||
|
||||
case llvm::Triple::riscv32:
|
||||
return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 32));
|
||||
case llvm::Triple::riscv64:
|
||||
return SetCGInfo(new RISCVTargetCodeGenInfo(Types, 64));
|
||||
case llvm::Triple::riscv64: {
|
||||
StringRef ABIStr = getTarget().getABI();
|
||||
unsigned XLen = getTarget().getPointerWidth(0);
|
||||
unsigned ABIFLen = 0;
|
||||
if (ABIStr.endswith("f"))
|
||||
ABIFLen = 32;
|
||||
else if (ABIStr.endswith("d"))
|
||||
ABIFLen = 64;
|
||||
return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen));
|
||||
}
|
||||
|
||||
case llvm::Triple::systemz: {
|
||||
bool HasVector = getTarget().getABI() == "vector";
|
||||
|
@ -24,7 +24,6 @@
|
||||
#include <vector>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <linux/version.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/inotify.h>
|
||||
#include <unistd.h>
|
||||
@ -335,7 +334,7 @@ std::unique_ptr<DirectoryWatcher> clang::DirectoryWatcher::create(
|
||||
InotifyFD, Path.str().c_str(),
|
||||
IN_CREATE | IN_DELETE | IN_DELETE_SELF | IN_MODIFY |
|
||||
IN_MOVED_FROM | IN_MOVE_SELF | IN_MOVED_TO | IN_ONLYDIR | IN_IGNORED
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
|
||||
#ifdef IN_EXCL_UNLINK
|
||||
| IN_EXCL_UNLINK
|
||||
#endif
|
||||
);
|
||||
|
@ -501,8 +501,6 @@ static codegenoptions::DebugInfoKind DebugLevelToInfoKind(const Arg &A) {
|
||||
return codegenoptions::LimitedDebugInfo;
|
||||
}
|
||||
|
||||
enum class FramePointerKind { None, NonLeaf, All };
|
||||
|
||||
static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
|
||||
switch (Triple.getArch()){
|
||||
default:
|
||||
@ -517,9 +515,6 @@ static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
|
||||
|
||||
static bool useFramePointerForTargetByDefault(const ArgList &Args,
|
||||
const llvm::Triple &Triple) {
|
||||
if (Args.hasArg(options::OPT_pg))
|
||||
return true;
|
||||
|
||||
switch (Triple.getArch()) {
|
||||
case llvm::Triple::xcore:
|
||||
case llvm::Triple::wasm32:
|
||||
@ -579,22 +574,32 @@ static bool useFramePointerForTargetByDefault(const ArgList &Args,
|
||||
return true;
|
||||
}
|
||||
|
||||
static FramePointerKind getFramePointerKind(const ArgList &Args,
|
||||
const llvm::Triple &Triple) {
|
||||
Arg *A = Args.getLastArg(options::OPT_fomit_frame_pointer,
|
||||
options::OPT_fno_omit_frame_pointer);
|
||||
bool OmitFP = A && A->getOption().matches(options::OPT_fomit_frame_pointer);
|
||||
bool NoOmitFP =
|
||||
A && A->getOption().matches(options::OPT_fno_omit_frame_pointer);
|
||||
if (NoOmitFP || mustUseNonLeafFramePointerForTarget(Triple) ||
|
||||
(!OmitFP && useFramePointerForTargetByDefault(Args, Triple))) {
|
||||
if (Args.hasFlag(options::OPT_momit_leaf_frame_pointer,
|
||||
options::OPT_mno_omit_leaf_frame_pointer,
|
||||
Triple.isPS4CPU()))
|
||||
return FramePointerKind::NonLeaf;
|
||||
return FramePointerKind::All;
|
||||
}
|
||||
return FramePointerKind::None;
|
||||
static bool shouldUseFramePointer(const ArgList &Args,
|
||||
const llvm::Triple &Triple) {
|
||||
if (Arg *A = Args.getLastArg(options::OPT_fno_omit_frame_pointer,
|
||||
options::OPT_fomit_frame_pointer))
|
||||
return A->getOption().matches(options::OPT_fno_omit_frame_pointer) ||
|
||||
mustUseNonLeafFramePointerForTarget(Triple);
|
||||
|
||||
if (Args.hasArg(options::OPT_pg))
|
||||
return true;
|
||||
|
||||
return useFramePointerForTargetByDefault(Args, Triple);
|
||||
}
|
||||
|
||||
static bool shouldUseLeafFramePointer(const ArgList &Args,
|
||||
const llvm::Triple &Triple) {
|
||||
if (Arg *A = Args.getLastArg(options::OPT_mno_omit_leaf_frame_pointer,
|
||||
options::OPT_momit_leaf_frame_pointer))
|
||||
return A->getOption().matches(options::OPT_mno_omit_leaf_frame_pointer);
|
||||
|
||||
if (Args.hasArg(options::OPT_pg))
|
||||
return true;
|
||||
|
||||
if (Triple.isPS4CPU())
|
||||
return false;
|
||||
|
||||
return useFramePointerForTargetByDefault(Args, Triple);
|
||||
}
|
||||
|
||||
/// Add a CC1 option to specify the debug compilation directory.
|
||||
@ -3946,12 +3951,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
if (Args.hasFlag(options::OPT_mrtd, options::OPT_mno_rtd, false))
|
||||
CmdArgs.push_back("-fdefault-calling-conv=stdcall");
|
||||
|
||||
FramePointerKind FPKeepKind = getFramePointerKind(Args, RawTriple);
|
||||
if (FPKeepKind != FramePointerKind::None) {
|
||||
if (shouldUseFramePointer(Args, RawTriple))
|
||||
CmdArgs.push_back("-mdisable-fp-elim");
|
||||
if (FPKeepKind == FramePointerKind::NonLeaf)
|
||||
CmdArgs.push_back("-momit-leaf-frame-pointer");
|
||||
}
|
||||
if (!Args.hasFlag(options::OPT_fzero_initialized_in_bss,
|
||||
options::OPT_fno_zero_initialized_in_bss))
|
||||
CmdArgs.push_back("-mno-zero-initialized-in-bss");
|
||||
@ -4136,6 +4137,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
CmdArgs.push_back(A->getValue());
|
||||
}
|
||||
|
||||
if (!shouldUseLeafFramePointer(Args, RawTriple))
|
||||
CmdArgs.push_back("-momit-leaf-frame-pointer");
|
||||
|
||||
// Explicitly error on some things we know we don't support and can't just
|
||||
// ignore.
|
||||
if (!Args.hasArg(options::OPT_fallow_unsupported)) {
|
||||
@ -5489,7 +5493,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
|
||||
}
|
||||
|
||||
if (Arg *A = Args.getLastArg(options::OPT_pg))
|
||||
if (FPKeepKind == FramePointerKind::None)
|
||||
if (!shouldUseFramePointer(Args, Triple))
|
||||
D.Diag(diag::err_drv_argument_not_allowed_with) << "-fomit-frame-pointer"
|
||||
<< A->getAsString(Args);
|
||||
|
||||
|
@ -435,7 +435,6 @@ void ASTUnit::CacheCodeCompletionResults() {
|
||||
| (1LL << CodeCompletionContext::CCC_UnionTag)
|
||||
| (1LL << CodeCompletionContext::CCC_ClassOrStructTag)
|
||||
| (1LL << CodeCompletionContext::CCC_Type)
|
||||
| (1LL << CodeCompletionContext::CCC_Symbol)
|
||||
| (1LL << CodeCompletionContext::CCC_SymbolOrNewName)
|
||||
| (1LL << CodeCompletionContext::CCC_ParenthesizedExpression);
|
||||
|
||||
|
@ -2408,7 +2408,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
|
||||
.Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11)
|
||||
.Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12)
|
||||
.Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20)
|
||||
.Case("c++", LangStandard::lang_openclcpp)
|
||||
.Cases("clc++", "CLC++", LangStandard::lang_openclcpp)
|
||||
.Default(LangStandard::lang_unspecified);
|
||||
|
||||
if (OpenCLLangStd == LangStandard::lang_unspecified) {
|
||||
|
@ -437,17 +437,17 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
|
||||
default:
|
||||
llvm_unreachable("Unsupported OpenCL version");
|
||||
}
|
||||
Builder.defineMacro("CL_VERSION_1_0", "100");
|
||||
Builder.defineMacro("CL_VERSION_1_1", "110");
|
||||
Builder.defineMacro("CL_VERSION_1_2", "120");
|
||||
Builder.defineMacro("CL_VERSION_2_0", "200");
|
||||
|
||||
if (TI.isLittleEndian())
|
||||
Builder.defineMacro("__ENDIAN_LITTLE__");
|
||||
|
||||
if (LangOpts.FastRelaxedMath)
|
||||
Builder.defineMacro("__FAST_RELAXED_MATH__");
|
||||
}
|
||||
Builder.defineMacro("CL_VERSION_1_0", "100");
|
||||
Builder.defineMacro("CL_VERSION_1_1", "110");
|
||||
Builder.defineMacro("CL_VERSION_1_2", "120");
|
||||
Builder.defineMacro("CL_VERSION_2_0", "200");
|
||||
|
||||
if (TI.isLittleEndian())
|
||||
Builder.defineMacro("__ENDIAN_LITTLE__");
|
||||
|
||||
if (LangOpts.FastRelaxedMath)
|
||||
Builder.defineMacro("__FAST_RELAXED_MATH__");
|
||||
}
|
||||
// Not "standard" per se, but available even with the -undef flag.
|
||||
if (LangOpts.AsmPreprocessor)
|
||||
|
@ -4029,7 +4029,7 @@ _mm_storeu_si128(__m128i_u *__p, __m128i __b)
|
||||
/// \param __b
|
||||
/// A 128-bit integer vector containing the value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_storeu_si64(void const *__p, __m128i __b)
|
||||
_mm_storeu_si64(void *__p, __m128i __b)
|
||||
{
|
||||
struct __storeu_si64 {
|
||||
long long __v;
|
||||
@ -4050,7 +4050,7 @@ _mm_storeu_si64(void const *__p, __m128i __b)
|
||||
/// \param __b
|
||||
/// A 128-bit integer vector containing the value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_storeu_si32(void const *__p, __m128i __b)
|
||||
_mm_storeu_si32(void *__p, __m128i __b)
|
||||
{
|
||||
struct __storeu_si32 {
|
||||
int __v;
|
||||
@ -4071,7 +4071,7 @@ _mm_storeu_si32(void const *__p, __m128i __b)
|
||||
/// \param __b
|
||||
/// A 128-bit integer vector containing the value to be stored.
|
||||
static __inline__ void __DEFAULT_FN_ATTRS
|
||||
_mm_storeu_si16(void const *__p, __m128i __b)
|
||||
_mm_storeu_si16(void *__p, __m128i __b)
|
||||
{
|
||||
struct __storeu_si16 {
|
||||
short __v;
|
||||
|
@ -126,7 +126,7 @@ typedef double double8 __attribute__((ext_vector_type(8)));
|
||||
typedef double double16 __attribute__((ext_vector_type(16)));
|
||||
#endif
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#define NULL ((void*)0)
|
||||
#endif
|
||||
|
||||
@ -276,7 +276,7 @@ typedef uint cl_mem_fence_flags;
|
||||
*/
|
||||
#define CLK_GLOBAL_MEM_FENCE 0x02
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
typedef enum memory_scope {
|
||||
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
|
||||
@ -288,9 +288,6 @@ typedef enum memory_scope {
|
||||
#endif
|
||||
} memory_scope;
|
||||
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
/**
|
||||
* Queue a memory fence to ensure correct ordering of memory
|
||||
* operations between work-items of a work-group to
|
||||
@ -313,7 +310,7 @@ typedef enum memory_order
|
||||
memory_order_seq_cst = __ATOMIC_SEQ_CST
|
||||
} memory_order;
|
||||
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions
|
||||
|
||||
@ -389,14 +386,10 @@ typedef enum memory_order
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
|
||||
// OpenCL v2.0 s6.13.16 - Pipe Functions
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t))
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
|
||||
|
||||
// OpenCL v2.0 s6.13.17 - Enqueue Kernels
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
|
||||
#define CL_COMPLETE 0x0
|
||||
#define CL_RUNNING 0x1
|
||||
#define CL_SUBMITTED 0x2
|
||||
@ -435,7 +428,7 @@ typedef struct {
|
||||
size_t localWorkSize[MAX_WORK_DIM];
|
||||
} ndrange_t;
|
||||
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
#ifdef cl_intel_device_side_avc_motion_estimation
|
||||
#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin
|
||||
|
@ -11,11 +11,11 @@
|
||||
|
||||
#include "opencl-c-base.h"
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#ifndef cl_khr_depth_images
|
||||
#define cl_khr_depth_images
|
||||
#endif //cl_khr_depth_images
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
#if __OPENCL_C_VERSION__ < CL_VERSION_2_0
|
||||
#ifdef cl_khr_3d_image_writes
|
||||
@ -23,10 +23,10 @@
|
||||
#endif //cl_khr_3d_image_writes
|
||||
#endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
#pragma OPENCL EXTENSION cl_intel_planar_yuv : begin
|
||||
#pragma OPENCL EXTENSION cl_intel_planar_yuv : end
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
|
||||
#define __ovld __attribute__((overloadable))
|
||||
#define __conv __attribute__((convergent))
|
||||
@ -6517,11 +6517,11 @@ size_t __ovld __cnfn get_group_id(uint dimindx);
|
||||
*/
|
||||
size_t __ovld __cnfn get_global_offset(uint dimindx);
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
size_t __ovld get_enqueued_local_size(uint dimindx);
|
||||
size_t __ovld get_global_linear_id(void);
|
||||
size_t __ovld get_local_linear_id(void);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL v1.1 s6.11.2, v1.2 s6.12.2, v2.0 s6.13.2 - Math functions
|
||||
|
||||
@ -7352,7 +7352,7 @@ half16 __ovld __cnfn fmod(half16 x, half16 y);
|
||||
* Returns fmin(x - floor (x), 0x1.fffffep-1f ).
|
||||
* floor(x) is returned in iptr.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float __ovld fract(float x, float *iptr);
|
||||
float2 __ovld fract(float2 x, float2 *iptr);
|
||||
float3 __ovld fract(float3 x, float3 *iptr);
|
||||
@ -7434,7 +7434,7 @@ half4 __ovld fract(half4 x, __private half4 *iptr);
|
||||
half8 __ovld fract(half8 x, __private half8 *iptr);
|
||||
half16 __ovld fract(half16 x, __private half16 *iptr);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Extract mantissa and exponent from x. For each
|
||||
@ -7442,7 +7442,7 @@ half16 __ovld fract(half16 x, __private half16 *iptr);
|
||||
* magnitude in the interval [1/2, 1) or 0. Each
|
||||
* component of x equals mantissa returned * 2^exp.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float __ovld frexp(float x, int *exp);
|
||||
float2 __ovld frexp(float2 x, int2 *exp);
|
||||
float3 __ovld frexp(float3 x, int3 *exp);
|
||||
@ -7524,7 +7524,7 @@ half4 __ovld frexp(half4 x, __private int4 *exp);
|
||||
half8 __ovld frexp(half8 x, __private int8 *exp);
|
||||
half16 __ovld frexp(half16 x, __private int16 *exp);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Compute the value of the square root of x^2 + y^2
|
||||
@ -7649,7 +7649,7 @@ half8 __ovld __cnfn lgamma(half8 x);
|
||||
half16 __ovld __cnfn lgamma(half16 x);
|
||||
#endif //cl_khr_fp16
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float __ovld lgamma_r(float x, int *signp);
|
||||
float2 __ovld lgamma_r(float2 x, int2 *signp);
|
||||
float3 __ovld lgamma_r(float3 x, int3 *signp);
|
||||
@ -7731,7 +7731,7 @@ half4 __ovld lgamma_r(half4 x, __private int4 *signp);
|
||||
half8 __ovld lgamma_r(half8 x, __private int8 *signp);
|
||||
half16 __ovld lgamma_r(half16 x, __private int16 *signp);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Compute natural logarithm.
|
||||
@ -7955,7 +7955,7 @@ half16 __ovld __cnfn minmag(half16 x, half16 y);
|
||||
* the argument. It stores the integral part in the object
|
||||
* pointed to by iptr.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float __ovld modf(float x, float *iptr);
|
||||
float2 __ovld modf(float2 x, float2 *iptr);
|
||||
float3 __ovld modf(float3 x, float3 *iptr);
|
||||
@ -8037,7 +8037,7 @@ half4 __ovld modf(half4 x, __private half4 *iptr);
|
||||
half8 __ovld modf(half8 x, __private half8 *iptr);
|
||||
half16 __ovld modf(half16 x, __private half16 *iptr);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Returns a quiet NaN. The nancode may be placed
|
||||
@ -8215,7 +8215,7 @@ half16 __ovld __cnfn remainder(half16 x, half16 y);
|
||||
* sign as x/y. It stores this signed value in the object
|
||||
* pointed to by quo.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float __ovld remquo(float x, float y, int *quo);
|
||||
float2 __ovld remquo(float2 x, float2 y, int2 *quo);
|
||||
float3 __ovld remquo(float3 x, float3 y, int3 *quo);
|
||||
@ -8298,7 +8298,7 @@ half4 __ovld remquo(half4 x, half4 y, __private int4 *quo);
|
||||
half8 __ovld remquo(half8 x, half8 y, __private int8 *quo);
|
||||
half16 __ovld remquo(half16 x, half16 y, __private int16 *quo);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
/**
|
||||
* Round to integral value (using round to nearest
|
||||
* even rounding mode) in floating-point format.
|
||||
@ -8439,7 +8439,7 @@ half16 __ovld __cnfn sin(half16);
|
||||
* is the return value and computed cosine is returned
|
||||
* in cosval.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float __ovld sincos(float x, float *cosval);
|
||||
float2 __ovld sincos(float2 x, float2 *cosval);
|
||||
float3 __ovld sincos(float3 x, float3 *cosval);
|
||||
@ -8521,7 +8521,7 @@ half4 __ovld sincos(half4 x, __private half4 *cosval);
|
||||
half8 __ovld sincos(half8 x, __private half8 *cosval);
|
||||
half16 __ovld sincos(half16 x, __private half16 *cosval);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Compute hyperbolic sine.
|
||||
@ -9446,7 +9446,7 @@ ulong16 __ovld __cnfn clz(ulong16 x);
|
||||
* returns the size in bits of the type of x or
|
||||
* component type of x, if x is a vector.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
char __ovld ctz(char x);
|
||||
uchar __ovld ctz(uchar x);
|
||||
char2 __ovld ctz(char2 x);
|
||||
@ -9495,7 +9495,7 @@ long8 __ovld ctz(long8 x);
|
||||
ulong8 __ovld ctz(ulong8 x);
|
||||
long16 __ovld ctz(long16 x);
|
||||
ulong16 __ovld ctz(ulong16 x);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Returns mul_hi(a, b) + c.
|
||||
@ -11340,7 +11340,7 @@ half8 __ovld vload8(size_t offset, const __constant half *p);
|
||||
half16 __ovld vload16(size_t offset, const __constant half *p);
|
||||
#endif //cl_khr_fp16
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
char2 __ovld vload2(size_t offset, const char *p);
|
||||
uchar2 __ovld vload2(size_t offset, const uchar *p);
|
||||
short2 __ovld vload2(size_t offset, const short *p);
|
||||
@ -11578,9 +11578,9 @@ half4 __ovld vload4(size_t offset, const __private half *p);
|
||||
half8 __ovld vload8(size_t offset, const __private half *p);
|
||||
half16 __ovld vload16(size_t offset, const __private half *p);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld vstore2(char2 data, size_t offset, char *p);
|
||||
void __ovld vstore2(uchar2 data, size_t offset, uchar *p);
|
||||
void __ovld vstore2(short2 data, size_t offset, short *p);
|
||||
@ -11814,7 +11814,7 @@ void __ovld vstore4(half4 data, size_t offset, __private half *p);
|
||||
void __ovld vstore8(half8 data, size_t offset, __private half *p);
|
||||
void __ovld vstore16(half16 data, size_t offset, __private half *p);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Read sizeof (half) bytes of data from address
|
||||
@ -11825,13 +11825,13 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p);
|
||||
* must be 16-bit aligned.
|
||||
*/
|
||||
float __ovld vload_half(size_t offset, const __constant half *p);
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float __ovld vload_half(size_t offset, const half *p);
|
||||
#else
|
||||
float __ovld vload_half(size_t offset, const __global half *p);
|
||||
float __ovld vload_half(size_t offset, const __local half *p);
|
||||
float __ovld vload_half(size_t offset, const __private half *p);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Read sizeof (halfn) bytes of data from address
|
||||
@ -11846,7 +11846,7 @@ float3 __ovld vload_half3(size_t offset, const __constant half *p);
|
||||
float4 __ovld vload_half4(size_t offset, const __constant half *p);
|
||||
float8 __ovld vload_half8(size_t offset, const __constant half *p);
|
||||
float16 __ovld vload_half16(size_t offset, const __constant half *p);
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float2 __ovld vload_half2(size_t offset, const half *p);
|
||||
float3 __ovld vload_half3(size_t offset, const half *p);
|
||||
float4 __ovld vload_half4(size_t offset, const half *p);
|
||||
@ -11868,7 +11868,7 @@ float3 __ovld vload_half3(size_t offset, const __private half *p);
|
||||
float4 __ovld vload_half4(size_t offset, const __private half *p);
|
||||
float8 __ovld vload_half8(size_t offset, const __private half *p);
|
||||
float16 __ovld vload_half16(size_t offset, const __private half *p);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* The float value given by data is first
|
||||
@ -11881,7 +11881,7 @@ float16 __ovld vload_half16(size_t offset, const __private half *p);
|
||||
* The default current rounding mode is round to
|
||||
* nearest even.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld vstore_half(float data, size_t offset, half *p);
|
||||
void __ovld vstore_half_rte(float data, size_t offset, half *p);
|
||||
void __ovld vstore_half_rtz(float data, size_t offset, half *p);
|
||||
@ -11927,7 +11927,7 @@ void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);
|
||||
void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);
|
||||
void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);
|
||||
#endif //cl_khr_fp64
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* The floatn value given by data is converted to
|
||||
@ -11940,7 +11940,7 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);
|
||||
* The default current rounding mode is round to
|
||||
* nearest even.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld vstore_half2(float2 data, size_t offset, half *p);
|
||||
void __ovld vstore_half3(float3 data, size_t offset, half *p);
|
||||
void __ovld vstore_half4(float4 data, size_t offset, half *p);
|
||||
@ -12146,7 +12146,7 @@ void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);
|
||||
void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);
|
||||
void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);
|
||||
#endif //cl_khr_fp64
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* For n = 1, 2, 4, 8 and 16 read sizeof (halfn)
|
||||
@ -12167,7 +12167,7 @@ float3 __ovld vloada_half3(size_t offset, const __constant half *p);
|
||||
float4 __ovld vloada_half4(size_t offset, const __constant half *p);
|
||||
float8 __ovld vloada_half8(size_t offset, const __constant half *p);
|
||||
float16 __ovld vloada_half16(size_t offset, const __constant half *p);
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float __ovld vloada_half(size_t offset, const half *p);
|
||||
float2 __ovld vloada_half2(size_t offset, const half *p);
|
||||
float3 __ovld vloada_half3(size_t offset, const half *p);
|
||||
@ -12193,7 +12193,7 @@ float3 __ovld vloada_half3(size_t offset, const __private half *p);
|
||||
float4 __ovld vloada_half4(size_t offset, const __private half *p);
|
||||
float8 __ovld vloada_half8(size_t offset, const __private half *p);
|
||||
float16 __ovld vloada_half16(size_t offset, const __private half *p);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* The floatn value given by data is converted to
|
||||
@ -12211,7 +12211,7 @@ float16 __ovld vloada_half16(size_t offset, const __private half *p);
|
||||
* mode. The default current rounding mode is
|
||||
* round to nearest even.
|
||||
*/
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld vstorea_half(float data, size_t offset, half *p);
|
||||
void __ovld vstorea_half2(float2 data, size_t offset, half *p);
|
||||
void __ovld vstorea_half3(float3 data, size_t offset, half *p);
|
||||
@ -12496,7 +12496,7 @@ void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p);
|
||||
void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p);
|
||||
void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);
|
||||
#endif //cl_khr_fp64
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions
|
||||
|
||||
@ -12532,10 +12532,10 @@ void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);
|
||||
|
||||
void __ovld __conv barrier(cl_mem_fence_flags flags);
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
|
||||
void __ovld __conv work_group_barrier(cl_mem_fence_flags flags);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL v1.1 s6.11.9, v1.2 s6.12.9 - Explicit Memory Fence Functions
|
||||
|
||||
@ -12580,7 +12580,7 @@ void __ovld write_mem_fence(cl_mem_fence_flags flags);
|
||||
|
||||
// OpenCL v2.0 s6.13.9 - Address Space Qualifier Functions
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
cl_mem_fence_flags __ovld get_fence(const void *ptr);
|
||||
cl_mem_fence_flags __ovld get_fence(void *ptr);
|
||||
|
||||
@ -12591,7 +12591,7 @@ cl_mem_fence_flags __ovld get_fence(void *ptr);
|
||||
* where gentype is builtin type or user defined type.
|
||||
*/
|
||||
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL v1.1 s6.11.10, v1.2 s6.12.10, v2.0 s6.13.10 - Async Copies from Global to Local Memory, Local to Global Memory, and Prefetch
|
||||
|
||||
@ -13371,7 +13371,7 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v
|
||||
|
||||
// OpenCL v2.0 s6.13.11 - Atomics Functions
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics
|
||||
#if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics)
|
||||
@ -13692,7 +13692,7 @@ void __ovld atomic_flag_clear(volatile atomic_flag *object);
|
||||
void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order);
|
||||
void __ovld atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order, memory_scope scope);
|
||||
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL v1.1 s6.11.12, v1.2 s6.12.12, v2.0 s6.13.12 - Miscellaneous Vector Functions
|
||||
|
||||
@ -14186,7 +14186,7 @@ half16 __ovld __cnfn shuffle2(half8 x, half8 y, ushort16 mask);
|
||||
half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask);
|
||||
#endif //cl_khr_fp16
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
// OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf
|
||||
|
||||
int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
|
||||
@ -14307,7 +14307,7 @@ int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, f
|
||||
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, int4 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord);
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
|
||||
float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
|
||||
|
||||
@ -14315,7 +14315,7 @@ int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_
|
||||
int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, int4 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord);
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, int coord);
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord);
|
||||
@ -14325,7 +14325,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, f
|
||||
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, int coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord);
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
|
||||
|
||||
@ -14333,7 +14333,7 @@ int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_
|
||||
int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, int2 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord);
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
|
||||
#ifdef cl_khr_depth_images
|
||||
float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord);
|
||||
@ -14358,7 +14358,7 @@ float __purefn __ovld read_imagef(read_only image2d_array_msaa_depth_t image, in
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
|
||||
// OpenCL Extension v2.0 s9.18 - Mipmaps
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#ifdef cl_khr_mipmap_image
|
||||
|
||||
float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod);
|
||||
@ -14410,9 +14410,9 @@ int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, f
|
||||
uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
|
||||
|
||||
#endif //cl_khr_mipmap_image
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
|
||||
/**
|
||||
* Sampler-less Image Access
|
||||
@ -14447,7 +14447,7 @@ float4 __purefn __ovld read_imagef(read_only image3d_t image, int4 coord);
|
||||
int4 __purefn __ovld read_imagei(read_only image3d_t image, int4 coord);
|
||||
uint4 __purefn __ovld read_imageui(read_only image3d_t image, int4 coord);
|
||||
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
|
||||
// Image read functions returning half4 type
|
||||
#ifdef cl_khr_fp16
|
||||
@ -14457,7 +14457,7 @@ half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler,
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_t image, sampler_t sampler, float2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, int4 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image3d_t image, sampler_t sampler, float4 coord);
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, int2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, sampler_t sampler, float2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, sampler_t sampler, int4 coord);
|
||||
@ -14471,11 +14471,11 @@ half4 __purefn __ovld read_imageh(read_only image3d_t image, int4 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_array_t image, int2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image2d_array_t image, int4 coord);
|
||||
half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);
|
||||
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)
|
||||
#endif //cl_khr_fp16
|
||||
|
||||
// Image read functions for read_write images
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);
|
||||
int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);
|
||||
uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);
|
||||
@ -14518,7 +14518,7 @@ float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 co
|
||||
float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#ifdef cl_khr_mipmap_image
|
||||
float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);
|
||||
int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);
|
||||
@ -14569,7 +14569,7 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler,
|
||||
uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
|
||||
|
||||
#endif //cl_khr_mipmap_image
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// Image read functions returning half4 type
|
||||
#ifdef cl_khr_fp16
|
||||
@ -14580,7 +14580,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);
|
||||
half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);
|
||||
half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Write color value to location specified by coordinate
|
||||
@ -14681,7 +14681,7 @@ void __ovld write_imagef(write_only image2d_array_depth_t image, int4 coord, flo
|
||||
#endif //cl_khr_depth_images
|
||||
|
||||
// OpenCL Extension v2.0 s9.18 - Mipmaps
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#ifdef cl_khr_mipmap_image
|
||||
void __ovld write_imagef(write_only image1d_t image, int coord, int lod, float4 color);
|
||||
void __ovld write_imagei(write_only image1d_t image, int coord, int lod, int4 color);
|
||||
@ -14708,7 +14708,7 @@ void __ovld write_imagei(write_only image3d_t image, int4 coord, int lod, int4 c
|
||||
void __ovld write_imageui(write_only image3d_t image, int4 coord, int lod, uint4 color);
|
||||
#endif
|
||||
#endif //cl_khr_mipmap_image
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// Image write functions for half4 type
|
||||
#ifdef cl_khr_fp16
|
||||
@ -14723,7 +14723,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col
|
||||
#endif //cl_khr_fp16
|
||||
|
||||
// Image write functions for read_write images
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);
|
||||
void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);
|
||||
void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);
|
||||
@ -14755,7 +14755,7 @@ void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float col
|
||||
void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);
|
||||
#endif //cl_khr_depth_images
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#ifdef cl_khr_mipmap_image
|
||||
void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);
|
||||
void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);
|
||||
@ -14782,7 +14782,7 @@ void __ovld write_imagei(read_write image3d_t image, int4 coord, int lod, int4 c
|
||||
void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4 color);
|
||||
#endif
|
||||
#endif //cl_khr_mipmap_image
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// Image write functions for half4 type
|
||||
#ifdef cl_khr_fp16
|
||||
@ -14795,7 +14795,7 @@ void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 col
|
||||
void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);
|
||||
void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);
|
||||
#endif //cl_khr_fp16
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have
|
||||
// access qualifier, which by default assume read_only access qualifier. Image query builtin
|
||||
@ -14843,7 +14843,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);
|
||||
int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int __ovld __cnfn get_image_width(read_write image1d_t image);
|
||||
int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);
|
||||
int __ovld __cnfn get_image_width(read_write image2d_t image);
|
||||
@ -14860,7 +14860,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);
|
||||
int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);
|
||||
int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Return the image height in pixels.
|
||||
@ -14895,7 +14895,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);
|
||||
int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int __ovld __cnfn get_image_height(read_write image2d_t image);
|
||||
int __ovld __cnfn get_image_height(read_write image3d_t image);
|
||||
int __ovld __cnfn get_image_height(read_write image2d_array_t image);
|
||||
@ -14909,7 +14909,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);
|
||||
int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);
|
||||
int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Return the image depth in pixels.
|
||||
@ -14920,12 +14920,12 @@ int __ovld __cnfn get_image_depth(read_only image3d_t image);
|
||||
int __ovld __cnfn get_image_depth(write_only image3d_t image);
|
||||
#endif
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int __ovld __cnfn get_image_depth(read_write image3d_t image);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL Extension v2.0 s9.18 - Mipmaps
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#ifdef cl_khr_mipmap_image
|
||||
/**
|
||||
* Return the image miplevels.
|
||||
@ -14961,7 +14961,7 @@ int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);
|
||||
int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);
|
||||
|
||||
#endif //cl_khr_mipmap_image
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Return the channel data type. Valid values are:
|
||||
@ -15018,7 +15018,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t im
|
||||
int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);
|
||||
int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);
|
||||
int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);
|
||||
@ -15035,7 +15035,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im
|
||||
int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);
|
||||
int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Return the image channel order. Valid values are:
|
||||
@ -15090,7 +15090,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image)
|
||||
int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int __ovld __cnfn get_image_channel_order(read_write image1d_t image);
|
||||
int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);
|
||||
int __ovld __cnfn get_image_channel_order(read_write image2d_t image);
|
||||
@ -15107,7 +15107,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image)
|
||||
int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);
|
||||
int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Return the 2D image width and height as an int2
|
||||
@ -15140,7 +15140,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);
|
||||
int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int2 __ovld __cnfn get_image_dim(read_write image2d_t image);
|
||||
int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);
|
||||
#ifdef cl_khr_depth_images
|
||||
@ -15153,7 +15153,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);
|
||||
int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);
|
||||
int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Return the 3D image width, height, and depth as an
|
||||
@ -15165,9 +15165,9 @@ int4 __ovld __cnfn get_image_dim(read_only image3d_t image);
|
||||
#ifdef cl_khr_3d_image_writes
|
||||
int4 __ovld __cnfn get_image_dim(write_only image3d_t image);
|
||||
#endif
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int4 __ovld __cnfn get_image_dim(read_write image3d_t image);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Return the image array size.
|
||||
@ -15193,7 +15193,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_
|
||||
size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);
|
||||
size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);
|
||||
#ifdef cl_khr_depth_images
|
||||
@ -15203,7 +15203,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image
|
||||
size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);
|
||||
size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);
|
||||
#endif //cl_khr_gl_msaa_sharing
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
/**
|
||||
* Return the number of samples associated with image
|
||||
@ -15219,17 +15219,17 @@ int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);
|
||||
int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);
|
||||
int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int __ovld get_image_num_samples(read_write image2d_msaa_t image);
|
||||
int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);
|
||||
int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);
|
||||
int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#endif
|
||||
|
||||
// OpenCL v2.0 s6.13.15 - Work-group Functions
|
||||
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
int __ovld __conv work_group_all(int predicate);
|
||||
int __ovld __conv work_group_any(int predicate);
|
||||
|
||||
@ -15327,16 +15327,16 @@ double __ovld __conv work_group_scan_inclusive_min(double x);
|
||||
double __ovld __conv work_group_scan_inclusive_max(double x);
|
||||
#endif //cl_khr_fp64
|
||||
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL v2.0 s6.13.16 - Pipe Functions
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
|
||||
// OpenCL v2.0 s6.13.17 - Enqueue Kernels
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
ndrange_t __ovld ndrange_1D(size_t);
|
||||
ndrange_t __ovld ndrange_1D(size_t, size_t);
|
||||
@ -15365,7 +15365,7 @@ bool __ovld is_valid_event (clk_event_t event);
|
||||
void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);
|
||||
|
||||
queue_t __ovld get_default_queue(void);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
// OpenCL Extension v2.0 s9.17 - Sub-groups
|
||||
|
||||
@ -15374,16 +15374,16 @@ queue_t __ovld get_default_queue(void);
|
||||
uint __ovld get_sub_group_size(void);
|
||||
uint __ovld get_max_sub_group_size(void);
|
||||
uint __ovld get_num_sub_groups(void);
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
uint __ovld get_enqueued_num_sub_groups(void);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
uint __ovld get_sub_group_id(void);
|
||||
uint __ovld get_sub_group_local_id(void);
|
||||
|
||||
void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags);
|
||||
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv sub_group_barrier(cl_mem_fence_flags flags, memory_scope scope);
|
||||
#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0
|
||||
#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
int __ovld __conv sub_group_all(int predicate);
|
||||
int __ovld __conv sub_group_any(int predicate);
|
||||
@ -15573,12 +15573,12 @@ uint2 __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, in
|
||||
uint4 __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
|
||||
uint8 __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
uint __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
|
||||
uint2 __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
|
||||
uint4 __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
|
||||
uint8 __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
uint __ovld __conv intel_sub_group_block_read( const __global uint* p );
|
||||
uint2 __ovld __conv intel_sub_group_block_read2( const __global uint* p );
|
||||
@ -15590,12 +15590,12 @@ void __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, i
|
||||
void __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
|
||||
void __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
|
||||
void __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
|
||||
void __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
|
||||
void __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
|
||||
void __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );
|
||||
@ -15713,12 +15713,12 @@ uint2 __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t ima
|
||||
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
|
||||
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
uint __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
|
||||
uint2 __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
|
||||
uint4 __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
|
||||
uint8 __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
uint __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
|
||||
uint2 __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
|
||||
@ -15730,12 +15730,12 @@ void __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t im
|
||||
void __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
|
||||
void __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
|
||||
@ -15747,12 +15747,12 @@ ushort2 __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t im
|
||||
ushort4 __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
|
||||
ushort8 __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
ushort __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
|
||||
ushort2 __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
|
||||
ushort4 __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
|
||||
ushort8 __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
ushort __ovld __conv intel_sub_group_block_read_us( const __global ushort* p );
|
||||
ushort2 __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );
|
||||
@ -15764,12 +15764,12 @@ void __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t i
|
||||
void __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
|
||||
void __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);
|
||||
|
||||
#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
void __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort data);
|
||||
void __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
|
||||
void __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
|
||||
void __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
|
||||
#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
|
||||
|
||||
void __ovld __conv intel_sub_group_block_write_us( __global ushort* p, ushort data );
|
||||
void __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "clang/Lex/Token.h"
|
||||
#include "clang/Lex/VariadicMacroSupport.h"
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/ScopeExit.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
@ -2399,6 +2400,13 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
|
||||
Token Tok;
|
||||
LexUnexpandedToken(Tok);
|
||||
|
||||
// Ensure we consume the rest of the macro body if errors occur.
|
||||
auto _ = llvm::make_scope_exit([&]() {
|
||||
// The flag indicates if we are still waiting for 'eod'.
|
||||
if (CurLexer->ParsingPreprocessorDirective)
|
||||
DiscardUntilEndOfDirective();
|
||||
});
|
||||
|
||||
// Used to un-poison and then re-poison identifiers of the __VA_ARGS__ ilk
|
||||
// within their appropriate context.
|
||||
VariadicMacroScopeGuard VariadicMacroScopeGuard(*this);
|
||||
@ -2420,12 +2428,8 @@ MacroInfo *Preprocessor::ReadOptionalMacroParameterListAndBody(
|
||||
} else if (Tok.is(tok::l_paren)) {
|
||||
// This is a function-like macro definition. Read the argument list.
|
||||
MI->setIsFunctionLike();
|
||||
if (ReadMacroParameterList(MI, LastTok)) {
|
||||
// Throw away the rest of the line.
|
||||
if (CurPPLexer->ParsingPreprocessorDirective)
|
||||
DiscardUntilEndOfDirective();
|
||||
if (ReadMacroParameterList(MI, LastTok))
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// If this is a definition of an ISO C/C++ variadic function-like macro (not
|
||||
// using the GNU named varargs extension) inform our variadic scope guard
|
||||
|
@ -961,6 +961,7 @@ void Sema::ActOnEndOfTranslationUnit() {
|
||||
|
||||
// All dllexport classes should have been processed already.
|
||||
assert(DelayedDllExportClasses.empty());
|
||||
assert(DelayedDllExportMemberFunctions.empty());
|
||||
|
||||
// Remove file scoped decls that turned out to be used.
|
||||
UnusedFileScopedDecls.erase(
|
||||
|
@ -1984,10 +1984,27 @@ NamedDecl *Sema::LazilyCreateBuiltin(IdentifierInfo *II, unsigned ID,
|
||||
ASTContext::GetBuiltinTypeError Error;
|
||||
QualType R = Context.GetBuiltinType(ID, Error);
|
||||
if (Error) {
|
||||
if (ForRedeclaration)
|
||||
Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
|
||||
<< getHeaderName(Context.BuiltinInfo, ID, Error)
|
||||
if (!ForRedeclaration)
|
||||
return nullptr;
|
||||
|
||||
// If we have a builtin without an associated type we should not emit a
|
||||
// warning when we were not able to find a type for it.
|
||||
if (Error == ASTContext::GE_Missing_type)
|
||||
return nullptr;
|
||||
|
||||
// If we could not find a type for setjmp it is because the jmp_buf type was
|
||||
// not defined prior to the setjmp declaration.
|
||||
if (Error == ASTContext::GE_Missing_setjmp) {
|
||||
Diag(Loc, diag::warn_implicit_decl_no_jmp_buf)
|
||||
<< Context.BuiltinInfo.getName(ID);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Generally, we emit a warning that the declaration requires the
|
||||
// appropriate header.
|
||||
Diag(Loc, diag::warn_implicit_decl_requires_sysheader)
|
||||
<< getHeaderName(Context.BuiltinInfo, ID, Error)
|
||||
<< Context.BuiltinInfo.getName(ID);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -11527,9 +11544,12 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) {
|
||||
// Check for self-references within variable initializers.
|
||||
// Variables declared within a function/method body (except for references)
|
||||
// are handled by a dataflow analysis.
|
||||
if (!VDecl->hasLocalStorage() || VDecl->getType()->isRecordType() ||
|
||||
VDecl->getType()->isReferenceType()) {
|
||||
CheckSelfReference(*this, RealDecl, Init, DirectInit);
|
||||
// This is undefined behavior in C++, but valid in C.
|
||||
if (getLangOpts().CPlusPlus) {
|
||||
if (!VDecl->hasLocalStorage() || VDecl->getType()->isRecordType() ||
|
||||
VDecl->getType()->isReferenceType()) {
|
||||
CheckSelfReference(*this, RealDecl, Init, DirectInit);
|
||||
}
|
||||
}
|
||||
|
||||
// If the type changed, it means we had an incomplete type that was
|
||||
|
@ -1030,8 +1030,10 @@ static IsTupleLike isTupleLike(Sema &S, SourceLocation Loc, QualType T,
|
||||
TemplateArgumentListInfo Args(Loc, Loc);
|
||||
Args.addArgument(getTrivialTypeTemplateArgument(S, Loc, T));
|
||||
|
||||
// If there's no tuple_size specialization, it's not tuple-like.
|
||||
if (lookupStdTypeTraitMember(S, R, Loc, "tuple_size", Args, /*DiagID*/0))
|
||||
// If there's no tuple_size specialization or the lookup of 'value' is empty,
|
||||
// it's not tuple-like.
|
||||
if (lookupStdTypeTraitMember(S, R, Loc, "tuple_size", Args, /*DiagID*/ 0) ||
|
||||
R.empty())
|
||||
return IsTupleLike::NotTupleLike;
|
||||
|
||||
// If we get this far, we've committed to the tuple interpretation, but
|
||||
@ -1048,11 +1050,6 @@ static IsTupleLike isTupleLike(Sema &S, SourceLocation Loc, QualType T,
|
||||
}
|
||||
} Diagnoser(R, Args);
|
||||
|
||||
if (R.empty()) {
|
||||
Diagnoser.diagnoseNotICE(S, Loc, SourceRange());
|
||||
return IsTupleLike::Error;
|
||||
}
|
||||
|
||||
ExprResult E =
|
||||
S.BuildDeclarationNameExpr(CXXScopeSpec(), R, /*NeedsADL*/false);
|
||||
if (E.isInvalid())
|
||||
@ -6165,8 +6162,8 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) {
|
||||
M->dropAttr<DLLExportAttr>();
|
||||
|
||||
if (M->hasAttr<DLLExportAttr>()) {
|
||||
DefineImplicitSpecialMember(*this, M, M->getLocation());
|
||||
ActOnFinishInlineFunctionDef(M);
|
||||
// Define after any fields with in-class initializers have been parsed.
|
||||
DelayedDllExportMemberFunctions.push_back(M);
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -11419,6 +11416,21 @@ void Sema::ActOnFinishCXXMemberDecls() {
|
||||
|
||||
void Sema::ActOnFinishCXXNonNestedClass(Decl *D) {
|
||||
referenceDLLExportedClassMethods();
|
||||
|
||||
if (!DelayedDllExportMemberFunctions.empty()) {
|
||||
SmallVector<CXXMethodDecl*, 4> WorkList;
|
||||
std::swap(DelayedDllExportMemberFunctions, WorkList);
|
||||
for (CXXMethodDecl *M : WorkList) {
|
||||
DefineImplicitSpecialMember(*this, M, M->getLocation());
|
||||
|
||||
// Pass the method to the consumer to get emitted. This is not necessary
|
||||
// for explicit instantiation definitions, as they will get emitted
|
||||
// anyway.
|
||||
if (M->getParent()->getTemplateSpecializationKind() !=
|
||||
TSK_ExplicitInstantiationDefinition)
|
||||
ActOnFinishInlineFunctionDef(M);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Sema::referenceDLLExportedClassMethods() {
|
||||
|
@ -6794,14 +6794,10 @@ ExprResult Sema::ActOnStartCXXMemberReference(Scope *S, Expr *Base,
|
||||
// it's legal for the type to be incomplete if this is a pseudo-destructor
|
||||
// call. We'll do more incomplete-type checks later in the lookup process,
|
||||
// so just skip this check for ObjC types.
|
||||
if (BaseType->isObjCObjectOrInterfaceType()) {
|
||||
if (!BaseType->isRecordType()) {
|
||||
ObjectType = ParsedType::make(BaseType);
|
||||
MayBePseudoDestructor = true;
|
||||
return Base;
|
||||
} else if (!BaseType->isRecordType()) {
|
||||
ObjectType = nullptr;
|
||||
MayBePseudoDestructor = true;
|
||||
return Base;
|
||||
}
|
||||
|
||||
// The object type must be complete (or dependent), or
|
||||
|
@ -1289,7 +1289,16 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
|
||||
// FIXME: Better EqualLoc?
|
||||
InitializationKind Kind =
|
||||
InitializationKind::CreateCopy(expr->getBeginLoc(), SourceLocation());
|
||||
InitializationSequence Seq(SemaRef, Entity, Kind, expr,
|
||||
|
||||
// Vector elements can be initialized from other vectors in which case
|
||||
// we need initialization entity with a type of a vector (and not a vector
|
||||
// element!) initializing multiple vector elements.
|
||||
auto TmpEntity =
|
||||
(ElemType->isExtVectorType() && !Entity.getType()->isExtVectorType())
|
||||
? InitializedEntity::InitializeTemporary(ElemType)
|
||||
: Entity;
|
||||
|
||||
InitializationSequence Seq(SemaRef, TmpEntity, Kind, expr,
|
||||
/*TopLevelOfInitList*/ true);
|
||||
|
||||
// C++14 [dcl.init.aggr]p13:
|
||||
@ -1300,8 +1309,7 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity,
|
||||
// assignment-expression.
|
||||
if (Seq || isa<InitListExpr>(expr)) {
|
||||
if (!VerifyOnly) {
|
||||
ExprResult Result =
|
||||
Seq.Perform(SemaRef, Entity, Kind, expr);
|
||||
ExprResult Result = Seq.Perform(SemaRef, TmpEntity, Kind, expr);
|
||||
if (Result.isInvalid())
|
||||
hadError = true;
|
||||
|
||||
@ -8108,7 +8116,7 @@ ExprResult InitializationSequence::Perform(Sema &S,
|
||||
// argument passing.
|
||||
assert(Step->Type->isSamplerT() &&
|
||||
"Sampler initialization on non-sampler type.");
|
||||
Expr *Init = CurInit.get();
|
||||
Expr *Init = CurInit.get()->IgnoreParens();
|
||||
QualType SourceType = Init->getType();
|
||||
// Case 1
|
||||
if (Entity.isParameterKind()) {
|
||||
|
@ -139,6 +139,7 @@ class DSAStackTy {
|
||||
/// clause, false otherwise.
|
||||
llvm::Optional<std::pair<const Expr *, OMPOrderedClause *>> OrderedRegion;
|
||||
unsigned AssociatedLoops = 1;
|
||||
bool HasMutipleLoops = false;
|
||||
const Decl *PossiblyLoopCounter = nullptr;
|
||||
bool NowaitRegion = false;
|
||||
bool CancelRegion = false;
|
||||
@ -678,12 +679,19 @@ class DSAStackTy {
|
||||
/// Set collapse value for the region.
|
||||
void setAssociatedLoops(unsigned Val) {
|
||||
getTopOfStack().AssociatedLoops = Val;
|
||||
if (Val > 1)
|
||||
getTopOfStack().HasMutipleLoops = true;
|
||||
}
|
||||
/// Return collapse value for region.
|
||||
unsigned getAssociatedLoops() const {
|
||||
const SharingMapTy *Top = getTopOfStackOrNull();
|
||||
return Top ? Top->AssociatedLoops : 0;
|
||||
}
|
||||
/// Returns true if the construct is associated with multiple loops.
|
||||
bool hasMutipleLoops() const {
|
||||
const SharingMapTy *Top = getTopOfStackOrNull();
|
||||
return Top ? Top->HasMutipleLoops : false;
|
||||
}
|
||||
|
||||
/// Marks current target region as one with closely nested teams
|
||||
/// region.
|
||||
@ -1874,6 +1882,13 @@ bool Sema::isOpenMPPrivateDecl(const ValueDecl *D, unsigned Level) const {
|
||||
!isOpenMPSimdDirective(DSAStack->getCurrentDirective()))
|
||||
return true;
|
||||
}
|
||||
if (const auto *VD = dyn_cast<VarDecl>(D)) {
|
||||
if (DSAStack->isThreadPrivate(const_cast<VarDecl *>(VD)) &&
|
||||
DSAStack->isForceVarCapturing() &&
|
||||
!DSAStack->hasExplicitDSA(
|
||||
D, [](OpenMPClauseKind K) { return K == OMPC_copyin; }, Level))
|
||||
return true;
|
||||
}
|
||||
return DSAStack->hasExplicitDSA(
|
||||
D, [](OpenMPClauseKind K) { return K == OMPC_private; }, Level) ||
|
||||
(DSAStack->isClauseParsingMode() &&
|
||||
@ -5604,13 +5619,14 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) {
|
||||
if (!ISC.checkAndSetInit(Init, /*EmitDiags=*/false)) {
|
||||
if (ValueDecl *D = ISC.getLoopDecl()) {
|
||||
auto *VD = dyn_cast<VarDecl>(D);
|
||||
DeclRefExpr *PrivateRef = nullptr;
|
||||
if (!VD) {
|
||||
if (VarDecl *Private = isOpenMPCapturedDecl(D)) {
|
||||
VD = Private;
|
||||
} else {
|
||||
DeclRefExpr *Ref = buildCapture(*this, D, ISC.getLoopDeclRefExpr(),
|
||||
/*WithInit=*/false);
|
||||
VD = cast<VarDecl>(Ref->getDecl());
|
||||
PrivateRef = buildCapture(*this, D, ISC.getLoopDeclRefExpr(),
|
||||
/*WithInit=*/false);
|
||||
VD = cast<VarDecl>(PrivateRef->getDecl());
|
||||
}
|
||||
}
|
||||
DSAStack->addLoopControlVariable(D, VD);
|
||||
@ -5623,6 +5639,49 @@ void Sema::ActOnOpenMPLoopInitialization(SourceLocation ForLoc, Stmt *Init) {
|
||||
Var->getType().getNonLValueExprType(Context),
|
||||
ForLoc, /*RefersToCapture=*/true));
|
||||
}
|
||||
OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
|
||||
// OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables
|
||||
// Referenced in a Construct, C/C++]. The loop iteration variable in the
|
||||
// associated for-loop of a simd construct with just one associated
|
||||
// for-loop may be listed in a linear clause with a constant-linear-step
|
||||
// that is the increment of the associated for-loop. The loop iteration
|
||||
// variable(s) in the associated for-loop(s) of a for or parallel for
|
||||
// construct may be listed in a private or lastprivate clause.
|
||||
DSAStackTy::DSAVarData DVar =
|
||||
DSAStack->getTopDSA(D, /*FromParent=*/false);
|
||||
// If LoopVarRefExpr is nullptr it means the corresponding loop variable
|
||||
// is declared in the loop and it is predetermined as a private.
|
||||
Expr *LoopDeclRefExpr = ISC.getLoopDeclRefExpr();
|
||||
OpenMPClauseKind PredeterminedCKind =
|
||||
isOpenMPSimdDirective(DKind)
|
||||
? (DSAStack->hasMutipleLoops() ? OMPC_lastprivate : OMPC_linear)
|
||||
: OMPC_private;
|
||||
if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
|
||||
DVar.CKind != PredeterminedCKind && DVar.RefExpr &&
|
||||
(LangOpts.OpenMP <= 45 || (DVar.CKind != OMPC_lastprivate &&
|
||||
DVar.CKind != OMPC_private))) ||
|
||||
((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop ||
|
||||
isOpenMPDistributeDirective(DKind)) &&
|
||||
!isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
|
||||
DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) &&
|
||||
(DVar.CKind != OMPC_private || DVar.RefExpr)) {
|
||||
Diag(Init->getBeginLoc(), diag::err_omp_loop_var_dsa)
|
||||
<< getOpenMPClauseName(DVar.CKind)
|
||||
<< getOpenMPDirectiveName(DKind)
|
||||
<< getOpenMPClauseName(PredeterminedCKind);
|
||||
if (DVar.RefExpr == nullptr)
|
||||
DVar.CKind = PredeterminedCKind;
|
||||
reportOriginalDsa(*this, DSAStack, D, DVar,
|
||||
/*IsLoopIterVar=*/true);
|
||||
} else if (LoopDeclRefExpr) {
|
||||
// Make the loop iteration variable private (for worksharing
|
||||
// constructs), linear (for simd directives with the only one
|
||||
// associated loop) or lastprivate (for simd directives with several
|
||||
// collapsed or ordered loops).
|
||||
if (DVar.CKind == OMPC_unknown)
|
||||
DSAStack->addDSA(D, LoopDeclRefExpr, PredeterminedCKind,
|
||||
PrivateRef);
|
||||
}
|
||||
}
|
||||
}
|
||||
DSAStack->setAssociatedLoops(AssociatedLoops - 1);
|
||||
@ -5677,8 +5736,6 @@ static bool checkOpenMPIterationSpace(
|
||||
|
||||
// Check loop variable's type.
|
||||
if (ValueDecl *LCDecl = ISC.getLoopDecl()) {
|
||||
Expr *LoopDeclRefExpr = ISC.getLoopDeclRefExpr();
|
||||
|
||||
// OpenMP [2.6, Canonical Loop Form]
|
||||
// Var is one of the following:
|
||||
// A variable of signed or unsigned integer type.
|
||||
@ -5704,46 +5761,6 @@ static bool checkOpenMPIterationSpace(
|
||||
// sharing attributes.
|
||||
VarsWithImplicitDSA.erase(LCDecl);
|
||||
|
||||
// OpenMP [2.14.1.1, Data-sharing Attribute Rules for Variables Referenced
|
||||
// in a Construct, C/C++].
|
||||
// The loop iteration variable in the associated for-loop of a simd
|
||||
// construct with just one associated for-loop may be listed in a linear
|
||||
// clause with a constant-linear-step that is the increment of the
|
||||
// associated for-loop.
|
||||
// The loop iteration variable(s) in the associated for-loop(s) of a for or
|
||||
// parallel for construct may be listed in a private or lastprivate clause.
|
||||
DSAStackTy::DSAVarData DVar = DSA.getTopDSA(LCDecl, false);
|
||||
// If LoopVarRefExpr is nullptr it means the corresponding loop variable is
|
||||
// declared in the loop and it is predetermined as a private.
|
||||
OpenMPClauseKind PredeterminedCKind =
|
||||
isOpenMPSimdDirective(DKind)
|
||||
? ((NestedLoopCount == 1) ? OMPC_linear : OMPC_lastprivate)
|
||||
: OMPC_private;
|
||||
if (((isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
|
||||
DVar.CKind != PredeterminedCKind && DVar.RefExpr &&
|
||||
(SemaRef.getLangOpts().OpenMP <= 45 ||
|
||||
(DVar.CKind != OMPC_lastprivate && DVar.CKind != OMPC_private))) ||
|
||||
((isOpenMPWorksharingDirective(DKind) || DKind == OMPD_taskloop ||
|
||||
isOpenMPDistributeDirective(DKind)) &&
|
||||
!isOpenMPSimdDirective(DKind) && DVar.CKind != OMPC_unknown &&
|
||||
DVar.CKind != OMPC_private && DVar.CKind != OMPC_lastprivate)) &&
|
||||
(DVar.CKind != OMPC_private || DVar.RefExpr)) {
|
||||
SemaRef.Diag(Init->getBeginLoc(), diag::err_omp_loop_var_dsa)
|
||||
<< getOpenMPClauseName(DVar.CKind) << getOpenMPDirectiveName(DKind)
|
||||
<< getOpenMPClauseName(PredeterminedCKind);
|
||||
if (DVar.RefExpr == nullptr)
|
||||
DVar.CKind = PredeterminedCKind;
|
||||
reportOriginalDsa(SemaRef, &DSA, LCDecl, DVar, /*IsLoopIterVar=*/true);
|
||||
HasErrors = true;
|
||||
} else if (LoopDeclRefExpr != nullptr) {
|
||||
// Make the loop iteration variable private (for worksharing constructs),
|
||||
// linear (for simd directives with the only one associated loop) or
|
||||
// lastprivate (for simd directives with several collapsed or ordered
|
||||
// loops).
|
||||
if (DVar.CKind == OMPC_unknown)
|
||||
DSA.addDSA(LCDecl, LoopDeclRefExpr, PredeterminedCKind);
|
||||
}
|
||||
|
||||
assert(isOpenMPLoopDirective(DKind) && "DSA for non-loop vars");
|
||||
|
||||
// Check test-expr.
|
||||
|
@ -383,25 +383,19 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
|
||||
} else if (Info.requiresImmediateConstant() && !Info.allowsRegister()) {
|
||||
if (!InputExpr->isValueDependent()) {
|
||||
Expr::EvalResult EVResult;
|
||||
if (!InputExpr->EvaluateAsRValue(EVResult, Context, true))
|
||||
return StmtError(
|
||||
Diag(InputExpr->getBeginLoc(), diag::err_asm_immediate_expected)
|
||||
<< Info.getConstraintStr() << InputExpr->getSourceRange());
|
||||
|
||||
// For compatibility with GCC, we also allow pointers that would be
|
||||
// integral constant expressions if they were cast to int.
|
||||
llvm::APSInt IntResult;
|
||||
if (!EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
|
||||
Context))
|
||||
return StmtError(
|
||||
Diag(InputExpr->getBeginLoc(), diag::err_asm_immediate_expected)
|
||||
<< Info.getConstraintStr() << InputExpr->getSourceRange());
|
||||
|
||||
if (!Info.isValidAsmImmediate(IntResult))
|
||||
return StmtError(Diag(InputExpr->getBeginLoc(),
|
||||
diag::err_invalid_asm_value_for_constraint)
|
||||
<< IntResult.toString(10) << Info.getConstraintStr()
|
||||
<< InputExpr->getSourceRange());
|
||||
if (InputExpr->EvaluateAsRValue(EVResult, Context, true)) {
|
||||
// For compatibility with GCC, we also allow pointers that would be
|
||||
// integral constant expressions if they were cast to int.
|
||||
llvm::APSInt IntResult;
|
||||
if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
|
||||
Context))
|
||||
if (!Info.isValidAsmImmediate(IntResult))
|
||||
return StmtError(Diag(InputExpr->getBeginLoc(),
|
||||
diag::err_invalid_asm_value_for_constraint)
|
||||
<< IntResult.toString(10)
|
||||
<< Info.getConstraintStr()
|
||||
<< InputExpr->getSourceRange());
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -362,13 +362,27 @@ bool Sema::LookupTemplateName(LookupResult &Found,
|
||||
// x->B::f, and we are looking into the type of the object.
|
||||
assert(!SS.isSet() && "ObjectType and scope specifier cannot coexist");
|
||||
LookupCtx = computeDeclContext(ObjectType);
|
||||
IsDependent = !LookupCtx;
|
||||
IsDependent = !LookupCtx && ObjectType->isDependentType();
|
||||
assert((IsDependent || !ObjectType->isIncompleteType() ||
|
||||
ObjectType->castAs<TagType>()->isBeingDefined()) &&
|
||||
"Caller should have completed object type");
|
||||
|
||||
// Template names cannot appear inside an Objective-C class or object type.
|
||||
if (ObjectType->isObjCObjectOrInterfaceType()) {
|
||||
// Template names cannot appear inside an Objective-C class or object type
|
||||
// or a vector type.
|
||||
//
|
||||
// FIXME: This is wrong. For example:
|
||||
//
|
||||
// template<typename T> using Vec = T __attribute__((ext_vector_type(4)));
|
||||
// Vec<int> vi;
|
||||
// vi.Vec<int>::~Vec<int>();
|
||||
//
|
||||
// ... should be accepted but we will not treat 'Vec' as a template name
|
||||
// here. The right thing to do would be to check if the name is a valid
|
||||
// vector component name, and look up a template name if not. And similarly
|
||||
// for lookups into Objective-C class and object types, where the same
|
||||
// problem can arise.
|
||||
if (ObjectType->isObjCObjectOrInterfaceType() ||
|
||||
ObjectType->isVectorType()) {
|
||||
Found.clear();
|
||||
return false;
|
||||
}
|
||||
|
@ -406,13 +406,15 @@ void IteratorChecker::checkPreCall(const CallEvent &Call,
|
||||
} else if (isRandomIncrOrDecrOperator(Func->getOverloadedOperator())) {
|
||||
if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) {
|
||||
// Check for out-of-range incrementions and decrementions
|
||||
if (Call.getNumArgs() >= 1) {
|
||||
if (Call.getNumArgs() >= 1 &&
|
||||
Call.getArgExpr(0)->getType()->isIntegralOrEnumerationType()) {
|
||||
verifyRandomIncrOrDecr(C, Func->getOverloadedOperator(),
|
||||
InstCall->getCXXThisVal(),
|
||||
Call.getArgSVal(0));
|
||||
}
|
||||
} else {
|
||||
if (Call.getNumArgs() >= 2) {
|
||||
if (Call.getNumArgs() >= 2 &&
|
||||
Call.getArgExpr(1)->getType()->isIntegralOrEnumerationType()) {
|
||||
verifyRandomIncrOrDecr(C, Func->getOverloadedOperator(),
|
||||
Call.getArgSVal(0), Call.getArgSVal(1));
|
||||
}
|
||||
@ -590,14 +592,16 @@ void IteratorChecker::checkPostCall(const CallEvent &Call,
|
||||
return;
|
||||
} else if (isRandomIncrOrDecrOperator(Func->getOverloadedOperator())) {
|
||||
if (const auto *InstCall = dyn_cast<CXXInstanceCall>(&Call)) {
|
||||
if (Call.getNumArgs() >= 1) {
|
||||
if (Call.getNumArgs() >= 1 &&
|
||||
Call.getArgExpr(0)->getType()->isIntegralOrEnumerationType()) {
|
||||
handleRandomIncrOrDecr(C, Func->getOverloadedOperator(),
|
||||
Call.getReturnValue(),
|
||||
InstCall->getCXXThisVal(), Call.getArgSVal(0));
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (Call.getNumArgs() >= 2) {
|
||||
if (Call.getNumArgs() >= 2 &&
|
||||
Call.getArgExpr(1)->getType()->isIntegralOrEnumerationType()) {
|
||||
handleRandomIncrOrDecr(C, Func->getOverloadedOperator(),
|
||||
Call.getReturnValue(), Call.getArgSVal(0),
|
||||
Call.getArgSVal(1));
|
||||
|
@ -184,8 +184,10 @@ void LinkerDriver::addBuffer(std::unique_ptr<MemoryBuffer> mb,
|
||||
if (wholeArchive) {
|
||||
std::unique_ptr<Archive> file =
|
||||
CHECK(Archive::create(mbref), filename + ": failed to parse archive");
|
||||
Archive *archive = file.get();
|
||||
make<std::unique_ptr<Archive>>(std::move(file)); // take ownership
|
||||
|
||||
for (MemoryBufferRef m : getArchiveMembers(file.get()))
|
||||
for (MemoryBufferRef m : getArchiveMembers(archive))
|
||||
addArchiveBuffer(m, "<whole-archive>", filename, 0);
|
||||
return;
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user