commit a31ce0ad92

    MF head @ r283796

@@ -1276,8 +1276,8 @@ _sed=		usr.bin/sed
.endif

.if ${BOOTSTRAPPING} < 1000002
_m4=		lib/libohash \
		usr.bin/m4
_libohash=	lib/libohash
_m4=		usr.bin/m4

${_bt}-usr.bin/m4: ${_bt}-lib/libohash
.endif
@@ -1363,6 +1363,15 @@ _kerberos5_bootstrap_tools= \
.ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g}
.endif

.if ${MK_MANDOCDB} != "no"
_libohash?=	lib/libohash
_makewhatis=	lib/libsqlite3 \
		usr.bin/mandoc
${_bt}-usr.bin/mandoc: ${_bt}-lib/libohash ${_bt}-lib/libsqlite3
.else
_makewhatis=usr.bin/makewhatis
.endif

# Rebuild up-to-date libmd for xinstall
${_bt}-usr.bin/xinstall: ${_bt}-lib/libmd

@@ -1383,7 +1392,8 @@ bootstrap-tools: .PHONY
    ${_awk} \
    ${_cat} \
    usr.bin/lorder \
    usr.bin/makewhatis \
    ${_libohash} \
    ${_makewhatis} \
    usr.bin/rpcgen \
    ${_sed} \
    ${_yacc} \
@@ -1755,6 +1765,7 @@ _cddl_lib= cddl/lib
cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L
cddl/lib/libzfs__L: lib/libgeom__L
cddl/lib/libctf__L: lib/libz__L
.endif
# cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built
# on select architectures though (see cddl/lib/Makefile)
.if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386" || \
@@ -1762,7 +1773,6 @@ cddl/lib/libctf__L: lib/libz__L
    ${MACHINE_CPUARCH} == "arm"
_prebuild_libs+=	lib/libproc lib/librtld_db
.endif
.endif

.if ${MK_CRYPT} != "no"
.if ${MK_OPENSSL} != "no"
@@ -1806,9 +1816,10 @@ kerberos5/lib/libroken__L: lib/libcrypt__L
kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L
kerberos5/lib/libheimbase__L: lib/libthr__L
kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L
lib/libsqlite3__L: lib/libthr__L
.endif

lib/libsqlite3__L: lib/libthr__L

.if ${MK_GSSAPI} != "no"
_lib_libgssapi=	lib/libgssapi
.endif
@@ -38,6 +38,71 @@
# xargs -n1 | sort | uniq -d;
# done

# 20150528: PCI IOV device driver methods moved to a separate kobj interface.
OLD_FILES+=usr/share/man/man9/PCI_ADD_VF.9
OLD_FILES+=usr/share/man/man9/PCI_INIT_IOV.9
OLD_FILES+=usr/share/man/man9/PCI_UNINIT_IOV.9
# 20150525: new clang import which bumps version from 3.6.0 to 3.6.1.
OLD_FILES+=usr/lib/clang/3.6.0/include/__stddef_max_align_t.h
OLD_FILES+=usr/lib/clang/3.6.0/include/__wmmintrin_aes.h
OLD_FILES+=usr/lib/clang/3.6.0/include/__wmmintrin_pclmul.h
OLD_FILES+=usr/lib/clang/3.6.0/include/adxintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/altivec.h
OLD_FILES+=usr/lib/clang/3.6.0/include/ammintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/arm_acle.h
OLD_FILES+=usr/lib/clang/3.6.0/include/arm_neon.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx2intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512bwintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512erintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512fintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512vlbwintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512vlintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avxintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/bmi2intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/bmiintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/cpuid.h
OLD_FILES+=usr/lib/clang/3.6.0/include/emmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/f16cintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/fma4intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/fmaintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/ia32intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/immintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/lzcntintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/mm3dnow.h
OLD_FILES+=usr/lib/clang/3.6.0/include/mm_malloc.h
OLD_FILES+=usr/lib/clang/3.6.0/include/mmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/module.modulemap
OLD_FILES+=usr/lib/clang/3.6.0/include/nmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/pmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/popcntintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/prfchwintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/rdseedintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/rtmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/shaintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/smmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/tbmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/tmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/wmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/x86intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/xmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/xopintrin.h
OLD_DIRS+=usr/lib/clang/3.6.0/include
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.asan-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.asan-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.asan_cxx-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.asan_cxx-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.profile-arm.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.profile-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.profile-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.san-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.san-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.ubsan-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.ubsan-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.ubsan_cxx-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.ubsan_cxx-x86_64.a
OLD_DIRS+=usr/lib/clang/3.6.0/lib/freebsd
OLD_DIRS+=usr/lib/clang/3.6.0/lib
OLD_DIRS+=usr/lib/clang/3.6.0
# 20150521
OLD_FILES+=usr/bin/demandoc
OLD_FILES+=usr/share/man/man1/demandoc.1.gz
@@ -31,6 +31,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 11.x IS SLOW:
	disable the most expensive debugging functionality run
	"ln -s 'abort:false,junk:false' /etc/malloc.conf".)

20150525:
	Clang and llvm have been upgraded to 3.6.1 release.  Please see the
	20141231 entry below for information about prerequisites and upgrading,
	if you are not already using 3.5.0 or higher.

20150521:
	TI platform code switched to using vendor DTS files and this update
	may break existing systems running on Beaglebone, Beaglebone Black,
@@ -29,7 +29,7 @@
.\" @(#)ps.1	8.3 (Berkeley) 4/18/94
.\" $FreeBSD$
.\"
.Dd May 22, 2015
.Dd May 27, 2015
.Dt PS 1
.Os
.Sh NAME
@@ -350,6 +350,9 @@ the include file
.In sys/proc.h :
.Bl -column P2_INHERIT_PROTECTED 0x00000001
.It Dv "P2_INHERIT_PROTECTED" Ta No "0x00000001" Ta "New children get P_PROTECTED"
.It Dv "P2_NOTRACE" Ta No "0x00000002" Ta "No ptrace(2) attach or coredumps"
.It Dv "P2_NOTRACE_EXEC" Ta No "0x00000004" Ta "Keep P2_NOPTRACE on exec(2)"
.It Dv "P2_AST_SU" Ta No "0x00000008" Ta "Handles SU ast for kthreads"
.El
.It Cm label
The MAC label of the process.
@@ -21,7 +21,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd October 8, 2014
.Dd May 24, 2015
.Dt LOCKSTAT 1
.Os
.Sh NAME
@@ -29,7 +29,7 @@
.Nd report kernel lock and profiling statistics
.Sh SYNOPSIS
.Nm
.Op Fl ACEHI
.Op Fl ACEHIV
.Op Fl e Ar event-list
.Op Fl i Ar rate
.Op Fl b | t | h | s Ar depth
@@ -90,6 +90,10 @@ to
is restricted to super-user by default.
.Sh OPTIONS
The following options are supported:
.Bl -tag -width indent
.It Fl V
Print the D program used to gather the requested data.
.El
.Ss Event Selection
If no event selection options are specified, the default is
.Fl C .
@@ -299,6 +299,8 @@ usage(void)
{
	(void) fprintf(stderr,
	    "Usage: lockstat [options] command [args]\n"
	    "\nGeneral options:\n\n"
	    "  -V              print the corresponding D program\n"
	    "\nEvent selection options:\n\n"
	    "  -C              watch contention events [on by default]\n"
	    "  -E              watch error events [off by default]\n"
@@ -23,7 +23,7 @@
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: _elftc.h 3175 2015-03-27 17:21:24Z emaste $
 * $Id: _elftc.h 3209 2015-05-17 13:40:46Z kaiwang27 $
 */

/**
@@ -342,12 +342,13 @@ struct name { \

#if defined(__GLIBC__) || defined(__linux__)

#ifndef _GNU_SOURCE
/*
 * GLIBC based systems have a global 'char *' pointer referencing
 * the executable's name.
 */
extern const char *program_invocation_short_name;
#endif	/* !_GNU_SOURCE */

#define	ELFTC_GETPROGNAME()	program_invocation_short_name
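Context for this hunk: with _GNU_SOURCE defined (which the os.Linux.mk change later in this commit now does globally), glibc's <errno.h> already declares program_invocation_short_name, so the header only supplies its own extern declaration when _GNU_SOURCE is absent. A minimal sketch, assuming a glibc/Linux host:

	#define _GNU_SOURCE
	#include <errno.h>	/* glibc declares program_invocation_short_name here */
	#include <stdio.h>

	int
	main(void)
	{
		/* Prints the basename of the running executable. */
		printf("%s\n", program_invocation_short_name);
		return (0);
	}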
@@ -23,7 +23,7 @@
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: elfdefinitions.h 3178 2015-03-30 18:29:13Z emaste $
 * $Id: elfdefinitions.h 3198 2015-05-14 18:36:19Z emaste $
 */

/*
@@ -565,6 +565,7 @@ _ELF_DEFINE_EM(EM_SPARC, 2, "SPARC") \
_ELF_DEFINE_EM(EM_386, 3, "Intel 80386") \
_ELF_DEFINE_EM(EM_68K, 4, "Motorola 68000") \
_ELF_DEFINE_EM(EM_88K, 5, "Motorola 88000") \
_ELF_DEFINE_EM(EM_IAMCU, 6, "Intel MCU") \
_ELF_DEFINE_EM(EM_860, 7, "Intel 80860") \
_ELF_DEFINE_EM(EM_MIPS, 8, "MIPS I Architecture") \
_ELF_DEFINE_EM(EM_S370, 9, "IBM System/370 Processor") \
@@ -23,7 +23,7 @@
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: elfcopy.h 3173 2015-03-27 16:46:13Z emaste $
 * $Id: elfcopy.h 3221 2015-05-24 23:42:43Z kaiwang27 $
 */

#include <sys/queue.h>
@@ -237,6 +237,7 @@ struct elfcopy {
	uint64_t	*secndx;	/* section index map. */
	uint64_t	*symndx;	/* symbol index map. */
	unsigned char	*v_rel;		/* symbols needed by relocation. */
	unsigned char	*v_grp;		/* symbols referred to by section group. */
	unsigned char	*v_secsym;	/* sections with section symbol. */
	STAILQ_HEAD(, segment) v_seg;	/* list of segments. */
	STAILQ_HEAD(, sec_action) v_sac;/* list of section operations. */
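The v_rel, v_grp and v_secsym members are byte-packed bit vectors indexed by symbol or section number. A minimal sketch of the idiom (the helper names and definitions below are assumptions for illustration, not copied from elfcopy):

	#include <stdlib.h>

	/* Hypothetical helpers in the style elfcopy uses; one bit per index. */
	#define	BIT_SET(v, n)	((v)[(n) >> 3] |= 1 << ((n) & 7))
	#define	BIT_ISSET(v, n)	((v)[(n) >> 3] & (1 << ((n) & 7)))

	int
	main(void)
	{
		size_t sc = 100;		/* e.g. symbol count */
		unsigned char *v;

		/* (sc + 7) / 8 rounds up to whole bytes, as in the code below. */
		if ((v = calloc((sc + 7) / 8, 1)) == NULL)
			return (1);
		BIT_SET(v, 42);			/* mark symbol 42 as needed */
		return (BIT_ISSET(v, 42) ? 0 : 1);
	}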
@@ -39,7 +39,7 @@

#include "elfcopy.h"

ELFTC_VCSID("$Id: main.c 3174 2015-03-27 17:13:41Z emaste $");
ELFTC_VCSID("$Id: main.c 3216 2015-05-23 21:16:36Z kaiwang27 $");

enum options
{
@@ -404,8 +404,19 @@ create_elf(struct elfcopy *ecp)
	 * Insert SHDR table into the internal section list as a "pseudo"
	 * section, so later it will get sorted and resynced just as "normal"
	 * sections.
	 *
	 * Under FreeBSD, Binutils objcopy always put the section header
	 * at the end of all the sections. We want to do the same here.
	 *
	 * However, note that the behaviour is still different with Binutils:
	 * elfcopy checks the FreeBSD OSABI tag to tell whether it needs to
	 * move the section headers, while Binutils is probably configured
	 * this way when it's compiled on FreeBSD.
	 */
	shtab = insert_shtab(ecp, 0);
	if (oeh.e_ident[EI_OSABI] == ELFOSABI_FREEBSD)
		shtab = insert_shtab(ecp, 1);
	else
		shtab = insert_shtab(ecp, 0);

	/*
	 * Resync section offsets in the output object. This is needed
@@ -485,6 +496,11 @@ free_elf(struct elfcopy *ecp)
			free(sec);
		}
	}

	if (ecp->secndx != NULL) {
		free(ecp->secndx);
		ecp->secndx = NULL;
	}
}

/* Create a temporary file. */
@@ -34,7 +34,7 @@

#include "elfcopy.h"

ELFTC_VCSID("$Id: sections.c 3185 2015-04-11 08:56:34Z kaiwang27 $");
ELFTC_VCSID("$Id: sections.c 3220 2015-05-24 23:42:39Z kaiwang27 $");

static void	add_gnu_debuglink(struct elfcopy *ecp);
static uint32_t	calc_crc32(const char *p, size_t len, uint32_t crc);
@@ -56,6 +56,7 @@ static void	print_data(const char *d, size_t sz);
static void	print_section(struct section *s);
static void	*read_section(struct section *s, size_t *size);
static void	update_reloc(struct elfcopy *ecp, struct section *s);
static void	update_section_group(struct elfcopy *ecp, struct section *s);

int
is_remove_section(struct elfcopy *ecp, const char *name)
@@ -552,6 +553,14 @@ copy_content(struct elfcopy *ecp)
		    (s->type == SHT_REL || s->type == SHT_RELA))
			filter_reloc(ecp, s);

		/*
		 * The section indices in the SHT_GROUP section need
		 * to be updated since we might have stripped some
		 * sections and changed section numbering.
		 */
		if (s->type == SHT_GROUP)
			update_section_group(ecp, s);

		if (is_modify_section(ecp, s->name))
			modify_section(ecp, s);

@@ -571,6 +580,71 @@ copy_content(struct elfcopy *ecp)
	}
}

/*
 * Update section group section. The section indices in the SHT_GROUP
 * section need to be updated after the section numbering has changed.
 */
static void
update_section_group(struct elfcopy *ecp, struct section *s)
{
	GElf_Shdr	 ish;
	Elf_Data	*id;
	uint32_t	*ws, *wd;
	uint64_t	 n;
	size_t		 ishnum;
	int		 i, j;

	if (!elf_getshnum(ecp->ein, &ishnum))
		errx(EXIT_FAILURE, "elf_getshnum failed: %s",
		    elf_errmsg(-1));

	if (gelf_getshdr(s->is, &ish) == NULL)
		errx(EXIT_FAILURE, "gelf_getshdr() failed: %s",
		    elf_errmsg(-1));

	if ((id = elf_getdata(s->is, NULL)) == NULL)
		errx(EXIT_FAILURE, "elf_getdata() failed: %s",
		    elf_errmsg(-1));

	if (ish.sh_size == 0)
		return;

	if (ish.sh_entsize == 0)
		ish.sh_entsize = 4;

	ws = id->d_buf;

	/* We only support COMDAT sections. */
#ifndef GRP_COMDAT
#define	GRP_COMDAT 0x1
#endif
	if ((*ws & GRP_COMDAT) == 0)
		return;

	if ((s->buf = malloc(ish.sh_size)) == NULL)
		err(EXIT_FAILURE, "malloc failed");

	s->sz = ish.sh_size;

	wd = s->buf;

	/* Copy the flag word as-is. */
	*wd = *ws;

	/* Update the section indices. */
	n = ish.sh_size / ish.sh_entsize;
	for (i = 1, j = 1; (uint64_t)i < n; i++) {
		if (ws[i] != SHN_UNDEF && ws[i] < ishnum &&
		    ecp->secndx[ws[i]] != 0)
			wd[j++] = ecp->secndx[ws[i]];
		else
			s->sz -= 4;
	}

	s->nocopy = 1;
}
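For reference, the SHT_GROUP body that update_section_group() rewrites is, per the ELF gABI, an array of 32-bit words: word 0 is a flag word (GRP_COMDAT is the only defined flag) and the remaining words are member section indices. A compilable sketch of just the remapping step (the function name is illustrative, not part of elfcopy):

	#include <stdint.h>

	/*
	 * Copy the flag word, then remap each surviving member index
	 * through the old-to-new section index map; dropped members
	 * (mapped to 0) are simply omitted.  Returns the new word count.
	 */
	static uint64_t
	remap_group(const uint32_t *ws, uint32_t *wd, uint64_t n,
	    const uint64_t *secndx)
	{
		uint64_t i, j;

		wd[0] = ws[0];			/* flag word, e.g. GRP_COMDAT */
		for (i = 1, j = 1; i < n; i++)
			if (secndx[ws[i]] != 0)	/* member survived the strip */
				wd[j++] = (uint32_t)secndx[ws[i]];
		return (j);
	}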
/*
 * Filter relocation entries, only keep those entries whose
 * symbol is in the keep list.
@@ -1028,8 +1102,11 @@ copy_shdr(struct elfcopy *ecp, struct section *s, const char *name, int copy,
			osh.sh_flags |= SHF_WRITE;
		if (sec_flags & SF_CODE)
			osh.sh_flags |= SHF_EXECINSTR;
	} else
	} else {
		osh.sh_flags = ish.sh_flags;
		if (ish.sh_type == SHT_REL || ish.sh_type == SHT_RELA)
			osh.sh_flags |= SHF_INFO_LINK;
	}
	}

	if (name == NULL)
@ -33,7 +33,7 @@
|
||||
|
||||
#include "elfcopy.h"
|
||||
|
||||
ELFTC_VCSID("$Id: symbols.c 3191 2015-05-04 17:07:01Z jkoshy $");
|
||||
ELFTC_VCSID("$Id: symbols.c 3222 2015-05-24 23:47:23Z kaiwang27 $");
|
||||
|
||||
/* Symbol table buffer structure. */
|
||||
struct symbuf {
|
||||
@ -77,7 +77,8 @@ static int is_weak_symbol(unsigned char st_info);
|
||||
static int lookup_exact_string(hash_head *hash, const char *buf,
|
||||
const char *s);
|
||||
static int generate_symbols(struct elfcopy *ecp);
|
||||
static void mark_symbols(struct elfcopy *ecp, size_t sc);
|
||||
static void mark_reloc_symbols(struct elfcopy *ecp, size_t sc);
|
||||
static void mark_section_group_symbols(struct elfcopy *ecp, size_t sc);
|
||||
static int match_wildcard(const char *name, const char *pattern);
|
||||
uint32_t str_hash(const char *s);
|
||||
|
||||
@ -160,6 +161,10 @@ is_needed_symbol(struct elfcopy *ecp, int i, GElf_Sym *s)
|
||||
if (BIT_ISSET(ecp->v_rel, i))
|
||||
return (1);
|
||||
|
||||
/* Symbols referred to by COMDAT sections are needed. */
|
||||
if (BIT_ISSET(ecp->v_grp, i))
|
||||
return (1);
|
||||
|
||||
/*
|
||||
* For relocatable files (.o files), global and weak symbols
|
||||
* are needed.
|
||||
@ -207,7 +212,10 @@ is_remove_symbol(struct elfcopy *ecp, size_t sc, int i, GElf_Sym *s,
|
||||
return (1);
|
||||
|
||||
if (ecp->v_rel == NULL)
|
||||
mark_symbols(ecp, sc);
|
||||
mark_reloc_symbols(ecp, sc);
|
||||
|
||||
if (ecp->v_grp == NULL)
|
||||
mark_section_group_symbols(ecp, sc);
|
||||
|
||||
if (is_needed_symbol(ecp, i, s))
|
||||
return (0);
|
||||
@ -233,7 +241,7 @@ is_remove_symbol(struct elfcopy *ecp, size_t sc, int i, GElf_Sym *s,
|
||||
* Mark symbols referred to by relocation entries.
|
||||
*/
|
||||
static void
|
||||
mark_symbols(struct elfcopy *ecp, size_t sc)
|
||||
mark_reloc_symbols(struct elfcopy *ecp, size_t sc)
|
||||
{
|
||||
const char *name;
|
||||
Elf_Data *d;
|
||||
@ -311,6 +319,49 @@ mark_symbols(struct elfcopy *ecp, size_t sc)
|
||||
elf_errmsg(elferr));
|
||||
}
|
||||
|
||||
static void
|
||||
mark_section_group_symbols(struct elfcopy *ecp, size_t sc)
|
||||
{
|
||||
const char *name;
|
||||
Elf_Scn *s;
|
||||
GElf_Shdr sh;
|
||||
size_t indx;
|
||||
int elferr;
|
||||
|
||||
ecp->v_grp = calloc((sc + 7) / 8, 1);
|
||||
if (ecp->v_grp == NULL)
|
||||
err(EXIT_FAILURE, "calloc failed");
|
||||
|
||||
if (elf_getshstrndx(ecp->ein, &indx) == 0)
|
||||
errx(EXIT_FAILURE, "elf_getshstrndx failed: %s",
|
||||
elf_errmsg(-1));
|
||||
|
||||
s = NULL;
|
||||
while ((s = elf_nextscn(ecp->ein, s)) != NULL) {
|
||||
if (gelf_getshdr(s, &sh) != &sh)
|
||||
errx(EXIT_FAILURE, "elf_getshdr failed: %s",
|
||||
elf_errmsg(-1));
|
||||
|
||||
if (sh.sh_type != SHT_GROUP)
|
||||
continue;
|
||||
|
||||
if ((name = elf_strptr(ecp->ein, indx, sh.sh_name)) == NULL)
|
||||
errx(EXIT_FAILURE, "elf_strptr failed: %s",
|
||||
elf_errmsg(-1));
|
||||
if (is_remove_section(ecp, name))
|
||||
continue;
|
||||
|
||||
if (sh.sh_info > 0 && sh.sh_info < sc)
|
||||
BIT_SET(ecp->v_grp, sh.sh_info);
|
||||
else if (sh.sh_info != 0)
|
||||
warnx("invalid symbox index");
|
||||
}
|
||||
elferr = elf_errno();
|
||||
if (elferr != 0)
|
||||
errx(EXIT_FAILURE, "elf_nextscn failed: %s",
|
||||
elf_errmsg(elferr));
|
||||
}
|
||||
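Worth noting for readers of mark_section_group_symbols() above: in an SHT_GROUP header, sh_link names the associated symbol table and sh_info is the index of the group's signature symbol, which is why the loop marks sh_info in the v_grp bit vector. A hedged sketch of that check (the helper name is illustrative, and it assumes libelf's <gelf.h>):

	#include <gelf.h>

	/*
	 * Return the signature-symbol index recorded in sh_info for a
	 * COMDAT group section, or -1 if the header is not a valid group.
	 */
	static long
	group_signature_index(const GElf_Shdr *sh, size_t symcount)
	{
		if (sh->sh_type != SHT_GROUP)
			return (-1);
		if (sh->sh_info == 0 || sh->sh_info >= symcount)
			return (-1);		/* invalid symbol index */
		return ((long)sh->sh_info);
	}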
|
||||
static int
|
||||
generate_symbols(struct elfcopy *ecp)
|
||||
{
|
||||
@ -351,6 +402,8 @@ generate_symbols(struct elfcopy *ecp)
|
||||
ecp->symtab->buf = sy_buf;
|
||||
ecp->strtab->buf = st_buf;
|
||||
|
||||
gsym = NULL;
|
||||
|
||||
/*
|
||||
* Create bit vector v_secsym, which is used to mark sections
|
||||
* that already have corresponding STT_SECTION symbols.
|
||||
@ -384,7 +437,7 @@ generate_symbols(struct elfcopy *ecp)
|
||||
/* Symbol table should exist if this function is called. */
|
||||
if (symndx == 0) {
|
||||
warnx("can't find .strtab section");
|
||||
return (0);
|
||||
goto clean;
|
||||
}
|
||||
|
||||
/* Locate .symtab of input object. */
|
||||
@ -413,7 +466,6 @@ generate_symbols(struct elfcopy *ecp)
|
||||
* output object, it is used by update_reloc() later to update
|
||||
* relocation information.
|
||||
*/
|
||||
gsym = NULL;
|
||||
sc = ish.sh_size / ish.sh_entsize;
|
||||
if (sc > 0) {
|
||||
ecp->symndx = calloc(sc, sizeof(*ecp->symndx));
|
||||
@ -427,7 +479,7 @@ generate_symbols(struct elfcopy *ecp)
|
||||
if (elferr != 0)
|
||||
errx(EXIT_FAILURE, "elf_getdata failed: %s",
|
||||
elf_errmsg(elferr));
|
||||
return (0);
|
||||
goto clean;
|
||||
}
|
||||
} else
|
||||
return (0);
|
||||
@ -523,7 +575,7 @@ generate_symbols(struct elfcopy *ecp)
|
||||
* check if that only local symbol is the reserved symbol.
|
||||
*/
|
||||
if (sy_buf->nls <= 1 && sy_buf->ngs == 0)
|
||||
return (0);
|
||||
goto clean;
|
||||
|
||||
/*
|
||||
* Create STT_SECTION symbols for sections that do not already
|
||||
@ -550,6 +602,7 @@ generate_symbols(struct elfcopy *ecp)
|
||||
sym.st_value = s->vma;
|
||||
sym.st_size = 0;
|
||||
sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
|
||||
sym.st_other = STV_DEFAULT;
|
||||
/*
|
||||
* Don't let add_to_symtab() touch sym.st_shndx.
|
||||
* In this case, we know the index already.
|
||||
@ -583,6 +636,12 @@ generate_symbols(struct elfcopy *ecp)
|
||||
}
|
||||
|
||||
return (1);
|
||||
|
||||
clean:
|
||||
free(gsym);
|
||||
free_symtab(ecp);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
@ -624,7 +683,9 @@ create_symtab(struct elfcopy *ecp)
|
||||
if (((ecp->flags & SYMTAB_INTACT) == 0) && !generate_symbols(ecp)) {
|
||||
TAILQ_REMOVE(&ecp->v_sec, ecp->symtab, sec_list);
|
||||
TAILQ_REMOVE(&ecp->v_sec, ecp->strtab, sec_list);
|
||||
free(ecp->symtab->buf);
|
||||
free(ecp->symtab);
|
||||
free(ecp->strtab->buf);
|
||||
free(ecp->strtab);
|
||||
ecp->symtab = NULL;
|
||||
ecp->strtab = NULL;
|
||||
@ -697,6 +758,23 @@ free_symtab(struct elfcopy *ecp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ecp->symndx != NULL) {
|
||||
free(ecp->symndx);
|
||||
ecp->symndx = NULL;
|
||||
}
|
||||
if (ecp->v_rel != NULL) {
|
||||
free(ecp->v_rel);
|
||||
ecp->v_rel = NULL;
|
||||
}
|
||||
if (ecp->v_grp != NULL) {
|
||||
free(ecp->v_grp);
|
||||
ecp->v_grp = NULL;
|
||||
}
|
||||
if (ecp->v_secsym != NULL) {
|
||||
free(ecp->v_secsym);
|
||||
ecp->v_secsym = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -26,7 +26,7 @@

#include "_libdwarf.h"

ELFTC_VCSID("$Id: libdwarf_reloc.c 3149 2015-02-15 19:00:06Z emaste $");
ELFTC_VCSID("$Id: libdwarf_reloc.c 3198 2015-05-14 18:36:19Z emaste $");

Dwarf_Unsigned
_dwarf_get_reloc_type(Dwarf_P_Debug dbg, int is64)
@@ -75,6 +75,7 @@ _dwarf_get_reloc_size(Dwarf_Debug dbg, Dwarf_Unsigned rel_type)
		return (4);
		break;
	case EM_386:
	case EM_IAMCU:
		if (rel_type == R_386_32)
			return (4);
		break;
@@ -36,7 +36,7 @@

#include "_libelftc.h"

ELFTC_VCSID("$Id: libelftc_dem_gnu3.c 3194 2015-05-05 17:55:16Z emaste $");
ELFTC_VCSID("$Id: libelftc_dem_gnu3.c 3212 2015-05-17 13:40:55Z kaiwang27 $");

/**
 * @file cpp_demangle.c
@@ -50,7 +50,7 @@ ELFTC_VCSID("$Id: libelftc_dem_gnu3.c 3194 2015-05-05 17:55:16Z emaste $");

enum type_qualifier {
	TYPE_PTR, TYPE_REF, TYPE_CMX, TYPE_IMG, TYPE_EXT, TYPE_RST, TYPE_VAT,
	TYPE_CST
	TYPE_CST, TYPE_VEC
};

struct vector_type_qualifier {
@@ -84,6 +84,7 @@ struct cpp_demangle_data {
	int		 func_type;
	const char	*cur;		/* current mangled name ptr */
	const char	*last_sname;	/* last source name */
	int		 push_head;
};

#define	CPP_DEMANGLE_TRY_LIMIT	128
@ -112,6 +113,8 @@ static int cpp_demangle_read_array(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_encoding(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_expr_primary(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_expression(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_expression_flat(struct cpp_demangle_data *,
|
||||
char **);
|
||||
static int cpp_demangle_read_expression_binary(struct cpp_demangle_data *,
|
||||
const char *, size_t);
|
||||
static int cpp_demangle_read_expression_unary(struct cpp_demangle_data *,
|
||||
@ -123,8 +126,12 @@ static int cpp_demangle_read_function(struct cpp_demangle_data *, int *,
|
||||
static int cpp_demangle_local_source_name(struct cpp_demangle_data *ddata);
|
||||
static int cpp_demangle_read_local_name(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_name(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_name_flat(struct cpp_demangle_data *,
|
||||
char**);
|
||||
static int cpp_demangle_read_nested_name(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_number(struct cpp_demangle_data *, long *);
|
||||
static int cpp_demangle_read_number_as_string(struct cpp_demangle_data *,
|
||||
char **);
|
||||
static int cpp_demangle_read_nv_offset(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_offset(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_offset_number(struct cpp_demangle_data *);
|
||||
@ -138,6 +145,8 @@ static int cpp_demangle_read_tmpl_arg(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_tmpl_args(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_tmpl_param(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_type(struct cpp_demangle_data *, int);
|
||||
static int cpp_demangle_read_type_flat(struct cpp_demangle_data *,
|
||||
char **);
|
||||
static int cpp_demangle_read_uqname(struct cpp_demangle_data *);
|
||||
static int cpp_demangle_read_v_offset(struct cpp_demangle_data *);
|
||||
static char *decode_fp_to_double(const char *, size_t);
|
||||
@ -156,8 +165,6 @@ static int vector_type_qualifier_init(struct vector_type_qualifier *);
|
||||
static int vector_type_qualifier_push(struct vector_type_qualifier *,
|
||||
enum type_qualifier);
|
||||
|
||||
static int cpp_demangle_gnu3_push_head;
|
||||
|
||||
/**
|
||||
* @brief Decode the input string by IA-64 C++ ABI style.
|
||||
*
|
||||
@ -190,7 +197,6 @@ cpp_demangle_gnu3(const char *org)
|
||||
if (!cpp_demangle_data_init(&ddata, org + 2))
|
||||
return (NULL);
|
||||
|
||||
cpp_demangle_gnu3_push_head = 0;
|
||||
rtn = NULL;
|
||||
|
||||
if (!cpp_demangle_read_encoding(&ddata))
|
||||
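As orientation for the demangler changes that follow, a small usage sketch of this entry point (the prototype matches the function above; the mangled string is a standard Itanium-ABI example, and the returned buffer is assumed here to be heap-allocated):

	#include <stdio.h>
	#include <stdlib.h>

	/* Entry point defined in libelftc_dem_gnu3.c; NULL on failure. */
	char	*cpp_demangle_gnu3(const char *org);

	int
	main(void)
	{
		/* "_Z1fv" is the Itanium-ABI mangling of "f()". */
		char *s = cpp_demangle_gnu3("_Z1fv");

		if (s != NULL) {
			printf("%s\n", s);	/* expected: f() */
			free(s);
		}
		return (0);
	}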
@ -277,6 +283,7 @@ cpp_demangle_data_init(struct cpp_demangle_data *d, const char *cur)
|
||||
d->func_type = 0;
|
||||
d->cur = cur;
|
||||
d->last_sname = NULL;
|
||||
d->push_head = 0;
|
||||
|
||||
return (1);
|
||||
|
||||
@ -309,7 +316,6 @@ cpp_demangle_push_fp(struct cpp_demangle_data *ddata,
|
||||
fp = ddata->cur;
|
||||
while (*ddata->cur != 'E')
|
||||
++ddata->cur;
|
||||
++ddata->cur;
|
||||
|
||||
if ((f = decoder(fp, ddata->cur - fp)) == NULL)
|
||||
return (0);
|
||||
@ -320,6 +326,8 @@ cpp_demangle_push_fp(struct cpp_demangle_data *ddata,
|
||||
|
||||
free(f);
|
||||
|
||||
++ddata->cur;
|
||||
|
||||
return (rtn);
|
||||
}
|
||||
|
||||
@ -331,7 +339,7 @@ cpp_demangle_push_str(struct cpp_demangle_data *ddata, const char *str,
|
||||
if (ddata == NULL || str == NULL || len == 0)
|
||||
return (0);
|
||||
|
||||
if (cpp_demangle_gnu3_push_head > 0)
|
||||
if (ddata->push_head > 0)
|
||||
return (vector_str_push(&ddata->output_tmp, str, len));
|
||||
|
||||
return (vector_str_push(&ddata->output, str, len));
|
||||
@ -403,7 +411,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
|
||||
if (type_str != NULL) {
|
||||
if (!vector_str_push(&subst_v, "*", 1))
|
||||
goto clean;
|
||||
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v))
|
||||
goto clean;
|
||||
}
|
||||
break;
|
||||
@ -414,7 +423,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
|
||||
if (type_str != NULL) {
|
||||
if (!vector_str_push(&subst_v, "&", 1))
|
||||
goto clean;
|
||||
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v))
|
||||
goto clean;
|
||||
}
|
||||
break;
|
||||
@ -425,7 +435,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
|
||||
if (type_str != NULL) {
|
||||
if (!vector_str_push(&subst_v, " complex", 8))
|
||||
goto clean;
|
||||
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v))
|
||||
goto clean;
|
||||
}
|
||||
break;
|
||||
@ -434,23 +445,26 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
|
||||
if (!cpp_demangle_push_str(ddata, " imaginary", 10))
|
||||
goto clean;
|
||||
if (type_str != NULL) {
|
||||
if (!vector_str_push(&subst_v, " imaginary", 10))
|
||||
if (!vector_str_push(&subst_v, " imaginary",
|
||||
10))
|
||||
goto clean;
|
||||
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v))
|
||||
goto clean;
|
||||
}
|
||||
break;
|
||||
|
||||
case TYPE_EXT:
|
||||
if (e_idx > v->ext_name.size - 1)
|
||||
if (v->ext_name.size == 0 ||
|
||||
e_idx > v->ext_name.size - 1)
|
||||
goto clean;
|
||||
if ((e_len = strlen(v->ext_name.container[e_idx])) == 0)
|
||||
if ((e_len = strlen(v->ext_name.container[e_idx])) ==
|
||||
0)
|
||||
goto clean;
|
||||
if ((buf = malloc(sizeof(char) * (e_len + 1))) == NULL)
|
||||
if ((buf = malloc(e_len + 2)) == NULL)
|
||||
goto clean;
|
||||
|
||||
memcpy(buf, " ", 1);
|
||||
memcpy(buf + 1, v->ext_name.container[e_idx], e_len);
|
||||
snprintf(buf, e_len + 2, " %s",
|
||||
v->ext_name.container[e_idx]);
|
||||
|
||||
if (!cpp_demangle_push_str(ddata, buf, e_len + 1)) {
|
||||
free(buf);
|
||||
@ -463,7 +477,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
|
||||
free(buf);
|
||||
goto clean;
|
||||
}
|
||||
if (!cpp_demangle_push_subst_v(ddata, &subst_v)) {
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v)) {
|
||||
free(buf);
|
||||
goto clean;
|
||||
}
|
||||
@ -478,7 +493,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
|
||||
if (type_str != NULL) {
|
||||
if (!vector_str_push(&subst_v, " restrict", 9))
|
||||
goto clean;
|
||||
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v))
|
||||
goto clean;
|
||||
}
|
||||
break;
|
||||
@ -489,7 +505,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
|
||||
if (type_str != NULL) {
|
||||
if (!vector_str_push(&subst_v, " volatile", 9))
|
||||
goto clean;
|
||||
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v))
|
||||
goto clean;
|
||||
}
|
||||
break;
|
||||
@ -500,11 +517,42 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
|
||||
if (type_str != NULL) {
|
||||
if (!vector_str_push(&subst_v, " const", 6))
|
||||
goto clean;
|
||||
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v))
|
||||
goto clean;
|
||||
}
|
||||
break;
|
||||
|
||||
case TYPE_VEC:
|
||||
if (v->ext_name.size == 0 ||
|
||||
e_idx > v->ext_name.size - 1)
|
||||
goto clean;
|
||||
if ((e_len = strlen(v->ext_name.container[e_idx])) ==
|
||||
0)
|
||||
goto clean;
|
||||
if ((buf = malloc(e_len + 12)) == NULL)
|
||||
goto clean;
|
||||
snprintf(buf, e_len + 12, " __vector(%s)",
|
||||
v->ext_name.container[e_idx]);
|
||||
if (!cpp_demangle_push_str(ddata, buf, e_len + 11)) {
|
||||
free(buf);
|
||||
goto clean;
|
||||
}
|
||||
if (type_str != NULL) {
|
||||
if (!vector_str_push(&subst_v, buf,
|
||||
e_len + 11)) {
|
||||
free(buf);
|
||||
goto clean;
|
||||
}
|
||||
if (!cpp_demangle_push_subst_v(ddata,
|
||||
&subst_v)) {
|
||||
free(buf);
|
||||
goto clean;
|
||||
}
|
||||
}
|
||||
free(buf);
|
||||
++e_idx;
|
||||
break;
|
||||
};
|
||||
--idx;
|
||||
}
|
||||
@ -655,10 +703,14 @@ cpp_demangle_read_expr_primary(struct cpp_demangle_data *ddata)
|
||||
|
||||
switch (*ddata->cur) {
|
||||
case 'b':
|
||||
if (*(ddata->cur + 2) != 'E')
|
||||
return (0);
|
||||
switch (*(++ddata->cur)) {
|
||||
case '0':
|
||||
ddata->cur += 2;
|
||||
return (cpp_demangle_push_str(ddata, "false", 5));
|
||||
case '1':
|
||||
ddata->cur += 2;
|
||||
return (cpp_demangle_push_str(ddata, "true", 4));
|
||||
default:
|
||||
return (0);
|
||||
@ -707,7 +759,8 @@ cpp_demangle_read_expr_primary(struct cpp_demangle_data *ddata)
|
||||
++ddata->cur;
|
||||
}
|
||||
++ddata->cur;
|
||||
return (cpp_demangle_push_str(ddata, num, ddata->cur - num));
|
||||
return (cpp_demangle_push_str(ddata, num,
|
||||
ddata->cur - num - 1));
|
||||
|
||||
default:
|
||||
return (0);
|
||||
@ -998,6 +1051,38 @@ cpp_demangle_read_expression(struct cpp_demangle_data *ddata)
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cpp_demangle_read_expression_flat(struct cpp_demangle_data *ddata, char **str)
|
||||
{
|
||||
struct vector_str *output;
|
||||
size_t i, p_idx, idx, exp_len;
|
||||
char *exp;
|
||||
|
||||
output = ddata->push_head > 0 ? &ddata->output_tmp :
|
||||
&ddata->output;
|
||||
|
||||
p_idx = output->size;
|
||||
|
||||
if (!cpp_demangle_read_expression(ddata))
|
||||
return (0);
|
||||
|
||||
if ((exp = vector_str_substr(output, p_idx, output->size - 1,
|
||||
&exp_len)) == NULL)
|
||||
return (0);
|
||||
|
||||
idx = output->size;
|
||||
for (i = p_idx; i < idx; ++i) {
|
||||
if (!vector_str_pop(output)) {
|
||||
free(exp);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
*str = exp;
|
||||
|
||||
return (1);
|
||||
}
|
||||
|
||||
static int
|
||||
cpp_demangle_read_expression_binary(struct cpp_demangle_data *ddata,
|
||||
const char *name, size_t len)
|
||||
@ -1127,12 +1212,65 @@ cpp_demangle_read_function(struct cpp_demangle_data *ddata, int *ext_c,
|
||||
static int
|
||||
cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
|
||||
{
|
||||
char *name, *type, *num_str;
|
||||
long offset;
|
||||
int rtn;
|
||||
|
||||
if (ddata == NULL || *ddata->cur == '\0')
|
||||
return (0);
|
||||
|
||||
/* special name */
|
||||
switch (SIMPLE_HASH(*ddata->cur, *(ddata->cur + 1))) {
|
||||
case SIMPLE_HASH('G', 'A'):
|
||||
if (!cpp_demangle_push_str(ddata, "hidden alias for ", 17))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
return (cpp_demangle_read_encoding(ddata));
|
||||
|
||||
case SIMPLE_HASH('G', 'R'):
|
||||
if (!cpp_demangle_push_str(ddata, "reference temporary #", 21))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
if (!cpp_demangle_read_name_flat(ddata, &name))
|
||||
return (0);
|
||||
rtn = 0;
|
||||
if (!cpp_demangle_read_number_as_string(ddata, &num_str))
|
||||
goto clean1;
|
||||
if (!cpp_demangle_push_str(ddata, num_str, strlen(num_str)))
|
||||
goto clean2;
|
||||
if (!cpp_demangle_push_str(ddata, " for ", 5))
|
||||
goto clean2;
|
||||
if (!cpp_demangle_push_str(ddata, name, strlen(name)))
|
||||
goto clean2;
|
||||
rtn = 1;
|
||||
clean2:
|
||||
free(num_str);
|
||||
clean1:
|
||||
free(name);
|
||||
return (rtn);
|
||||
|
||||
case SIMPLE_HASH('G', 'T'):
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
switch (*ddata->cur) {
|
||||
case 'n':
|
||||
if (!cpp_demangle_push_str(ddata,
|
||||
"non-transaction clone for ", 26))
|
||||
return (0);
|
||||
case 't':
|
||||
default:
|
||||
if (!cpp_demangle_push_str(ddata,
|
||||
"transaction clone for ", 22))
|
||||
return (0);
|
||||
}
|
||||
++ddata->cur;
|
||||
return (cpp_demangle_read_encoding(ddata));
|
||||
|
||||
case SIMPLE_HASH('G', 'V'):
|
||||
/* sentry object for 1 time init */
|
||||
if (!cpp_demangle_push_str(ddata, "guard variable for ", 20))
|
||||
@ -1154,14 +1292,49 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
|
||||
return (0);
|
||||
return (cpp_demangle_read_encoding(ddata));
|
||||
|
||||
case SIMPLE_HASH('T', 'C'):
|
||||
/* construction vtable */
|
||||
if (!cpp_demangle_push_str(ddata, "construction vtable for ",
|
||||
24))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
if (!cpp_demangle_read_type_flat(ddata, &type))
|
||||
return (0);
|
||||
rtn = 0;
|
||||
if (!cpp_demangle_read_number(ddata, &offset))
|
||||
goto clean3;
|
||||
if (*ddata->cur++ != '_')
|
||||
goto clean3;
|
||||
if (!cpp_demangle_read_type(ddata, 0))
|
||||
goto clean3;
|
||||
if (!cpp_demangle_push_str(ddata, "-in-", 4))
|
||||
goto clean3;
|
||||
if (!cpp_demangle_push_str(ddata, type, strlen(type)))
|
||||
goto clean3;
|
||||
rtn = 1;
|
||||
clean3:
|
||||
free(type);
|
||||
return (rtn);
|
||||
|
||||
case SIMPLE_HASH('T', 'D'):
|
||||
/* typeinfo common proxy */
|
||||
break;
|
||||
|
||||
case SIMPLE_HASH('T', 'F'):
|
||||
/* typeinfo fn */
|
||||
if (!cpp_demangle_push_str(ddata, "typeinfo fn for ", 16))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
return (cpp_demangle_read_type(ddata, 0));
|
||||
|
||||
case SIMPLE_HASH('T', 'h'):
|
||||
/* virtual function non-virtual override thunk */
|
||||
if (cpp_demangle_push_str(ddata,
|
||||
"virtual function non-virtual override ", 38) == 0)
|
||||
if (!cpp_demangle_push_str(ddata,
|
||||
"virtual function non-virtual override ", 38))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
@ -1170,24 +1343,51 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
|
||||
return (0);
|
||||
return (cpp_demangle_read_encoding(ddata));
|
||||
|
||||
case SIMPLE_HASH('T', 'I'):
|
||||
/* typeinfo structure */
|
||||
/* FALLTHROUGH */
|
||||
case SIMPLE_HASH('T', 'S'):
|
||||
/* RTTI name (NTBS) */
|
||||
if (!cpp_demangle_push_str(ddata, "typeinfo for ", 14))
|
||||
case SIMPLE_HASH('T', 'H'):
|
||||
/* TLS init function */
|
||||
if (!cpp_demangle_push_str(ddata, "TLS init function for ",
|
||||
22))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
return (cpp_demangle_read_type(ddata, 1));
|
||||
break;
|
||||
|
||||
case SIMPLE_HASH('T', 'I'):
|
||||
/* typeinfo structure */
|
||||
if (!cpp_demangle_push_str(ddata, "typeinfo for ", 13))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
return (cpp_demangle_read_type(ddata, 0));
|
||||
|
||||
case SIMPLE_HASH('T', 'J'):
|
||||
/* java class */
|
||||
if (!cpp_demangle_push_str(ddata, "java Class for ", 15))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
return (cpp_demangle_read_type(ddata, 0));
|
||||
|
||||
case SIMPLE_HASH('T', 'S'):
|
||||
/* RTTI name (NTBS) */
|
||||
if (!cpp_demangle_push_str(ddata, "typeinfo name for ", 18))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
return (cpp_demangle_read_type(ddata, 0));
|
||||
|
||||
case SIMPLE_HASH('T', 'T'):
|
||||
/* VTT table */
|
||||
if (!cpp_demangle_push_str(ddata, "VTT for ", 8))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
return (cpp_demangle_read_type(ddata, 1));
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
return (cpp_demangle_read_type(ddata, 0));
|
||||
|
||||
case SIMPLE_HASH('T', 'v'):
|
||||
/* virtual function virtual override thunk */
|
||||
@ -1208,7 +1408,17 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
return (cpp_demangle_read_type(ddata, 1));
|
||||
return (cpp_demangle_read_type(ddata, 0));
|
||||
|
||||
case SIMPLE_HASH('T', 'W'):
|
||||
/* TLS wrapper function */
|
||||
if (!cpp_demangle_push_str(ddata, "TLS wrapper function for ",
|
||||
25))
|
||||
return (0);
|
||||
ddata->cur += 2;
|
||||
if (*ddata->cur == '\0')
|
||||
return (0);
|
||||
break;
|
||||
};
|
||||
|
||||
return (cpp_demangle_read_name(ddata));
|
||||
@ -1270,8 +1480,7 @@ cpp_demangle_read_name(struct cpp_demangle_data *ddata)
|
||||
if (ddata == NULL || *ddata->cur == '\0')
|
||||
return (0);
|
||||
|
||||
output = cpp_demangle_gnu3_push_head > 0 ?
|
||||
&ddata->output_tmp : &ddata->output;
|
||||
output = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
|
||||
|
||||
subst_str = NULL;
|
||||
|
||||
@ -1326,6 +1535,38 @@ clean:
|
||||
return (rtn);
|
||||
}
|
||||
|
||||
static int
|
||||
cpp_demangle_read_name_flat(struct cpp_demangle_data *ddata, char **str)
|
||||
{
|
||||
struct vector_str *output;
|
||||
size_t i, p_idx, idx, name_len;
|
||||
char *name;
|
||||
|
||||
output = ddata->push_head > 0 ? &ddata->output_tmp :
|
||||
&ddata->output;
|
||||
|
||||
p_idx = output->size;
|
||||
|
||||
if (!cpp_demangle_read_name(ddata))
|
||||
return (0);
|
||||
|
||||
if ((name = vector_str_substr(output, p_idx, output->size - 1,
|
||||
&name_len)) == NULL)
|
||||
return (0);
|
||||
|
||||
idx = output->size;
|
||||
for (i = p_idx; i < idx; ++i) {
|
||||
if (!vector_str_pop(output)) {
|
||||
free(name);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
*str = name;
|
||||
|
||||
return (1);
|
||||
}
|
||||
|
||||
static int
|
||||
cpp_demangle_read_nested_name(struct cpp_demangle_data *ddata)
|
||||
{
|
||||
@ -1355,8 +1596,7 @@ cpp_demangle_read_nested_name(struct cpp_demangle_data *ddata)
|
||||
++ddata->cur;
|
||||
}
|
||||
|
||||
output = cpp_demangle_gnu3_push_head > 0 ?
|
||||
&ddata->output_tmp : &ddata->output;
|
||||
output = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
|
||||
if (!vector_str_init(&v))
|
||||
return (0);
|
||||
|
||||
@ -1452,6 +1692,24 @@ cpp_demangle_read_number(struct cpp_demangle_data *ddata, long *rtn)
|
||||
return (1);
|
||||
}
|
||||
|
||||
static int
|
||||
cpp_demangle_read_number_as_string(struct cpp_demangle_data *ddata, char **str)
|
||||
{
|
||||
long n;
|
||||
|
||||
if (!cpp_demangle_read_number(ddata, &n)) {
|
||||
*str = NULL;
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (asprintf(str, "%ld", n) < 0) {
|
||||
*str = NULL;
|
||||
return (0);
|
||||
}
|
||||
|
||||
return (1);
|
||||
}
|
||||
|
||||
static int
|
||||
cpp_demangle_read_nv_offset(struct cpp_demangle_data *ddata)
|
||||
{
|
||||
@ -1581,9 +1839,18 @@ static int
|
||||
cpp_demangle_read_sname(struct cpp_demangle_data *ddata)
|
||||
{
|
||||
long len;
|
||||
int err;
|
||||
|
||||
if (ddata == NULL || cpp_demangle_read_number(ddata, &len) == 0 ||
|
||||
len <= 0 || cpp_demangle_push_str(ddata, ddata->cur, len) == 0)
|
||||
len <= 0)
|
||||
return (0);
|
||||
|
||||
if (len == 12 && (memcmp("_GLOBAL__N_1", ddata->cur, 12) == 0))
|
||||
err = cpp_demangle_push_str(ddata, "(anonymous namespace)", 21);
|
||||
else
|
||||
err = cpp_demangle_push_str(ddata, ddata->cur, len);
|
||||
|
||||
if (err == 0)
|
||||
return (0);
|
||||
|
||||
assert(ddata->output.size > 0);
|
||||
@ -1732,8 +1999,7 @@ cpp_demangle_read_subst_std(struct cpp_demangle_data *ddata)
|
||||
|
||||
ddata->cur += 2;
|
||||
|
||||
output = cpp_demangle_gnu3_push_head > 0 ?
|
||||
&ddata->output_tmp : &ddata->output;
|
||||
output = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
|
||||
|
||||
p_idx = output->size;
|
||||
if (!cpp_demangle_read_uqname(ddata))
|
||||
@ -1783,8 +2049,7 @@ cpp_demangle_read_subst_stdtmpl(struct cpp_demangle_data *ddata,
|
||||
if (ddata == NULL || str == NULL || len == 0)
|
||||
return (0);
|
||||
|
||||
output = cpp_demangle_gnu3_push_head > 0 ? &ddata->output_tmp :
|
||||
&ddata->output;
|
||||
output = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
|
||||
|
||||
p_idx = output->size;
|
||||
substr = NULL;
|
||||
@ -1852,8 +2117,7 @@ cpp_demangle_read_tmpl_args(struct cpp_demangle_data *ddata)
|
||||
return (0);
|
||||
|
||||
limit = 0;
|
||||
v = cpp_demangle_gnu3_push_head > 0 ?
|
||||
&ddata->output_tmp : &ddata->output;
|
||||
v = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
|
||||
for (;;) {
|
||||
idx = v->size;
|
||||
if (!cpp_demangle_read_tmpl_arg(ddata))
|
||||
@ -1936,14 +2200,14 @@ cpp_demangle_read_type(struct cpp_demangle_data *ddata, int delimit)
|
||||
size_t p_idx, type_str_len;
|
||||
int extern_c, is_builtin;
|
||||
long len;
|
||||
char *type_str;
|
||||
char *type_str, *exp_str, *num_str;
|
||||
|
||||
if (ddata == NULL)
|
||||
return (0);
|
||||
|
||||
output = &ddata->output;
|
||||
if (!strncmp(ddata->output.container[ddata->output.size - 1], ">", 1)) {
|
||||
cpp_demangle_gnu3_push_head++;
|
||||
ddata->push_head++;
|
||||
output = &ddata->output_tmp;
|
||||
} else if (delimit == 1) {
|
||||
if (ddata->paren == false) {
|
||||
@ -1978,7 +2242,7 @@ cpp_demangle_read_type(struct cpp_demangle_data *ddata, int delimit)
|
||||
extern_c = 0;
|
||||
is_builtin = 1;
|
||||
p_idx = output->size;
|
||||
type_str = NULL;
|
||||
type_str = exp_str = num_str = NULL;
|
||||
again:
|
||||
/* builtin type */
|
||||
switch (*ddata->cur) {
|
||||
@ -2024,6 +2288,82 @@ again:
|
||||
++ddata->cur;
|
||||
goto rtn;
|
||||
|
||||
case 'D':
|
||||
++ddata->cur;
|
||||
switch (*ddata->cur) {
|
||||
case 'd':
|
||||
/* IEEE 754r decimal floating point (64 bits) */
|
||||
if (!cpp_demangle_push_str(ddata, "decimal64", 9))
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
break;
|
||||
case 'e':
|
||||
/* IEEE 754r decimal floating point (128 bits) */
|
||||
if (!cpp_demangle_push_str(ddata, "decimal128", 10))
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
break;
|
||||
case 'f':
|
||||
/* IEEE 754r decimal floating point (32 bits) */
|
||||
if (!cpp_demangle_push_str(ddata, "decimal32", 9))
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
break;
|
||||
case 'h':
|
||||
/* IEEE 754r half-precision floating point (16 bits) */
|
||||
if (!cpp_demangle_push_str(ddata, "half", 4))
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
break;
|
||||
case 'i':
|
||||
/* char32_t */
|
||||
if (!cpp_demangle_push_str(ddata, "char32_t", 8))
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
break;
|
||||
case 'n':
|
||||
/* std::nullptr_t (i.e., decltype(nullptr)) */
|
||||
if (!cpp_demangle_push_str(ddata, "decltype(nullptr)",
|
||||
17))
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
break;
|
||||
case 's':
|
||||
/* char16_t */
|
||||
if (!cpp_demangle_push_str(ddata, "char16_t", 8))
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
break;
|
||||
case 'v':
|
||||
/* gcc vector_size extension. */
|
||||
++ddata->cur;
|
||||
if (*ddata->cur == '_') {
|
||||
++ddata->cur;
|
||||
if (!cpp_demangle_read_expression_flat(ddata,
|
||||
&exp_str))
|
||||
goto clean;
|
||||
if (!vector_str_push(&v.ext_name, exp_str,
|
||||
strlen(exp_str)))
|
||||
goto clean;
|
||||
} else {
|
||||
if (!cpp_demangle_read_number_as_string(ddata,
|
||||
&num_str))
|
||||
goto clean;
|
||||
if (!vector_str_push(&v.ext_name, num_str,
|
||||
strlen(num_str)))
|
||||
goto clean;
|
||||
}
|
||||
if (*ddata->cur != '_')
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
if (!vector_type_qualifier_push(&v, TYPE_VEC))
|
||||
goto clean;
|
||||
goto again;
|
||||
default:
|
||||
goto clean;
|
||||
}
|
||||
goto rtn;
|
||||
|
||||
case 'e':
|
||||
/* long double */
|
||||
if (!cpp_demangle_push_str(ddata, "long double", 11))
|
||||
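To illustrate the new TYPE_VEC path above: the Itanium ABI encodes GCC's vector_size extension as "Dv <dimension> _ <element-type>". A hedged example of the rendering (the spacing follows the " __vector(%s)" format used in cpp_demangle_push_type_qualifier(); other demanglers may print this differently):

	/*
	 * Illustration (assumed, based on the code above):
	 *
	 *   mangled fragment:  Dv4_f
	 *   element count:     4  (read via cpp_demangle_read_number_as_string)
	 *   element type:      f  -> "float"
	 *   rendered result:   "float __vector(4)"
	 */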
@ -2118,7 +2458,7 @@ again:
|
||||
|
||||
case 'o':
|
||||
/* unsigned __int128 */
|
||||
if (!cpp_demangle_push_str(ddata, "unsigned _;int128", 17))
|
||||
if (!cpp_demangle_push_str(ddata, "unsigned __int128", 17))
|
||||
goto clean;
|
||||
++ddata->cur;
|
||||
goto rtn;
|
||||
@ -2189,6 +2529,8 @@ again:
|
||||
if (!vector_str_push(&v.ext_name, ddata->cur, len))
|
||||
return (0);
|
||||
ddata->cur += len;
|
||||
if (!vector_type_qualifier_push(&v, TYPE_EXT))
|
||||
goto clean;
|
||||
goto again;
|
||||
|
||||
case 'v':
|
||||
@ -2253,14 +2595,16 @@ rtn:
|
||||
goto clean;
|
||||
|
||||
free(type_str);
|
||||
free(exp_str);
|
||||
free(num_str);
|
||||
vector_type_qualifier_dest(&v);
|
||||
|
||||
if (cpp_demangle_gnu3_push_head > 0) {
|
||||
if (ddata->push_head > 0) {
|
||||
if (*ddata->cur == 'I' && cpp_demangle_read_tmpl_args(ddata)
|
||||
== 0)
|
||||
return (0);
|
||||
|
||||
if (--cpp_demangle_gnu3_push_head > 0)
|
||||
if (--ddata->push_head > 0)
|
||||
return (1);
|
||||
|
||||
if (!vector_str_push(&ddata->output_tmp, " ", 1))
|
||||
@ -2284,11 +2628,45 @@ rtn:
|
||||
return (1);
|
||||
clean:
|
||||
free(type_str);
|
||||
free(exp_str);
|
||||
free(num_str);
|
||||
vector_type_qualifier_dest(&v);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
cpp_demangle_read_type_flat(struct cpp_demangle_data *ddata, char **str)
|
||||
{
|
||||
struct vector_str *output;
|
||||
size_t i, p_idx, idx, type_len;
|
||||
char *type;
|
||||
|
||||
output = ddata->push_head > 0 ? &ddata->output_tmp :
|
||||
&ddata->output;
|
||||
|
||||
p_idx = output->size;
|
||||
|
||||
if (!cpp_demangle_read_type(ddata, 0))
|
||||
return (0);
|
||||
|
||||
if ((type = vector_str_substr(output, p_idx, output->size - 1,
|
||||
&type_len)) == NULL)
|
||||
return (0);
|
||||
|
||||
idx = output->size;
|
||||
for (i = p_idx; i < idx; ++i) {
|
||||
if (!vector_str_pop(output)) {
|
||||
free(type);
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
*str = type;
|
||||
|
||||
return (1);
|
||||
}
|
||||
|
||||
/*
|
||||
* read unqualified-name, unqualified name are operator-name, ctor-dtor-name,
|
||||
* source-name
|
||||
|
@@ -1,3 +1,3 @@
# $Id: os.Linux.mk 994 2010-06-13 10:39:19Z jkoshy $
# $Id: os.Linux.mk 3210 2015-05-17 13:40:49Z kaiwang27 $

CFLAGS+=	-Wall
CFLAGS+=	-Wall -D_GNU_SOURCE
@@ -22,7 +22,7 @@
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
.\" $Id: readelf.1 3195 2015-05-12 17:22:19Z emaste $
.\" $Id: readelf.1 3219 2015-05-24 23:42:34Z kaiwang27 $
.\"
.Dd September 13, 2012
.Os
.Sh NAME
@@ -90,7 +90,7 @@ sections in the ELF object.
.It Fl e | Fl -headers
Print all program, file and section headers in the ELF object.
.It Fl g | Fl -section-groups
This option is recognized, but is ignored.
Print the contents of the section groups in the ELF object.
.It Fl h | Fl -file-header
Print the file header of the ELF object.
.It Fl l | Fl -program-headers
@@ -1,5 +1,5 @@
/*-
 * Copyright (c) 2009-2014 Kai Wang
 * Copyright (c) 2009-2015 Kai Wang
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -46,7 +46,7 @@

#include "_elftc.h"

ELFTC_VCSID("$Id: readelf.c 3189 2015-04-20 17:02:01Z emaste $");
ELFTC_VCSID("$Id: readelf.c 3223 2015-05-25 20:37:57Z emaste $");

/*
 * readelf(1) options.
@@ -302,6 +302,7 @@ static void dump_gnu_hash(struct readelf *re, struct section *s);
static void	dump_hash(struct readelf *re);
static void	dump_phdr(struct readelf *re);
static void	dump_ppc_attributes(uint8_t *p, uint8_t *pe);
static void	dump_section_groups(struct readelf *re);
static void	dump_symtab(struct readelf *re, int i);
static void	dump_symtabs(struct readelf *re);
static uint8_t	*dump_unknown_tag(uint64_t tag, uint8_t *p);
@@ -445,6 +446,7 @@ elf_machine(unsigned int mach)
	case EM_SPARC: return "Sun SPARC";
	case EM_386: return "Intel i386";
	case EM_68K: return "Motorola 68000";
	case EM_IAMCU: return "Intel MCU";
	case EM_88K: return "Motorola 88000";
	case EM_860: return "Intel i860";
	case EM_MIPS: return "MIPS R3000 Big-Endian only";
@@ -1050,6 +1052,7 @@ r_type(unsigned int mach, unsigned int type)
	switch(mach) {
	case EM_NONE: return "";
	case EM_386:
	case EM_IAMCU:
		switch(type) {
		case 0: return "R_386_NONE";
		case 1: return "R_386_32";
@@ -2381,6 +2384,7 @@ dwarf_reg(unsigned int mach, unsigned int reg)

	switch (mach) {
	case EM_386:
	case EM_IAMCU:
		switch (reg) {
		case 0: return "eax";
		case 1: return "ecx";
|
||||
|
||||
#undef Elf_Lib
|
||||
|
||||
static void
|
||||
dump_section_groups(struct readelf *re)
|
||||
{
|
||||
struct section *s;
|
||||
const char *symname;
|
||||
Elf_Data *d;
|
||||
uint32_t *w;
|
||||
int i, j, elferr;
|
||||
size_t n;
|
||||
|
||||
for (i = 0; (size_t) i < re->shnum; i++) {
|
||||
s = &re->sl[i];
|
||||
if (s->type != SHT_GROUP)
|
||||
continue;
|
||||
(void) elf_errno();
|
||||
if ((d = elf_getdata(s->scn, NULL)) == NULL) {
|
||||
elferr = elf_errno();
|
||||
if (elferr != 0)
|
||||
warnx("elf_getdata failed: %s",
|
||||
elf_errmsg(elferr));
|
||||
continue;
|
||||
}
|
||||
if (d->d_size <= 0)
|
||||
continue;
|
||||
|
||||
w = d->d_buf;
|
||||
|
||||
/* We only support COMDAT section. */
|
||||
#ifndef GRP_COMDAT
|
||||
#define GRP_COMDAT 0x1
|
||||
#endif
|
||||
if ((*w++ & GRP_COMDAT) == 0)
|
||||
return;
|
||||
|
||||
if (s->entsize == 0)
|
||||
s->entsize = 4;
|
||||
|
||||
symname = get_symbol_name(re, s->link, s->info);
|
||||
n = s->sz / s->entsize;
|
||||
if (n-- < 1)
|
||||
return;
|
||||
|
||||
printf("\nCOMDAT group section [%5d] `%s' [%s] contains %ju"
|
||||
" sections:\n", i, s->name, symname, (uintmax_t)n);
|
||||
printf(" %-10.10s %s\n", "[Index]", "Name");
|
||||
for (j = 0; (size_t) j < n; j++, w++) {
|
||||
if (*w >= re->shnum) {
|
||||
warnx("invalid section index: %u", *w);
|
||||
continue;
|
||||
}
|
||||
printf(" [%5u] %s\n", *w, re->sl[*w].name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
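A sketch of what the new -g output looks like, derived from the printf formats above (the section numbers, group name and signature symbol below are hypothetical):

	/*
	 * $ readelf -g foo.o
	 *
	 * COMDAT group section [    5] `.group' [_Z3foov] contains 2 sections:
	 *    [Index]    Name
	 *    [    6]    .text._Z3foov
	 *    [    7]    .rela.text._Z3foov
	 */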
static uint8_t *
dump_unknown_tag(uint64_t tag, uint8_t *p)
{
@@ -6838,6 +6897,8 @@ dump_elf(struct readelf *re)
		dump_phdr(re);
	if (re->options & RE_SS)
		dump_shdr(re);
	if (re->options & RE_G)
		dump_section_groups(re);
	if (re->options & RE_D)
		dump_dynamic(re);
	if (re->options & RE_R)
@@ -7311,7 +7372,7 @@ Usage: %s [options] file...\n\
  -c | --archive-index     Print the archive symbol table for archives.\n\
  -d | --dynamic           Print the contents of SHT_DYNAMIC sections.\n\
  -e | --headers           Print all headers in the object.\n\
  -g | --section-groups    (accepted, but ignored)\n\
  -g | --section-groups    Print the contents of the section groups.\n\
  -h | --file-header       Print the file header for the object.\n\
  -l | --program-headers   Print the PHDR table for the object.\n\
  -n | --notes             Print the contents of SHT_NOTE sections.\n\
@@ -7365,8 +7426,8 @@ main(int argc, char **argv)
			re->options |= RE_AA;
			break;
		case 'a':
			re->options |= RE_AA | RE_D | RE_H | RE_II | RE_L |
			    RE_R | RE_SS | RE_S | RE_VV;
			re->options |= RE_AA | RE_D | RE_G | RE_H | RE_II |
			    RE_L | RE_R | RE_SS | RE_S | RE_VV;
			break;
		case 'c':
			re->options |= RE_C;
@ -134,6 +134,8 @@ namespace ISD {
|
||||
|
||||
/// Index original Function's argument.
|
||||
unsigned OrigArgIndex;
|
||||
/// Sentinel value for implicit machine-level input arguments.
|
||||
static const unsigned NoArgIndex = UINT_MAX;
|
||||
|
||||
/// Offset in bytes of current input value relative to the beginning of
|
||||
/// original argument. E.g. if the argument was split into four 32-bit
|
||||
@ -147,6 +149,15 @@ namespace ISD {
|
||||
VT = vt.getSimpleVT();
|
||||
ArgVT = argvt;
|
||||
}
|
||||
|
||||
bool isOrigArg() const {
|
||||
return OrigArgIndex != NoArgIndex;
|
||||
}
|
||||
|
||||
unsigned getOrigArgIndex() const {
|
||||
assert(OrigArgIndex != NoArgIndex && "Implicit machine-level argument");
|
||||
return OrigArgIndex;
|
||||
}
|
||||
};
|
||||
|
||||
/// OutputArg - This struct carries flags and a value for a
|
||||
|
@ -2806,6 +2806,11 @@ public:
|
||||
virtual bool useLoadStackGuardNode() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns true if arguments should be sign-extended in lib calls.
|
||||
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
|
||||
return IsSigned;
|
||||
}
|
||||
};
|
||||
|
||||
/// Given an LLVM IR type and return type attributes, compute the return value
|
||||
|
@ -1776,9 +1776,12 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
|
||||
<< *IsomorphicInc << '\n');
|
||||
Value *NewInc = OrigInc;
|
||||
if (OrigInc->getType() != IsomorphicInc->getType()) {
|
||||
Instruction *IP = isa<PHINode>(OrigInc)
|
||||
? (Instruction*)L->getHeader()->getFirstInsertionPt()
|
||||
: OrigInc->getNextNode();
|
||||
Instruction *IP = nullptr;
|
||||
if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
|
||||
IP = PN->getParent()->getFirstInsertionPt();
|
||||
else
|
||||
IP = OrigInc->getNextNode();
|
||||
|
||||
IRBuilder<> Builder(IP);
|
||||
Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
|
||||
NewInc = Builder.
|
||||
|
@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
|
||||
I != E; ++I) {
|
||||
unsigned MappedDef = *I;
|
||||
// Source of copy is no longer available for propagation.
|
||||
if (AvailCopyMap.erase(MappedDef)) {
|
||||
for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
|
||||
AvailCopyMap.erase(*SR);
|
||||
}
|
||||
AvailCopyMap.erase(MappedDef);
|
||||
for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
|
||||
AvailCopyMap.erase(*SR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1160,13 +1160,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
|
||||
LegalOperations = Level >= AfterLegalizeVectorOps;
|
||||
LegalTypes = Level >= AfterLegalizeTypes;
|
||||
|
||||
// Early exit if this basic block is in an optnone function.
|
||||
AttributeSet FnAttrs =
|
||||
DAG.getMachineFunction().getFunction()->getAttributes();
|
||||
if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
|
||||
Attribute::OptimizeNone))
|
||||
return;
|
||||
|
||||
// Add all the dag nodes to the worklist.
|
||||
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
|
||||
E = DAG.allnodes_end(); I != E; ++I)
|
||||
@ -2788,9 +2781,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
|
||||
SplatBitSize = SplatBitSize * 2)
|
||||
SplatValue |= SplatValue.shl(SplatBitSize);
|
||||
|
||||
Constant = APInt::getAllOnesValue(BitWidth);
|
||||
for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
|
||||
Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
|
||||
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
|
||||
// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
|
||||
if (SplatBitSize % BitWidth == 0) {
|
||||
Constant = APInt::getAllOnesValue(BitWidth);
|
||||
for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
|
||||
Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
|
||||
}
|
||||
}
|
||||
}
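The visitAND change only collapses a splatted vector constant into a single per-element mask when the splat element width is an exact multiple of the element width. A small self-contained sketch of the same guard, using plain 64-bit arithmetic in place of APInt (names are illustrative, not LLVM's):

#include <cassert>
#include <cstdint>

// Returns false when the splat pattern straddles element boundaries, in
// which case no single per-element constant is correct; that is the case
// the DAGCombiner patch stops mishandling.
static bool perElementMask(uint64_t splat, unsigned splatBits,
                           unsigned bitWidth, uint64_t &mask) {
    if (splatBits % bitWidth != 0)
        return false;
    const uint64_t ones = ~0ULL >> (64 - bitWidth); // getAllOnesValue
    mask = ones;
    for (unsigned i = 0, n = splatBits / bitWidth; i < n; ++i)
        mask &= (splat >> (i * bitWidth)) & ones;   // lshr + zextOrTrunc
    return true;
}

int main() {
    uint64_t m;
    assert(perElementMask(0x00ff00ff00ff00ffULL, 64, 16, m) && m == 0x00ff);
    assert(!perElementMask(0x1ffffULL, 17, 16, m)); // 17 % 16 != 0: rejected
}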

@ -11043,7 +11040,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
} else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
// If the input vector is too large, try to split it.
// We don't support having two input vectors that are too large.
if (VecIn2.getNode())
// If the zero vector was used, we can not split the vector,
// since we'd need 3 inputs.
if (UsesZeroVector || VecIn2.getNode())
return SDValue();

if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
@ -11055,7 +11054,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
UsesZeroVector = false;
} else
return SDValue();
}

@ -497,7 +497,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (auto *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
@ -518,8 +518,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
if (CI->isZero())
continue;
// N = N + Offset
TotalOffs +=
DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
if (TotalOffs >= MaxOffs) {
N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.

@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
&Op, 1, false, dl).first;
&Op, 1, Signed, dl).first;
}

@ -1423,9 +1423,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {

// If one or more successors has been unscheduled, then the current
// node is no longer available.
if (!TrySU->isAvailable)
if (!TrySU->isAvailable || !TrySU->NodeQueueId)
CurSU = AvailableQueue->pop();
else {
// Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
}

@ -3399,30 +3399,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = StTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS);
unsigned PtrSize = PtrTy.getSizeInBits();
APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));

// If this is a constant subscript, handle it quickly.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
uint64_t Offs =
DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
EVT PTy = TLI.getPointerTy(AS);
unsigned PtrBits = PTy.getSizeInBits();
if (PtrBits < 64)
OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
DAG.getConstant(Offs, MVT::i64));
else
OffsVal = DAG.getConstant(Offs, PTy);

N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
OffsVal);
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
SDValue OffsVal = DAG.getConstant(Offs, PtrTy);
N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal);
continue;
}
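The rewritten visitGetElementPtr keeps the index-times-size arithmetic in pointer-width APInts instead of computing a 64-bit offset and truncating afterwards. A hedged sketch of why that matters on a 32-bit target, with uint32_t standing in for pointer-width modular arithmetic:

#include <cassert>
#include <cstdint>

// Offset computed the new way: sign-extend/truncate the index to the
// pointer width first (sextOrTrunc), then multiply modulo 2^32, matching
// the wraparound of the eventual pointer add.
static uint32_t gepOffset32(uint32_t elementSize, int64_t index) {
    uint32_t idx = static_cast<uint32_t>(index); // sextOrTrunc(PtrSize)
    return elementSize * idx;                    // ElementSize * idx, mod 2^32
}

int main() {
    // A negative constant index wraps to the same bit pattern the final
    // 32-bit pointer addition will see, so &p[-1] still works.
    assert(gepOffset32(4, -1) == static_cast<uint32_t>(-4));
}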

// N = N + Idx * ElementSize;
APInt ElementSize =
APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);

// If the index is smaller or larger than intptr_t, truncate or extend
@ -5727,6 +5718,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Skip the first return-type Attribute to get to params.
Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);

// If we have an explicit sret argument that is an Instruction, (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
if (Entry.isSRet && isa<Instruction>(V))
isTailCall = false;
}

// Check if target-independent constraints permit a tail call here.
@ -7353,6 +7349,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());

// sret demotion isn't compatible with tail-calls, since the sret argument
// points into the callers stack frame.
CLI.IsTailCall = false;
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
@ -7638,7 +7638,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}

@ -96,18 +96,19 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());

Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
.setSExtResult(isSigned).setZExtResult(!isSigned);
.setSExtResult(signExtend).setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
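makeLibCall now asks the target, per operand and for the result, whether a value must be sign-extended, instead of blindly following the libcall's signedness. A minimal sketch of the hook's shape with deliberately simplified types (EVT reduced to an enum; the real MIPS override appears later in this diff):

#include <cassert>

enum class VT { i32, i64 };

// Default mirrors the new TargetLowering::shouldSignExtendTypeInLibCall.
struct TargetLoweringSketch {
    virtual ~TargetLoweringSketch() = default;
    virtual bool shouldSignExtendTypeInLibCall(VT, bool IsSigned) const {
        return IsSigned;
    }
};

// Hypothetical target that, like 64-bit soft-float MIPS, must pass i32
// values sign-extended regardless of the C-level signedness.
struct Mips64Sketch : TargetLoweringSketch {
    bool shouldSignExtendTypeInLibCall(VT T, bool IsSigned) const override {
        if (T == VT::i32)
            return true;
        return IsSigned;
    }
};

int main() {
    Mips64Sketch TL;
    assert(TL.shouldSignExtendTypeInLibCall(VT::i32, /*IsSigned=*/false));
}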

@ -177,25 +177,30 @@ bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile &Obj) const {
}

template <typename Impl>
void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &ObjImg,
void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) {
unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
ObjSectionToIDMap::iterator i, e;

for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
const SectionRef &Section = i->first;
for (const auto &Section : Obj.sections()) {
StringRef Name;
Section.getName(Name);
if (Name == "__eh_frame")
EHFrameSID = i->second;
else if (Name == "__text")
TextSID = i->second;

// Force emission of the __text, __eh_frame, and __gcc_except_tab sections
// if they're present. Otherwise call down to the impl to handle other
// sections that have already been emitted.
if (Name == "__text")
TextSID = findOrEmitSection(Obj, Section, true, SectionMap);
else if (Name == "__eh_frame")
EHFrameSID = findOrEmitSection(Obj, Section, false, SectionMap);
else if (Name == "__gcc_except_tab")
ExceptTabSID = i->second;
else
impl().finalizeSection(ObjImg, i->second, Section);
ExceptTabSID = findOrEmitSection(Obj, Section, true, SectionMap);
else {
auto I = SectionMap.find(Section);
if (I != SectionMap.end())
impl().finalizeSection(Obj, I->second, Section);
}
}
UnregisteredEHFrameSections.push_back(
EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID));
@ -238,7 +243,8 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P,
}

static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
int64_t ObjDistance = A->ObjAddress - B->ObjAddress;
int64_t ObjDistance =
static_cast<int64_t>(A->ObjAddress) - static_cast<int64_t>(B->ObjAddress);
int64_t MemDistance = A->LoadAddress - B->LoadAddress;
return ObjDistance - MemDistance;
}
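The computeDelta() change casts each unsigned ObjAddress to int64_t before subtracting. A tiny sketch of the hazard it closes, assuming (as on a 32-bit host) that the address field is a 32-bit unsigned integer:

#include <cassert>
#include <cstdint>

// The unsigned difference wraps, then zero-extends into int64_t as a huge
// positive number; casting each operand first keeps negative distances
// negative.
static int64_t deltaWrong(uint32_t a, uint32_t b) {
    return static_cast<int64_t>(a - b);           // wraps, then zero-extends
}
static int64_t deltaFixed(uint32_t a, uint32_t b) {
    return static_cast<int64_t>(a) - static_cast<int64_t>(b);
}

int main() {
    assert(deltaWrong(0x1000, 0x3000) > 0);       // bogus: 0xFFFFE000
    assert(deltaFixed(0x1000, 0x3000) == -0x2000);
}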

@ -1120,27 +1120,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return ConstantInt::get(CI1->getContext(), C1V | C2V);
case Instruction::Xor:
return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
case Instruction::Shl: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
case Instruction::LShr: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
case Instruction::AShr: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
case Instruction::Shl:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.shl(C2V));
return UndefValue::get(C1->getType()); // too big shift is undef
case Instruction::LShr:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V));
return UndefValue::get(C1->getType()); // too big shift is undef
case Instruction::AShr:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V));
return UndefValue::get(C1->getType()); // too big shift is undef
}
}
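The consolidated shift cases compare the full shift amount against the bit width with APInt::ult before folding; the old code first funnelled the amount through getZExtValue() into uint32_t, which can wrap an oversized amount back into the legal range. A standalone sketch of the two behaviors (std::optional stands in for "undef"; assumes a compiler providing __int128):

#include <cassert>
#include <cstdint>
#include <optional>

// Folding x << amt for 64-bit x. The amount arrives as a 128-bit value to
// mimic an APInt wider than the value being shifted.
static std::optional<uint64_t> foldShl(uint64_t x, unsigned __int128 amt) {
    if (amt >= 64)                       // compare before any narrowing
        return std::nullopt;             // oversized shift is undef
    return x << static_cast<unsigned>(amt);
}

int main() {
    unsigned __int128 huge = (static_cast<unsigned __int128>(1) << 64) | 1;
    // Truncating `huge` to uint32_t first would give 1 and "fold" to x << 1.
    assert(!foldShl(2, huge));           // correctly treated as undef
    assert(*foldShl(2, 3) == 16);        // ordinary fold still works
}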

@ -263,10 +263,12 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
// required to combine the edge counts that are contained in the GCDA file.
for (uint32_t BlockNo = 0; Count > 0; ++BlockNo) {
// The last block is always reserved for exit block
if (BlockNo >= Blocks.size()-1) {
if (BlockNo >= Blocks.size()) {
errs() << "Unexpected number of edges (in " << Name << ").\n";
return false;
}
if (BlockNo == Blocks.size() - 1)
errs() << "(" << Name << ") has arcs from exit block.\n";
GCOVBlock &Block = *Blocks[BlockNo];
for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End;
++EdgeNo) {
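The readGCDA() fix compares the block number against Blocks.size() rather than size()-1: with an empty block list the old expression underflows size_t and the bounds check never fires, and arcs from the (last) exit block now produce a warning instead of a hard error. A sketch of the corrected logic:

#include <cstddef>
#include <cstdio>

// Mirrors the patched bounds check; numBlocks == 0 no longer underflows.
static bool checkBlockNo(size_t blockNo, size_t numBlocks) {
    if (blockNo >= numBlocks) {
        std::fprintf(stderr, "Unexpected number of edges.\n");
        return false;
    }
    if (blockNo == numBlocks - 1)        // last block: reserved exit block
        std::fprintf(stderr, "has arcs from exit block.\n");
    return true;
}

int main() {
    return checkBlockNo(0, 0) ? 1 : 0;   // safely rejected, no underflow
}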

@ -333,23 +333,12 @@ void Memory::InvalidateInstructionCache(const void *Addr,
for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
asm volatile("icbi 0, %0" : : "r"(Line));
asm volatile("isync");
# elif (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \
defined(__GNUC__)
// FIXME: Can we safely always call this for __GNUC__ everywhere?
const char *Start = static_cast<const char *>(Addr);
const char *End = Start + Len;
__clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
# elif defined(__mips__)
const char *Start = static_cast<const char *>(Addr);
# if defined(ANDROID)
// The declaration of "cacheflush" in Android bionic:
// extern int cacheflush(long start, long end, long flags);
const char *End = Start + Len;
long LStart = reinterpret_cast<long>(const_cast<char *>(Start));
long LEnd = reinterpret_cast<long>(const_cast<char *>(End));
cacheflush(LStart, LEnd, BCACHE);
# else
cacheflush(const_cast<char *>(Start), Len, BCACHE);
# endif
# endif

#endif // end apple

@ -10,9 +10,15 @@
#ifdef HAVE___CHKSTK
EXPLICIT_SYMBOL(__chkstk)
#endif
#ifdef HAVE___CHKSTK_MS
EXPLICIT_SYMBOL(__chkstk_ms)
#endif
#ifdef HAVE____CHKSTK
EXPLICIT_SYMBOL(___chkstk)
#endif
#ifdef HAVE____CHKSTK_MS
EXPLICIT_SYMBOL(___chkstk_ms)
#endif
#ifdef HAVE___MAIN
EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
#endif

@ -2031,18 +2031,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[i].OrigArgIndex;

// Get type of the original argument.
EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();

// Get type of the original argument.
EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);

@ -3092,8 +3092,11 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,

for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
if (Ins[VA.getValNo()].isOrigArg()) {
std::advance(CurOrigArg,
Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
}
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
@ -3173,7 +3176,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");

int index = ArgLocs[i].getValNo();
int index = VA.getValNo();

// Some Ins[] entries become multiple ArgLoc[] entries.
// Process them only once.
@ -3186,6 +3189,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
assert(Ins[index].isOrigArg() &&
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();

ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());

@ -3667,43 +3667,44 @@ bool MipsAsmParser::parseDirectiveModule() {
return false;
}

if (Lexer.is(AsmToken::Identifier)) {
StringRef Option = Parser.getTok().getString();
Parser.Lex();

if (Option == "oddspreg") {
getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");

if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}

return false;
} else if (Option == "nooddspreg") {
if (!isABI_O32()) {
Error(L, "'.module nooddspreg' requires the O32 ABI");
return false;
}

getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");

if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}

return false;
} else if (Option == "fp") {
return parseDirectiveModuleFP();
}

return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
StringRef Option;
if (Parser.parseIdentifier(Option)) {
reportParseError("expected .module option identifier");
return false;
}

return false;
if (Option == "oddspreg") {
getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");

// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}

return false; // parseDirectiveModule has finished successfully.
} else if (Option == "nooddspreg") {
if (!isABI_O32()) {
Error(L, "'.module nooddspreg' requires the O32 ABI");
return false;
}

getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");

// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}

return false; // parseDirectiveModule has finished successfully.
} else if (Option == "fp") {
return parseDirectiveModuleFP();
} else {
return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
}
}

/// parseDirectiveModuleFP

@ -259,6 +259,11 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst,
uint64_t Address,
const void *Decoder);

static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);

static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@ -304,6 +309,10 @@ static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);

static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);

static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@ -1118,6 +1127,23 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
return MCDisassembler::Success;
}

static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
int Offset = fieldFromInstruction(Insn, 7, 9);
unsigned Hint = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);

Base = getReg(Decoder, Mips::GPR32RegClassID, Base);

Inst.addOperand(MCOperand::CreateReg(Base));
Inst.addOperand(MCOperand::CreateImm(Offset));
Inst.addOperand(MCOperand::CreateImm(Hint));

return MCDisassembler::Success;
}

static DecodeStatus DecodeSyncI(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@ -1354,6 +1380,23 @@ static DecodeStatus DecodeFMem3(MCInst &Inst,
return MCDisassembler::Success;
}

static DecodeStatus DecodeFMemCop2R6(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<11>(Insn & 0x07ff);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 11, 5);

Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);

Inst.addOperand(MCOperand::CreateReg(Reg));
Inst.addOperand(MCOperand::CreateReg(Base));
Inst.addOperand(MCOperand::CreateImm(Offset));

return MCDisassembler::Success;
}
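Both new decoders pull fixed bit ranges out of the instruction word and sign-extend the 9- and 11-bit offsets. A self-contained sketch of the two helpers they rely on, as commonly defined (an illustrative reimplementation, not LLVM's headers):

#include <cassert>
#include <cstdint>

// (insn >> pos) & ((1 << width) - 1), as fieldFromInstruction does.
static uint32_t field(uint32_t insn, unsigned pos, unsigned width) {
    return (insn >> pos) & ((1u << width) - 1u);
}

// Sign-extend the low `bits` bits, as SignExtend32<11> does.
static int32_t signExtend(uint32_t x, unsigned bits) {
    uint32_t m = 1u << (bits - 1);
    return static_cast<int32_t>((x ^ m) - m);
}

int main() {
    assert(signExtend(0x7ff, 11) == -1);           // all-ones 11-bit offset
    assert(field(0xabcd1234u, 16, 5) == 13);       // bits [20:16] of 0xabcd
}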

static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,

@ -58,15 +58,15 @@ def MipsInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//

def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true",
"Disable SVR4-style position-independent code.">;
"Disable SVR4-style position-independent code">;
def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
"General Purpose Registers are 64-bit wide.">;
"General Purpose Registers are 64-bit wide">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
"Support 64-bit FP registers.">;
"Support 64-bit FP registers">;
def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
"Support for FPXX.">;
"Support for FPXX">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
"IEEE 754-2008 NaN encoding.">;
"IEEE 754-2008 NaN encoding">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureO32 : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()",
@ -81,7 +81,7 @@ def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
"registers">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
"true", "Enable vector FPU instructions">;
def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
"Mips I ISA Support [highly experimental]">;
def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",

@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
if (Amount == 0)
return;

if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
BuildAddiuSpImm(MBB, I, Amount);
else

@ -379,7 +379,6 @@ class JMP_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
list<dag> Pattern = [];
bit isTerminator = 1;
bit hasDelaySlot = 0;
string DecoderMethod = "DecodeSimm16";
}

class JIALC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", calloffset16,
@ -550,6 +549,7 @@ class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
list<dag> Pattern = [];
string DecoderMethod = "DecodeCacheOpR6";
}

class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
@ -561,6 +561,7 @@ class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayLoad = 1;
string DecoderMethod = "DecodeFMemCop2R6";
}

class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>;
@ -572,6 +573,7 @@ class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayStore = 1;
string DecoderMethod = "DecodeFMemCop2R6";
}

class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>;

@ -132,8 +132,8 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
continue;
}

assert(Ins[i].OrigArgIndex < MF.getFunction()->arg_size());
std::advance(FuncArg, Ins[i].OrigArgIndex);
assert(Ins[i].getOrigArgIndex() < MF.getFunction()->arg_size());
std::advance(FuncArg, Ins[i].getOrigArgIndex());

OriginalArgWasF128.push_back(
originalTypeIsF128(FuncArg->getType(), nullptr));

@ -123,7 +123,7 @@ def CC_MipsN_SoftFloat : CallingConv<[
]>;

def CC_MipsN : CallingConv<[
CCIfType<[i8, i16, i32],
CCIfType<[i8, i16, i32, i64],
CCIfSubtargetNot<"isLittle()",
CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,

@ -159,6 +159,10 @@ def CC_MipsN : CallingConv<[
// N32/64 variable arguments.
// All arguments are passed in integer registers.
def CC_MipsN_VarArg : CallingConv<[
CCIfType<[i8, i16, i32, i64],
CCIfSubtargetNot<"isLittle()",
CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,

// All integers are promoted to 64-bit.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,

@ -619,6 +619,33 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();

SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2);

ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(ValueIfFalse);
if (!FalseC || FalseC->getZExtValue())
return SDValue();

// Since RHS (False) is 0, we swap the order of the True/False operands
// (obviously also inverting the condition) so that we can
// take advantage of conditional moves using the $0 register.
// Example:
//   return (a != 0) ? x : 0;
//     load $reg, x
//     movz $reg, $0, a
unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? MipsISD::CMovFP_F :
MipsISD::CMovFP_T;

SDValue FCC = N->getOperand(1), Glue = N->getOperand(3);
return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(),
ValueIfFalse, FCC, ValueIfTrue, Glue);
}
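The new combine relies on the identity select(cc, x, 0) == select(!cc, 0, x): flipping CMovFP_T to CMovFP_F with the operands swapped lets the zero operand come from the always-zero $0 register. The identity, checked for both condition values:

#include <cassert>
#include <cstdint>

static uint32_t cmovT(bool cc, uint32_t t, uint32_t f) { return cc ? t : f; }
static uint32_t cmovF(bool cc, uint32_t t, uint32_t f) { return cc ? f : t; }

int main() {
    for (int cc = 0; cc <= 1; ++cc)
        // CMovFP_T(x, cc, 0) == CMovFP_F(0, cc, x): the zero can now be
        // materialized from $0 by movz/movn.
        assert(cmovT(cc != 0, 42u, 0u) == cmovF(cc != 0, 0u, 42u));
}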

static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
@ -752,6 +779,9 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
return performSELECTCombine(N, DAG, DCI, Subtarget);
case MipsISD::CMovFP_F:
case MipsISD::CMovFP_T:
return performCMovFPCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@ -2039,7 +2069,7 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
DAG.getConstant(0x20, MVT::i32));
DAG.getConstant(VT.getSizeInBits(), MVT::i32));
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond,
DAG.getConstant(0, VT), ShiftLeftLo);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or);
@ -2078,11 +2108,12 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL,
DL, VT, Hi, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
DAG.getConstant(0x20, MVT::i32));
SDValue Shift31 = DAG.getNode(ISD::SRA, DL, VT, Hi, DAG.getConstant(31, VT));
DAG.getConstant(VT.getSizeInBits(), MVT::i32));
SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi,
DAG.getConstant(VT.getSizeInBits() - 1, VT));
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond,
IsSRA ? Shift31 : DAG.getConstant(0, VT), ShiftRightHi);
IsSRA ? Ext : DAG.getConstant(0, VT), ShiftRightHi);

SDValue Ops[2] = {Lo, Hi};
return DAG.getMergeValues(Ops, DL);
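lowerShiftLeftParts/lowerShiftRightParts now test the shift amount against VT.getSizeInBits() (and sign-extend by size-1) instead of the hard-coded 0x20/31, so the lowering stays correct for part widths other than 32. A sketch of the left-shift split for 32-bit parts, checked against native 64-bit shifts (assumes shamt already reduced mod 64):

#include <cassert>
#include <cstdint>

static void shl64(uint32_t lo, uint32_t hi, unsigned shamt,
                  uint32_t &lo2, uint32_t &hi2) {
    const unsigned bits = 32;                    // VT.getSizeInBits()
    unsigned s = shamt % bits;
    uint32_t shiftLeftLo = s ? lo << s : lo;
    uint32_t carry = s ? lo >> (bits - s) : 0;   // bits entering the high part
    if (shamt & bits) {                          // Cond = Shamt & SizeInBits
        lo2 = 0;
        hi2 = shiftLeftLo;
    } else {
        lo2 = shiftLeftLo;
        hi2 = (hi << s) | carry;
    }
}

int main() {
    uint64_t v = 0x0123456789abcdefULL;
    for (unsigned s = 0; s < 64; ++s) {
        uint32_t lo, hi;
        shl64(static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32),
              s, lo, hi);
        assert(((static_cast<uint64_t>(hi) << 32) | lo) == (v << s));
    }
}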
@ -2902,13 +2933,16 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,

for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(FuncArg, Ins[i].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[i].OrigArgIndex;
if (Ins[i].isOrigArg()) {
std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
}
EVT ValVT = VA.getValVT();
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool IsRegLoc = VA.isRegLoc();

if (Flags.isByVal()) {
assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit");
unsigned FirstByValReg, LastByValReg;
unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
@ -3029,6 +3063,15 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}

bool
MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) {
if (Type == MVT::i32)
return true;
}
return IsSigned;
}

SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,

@ -472,6 +472,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;

bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;

// Inline asm support
ConstraintType
getConstraintType(const std::string &Constraint) const override;

@ -458,42 +458,42 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;

def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
MADDS_FM<4, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
MADDS_FM<5, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;

let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
MADDS_FM<6, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
MADDS_FM<7, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
MADDS_FM<7, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
}

def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;

let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
}

let isCodeGenOnly=1 in {
let DecoderNamespace = "Mips64" in {
def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}

let AdditionalPredicates = [NoNaNsFPMath],
isCodeGenOnly=1 in {
DecoderNamespace = "Mips64" in {
def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>,
MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}

//===----------------------------------------------------------------------===//

@ -388,6 +388,8 @@ def MSA128W: RegisterClass<"Mips", [v4i32, v4f32], 128,
(sequence "W%u", 0, 31)>;
def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128,
(sequence "W%u", 0, 31)>;
def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128,
(decimate (sequence "W%u", 0, 31), 2)>;

def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;

@ -258,8 +258,12 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
CurDAG->getTargetConstant(Mips::sub_32, VT));
}

SDNode *AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT,
SDValue(Carry, 0), RHS);
// Generate a second addition only if we know that RHS is not a
// constant-zero node.
SDNode *AddCarry = Carry;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
if (!C || C->getZExtValue())
AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);

return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
SDValue(AddCarry, 0));
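selectAddESubE materializes the carry flag MIPS lacks with SLTu and folds it in with ADDu; the patch skips the second ADDu when RHS is a literal zero. A sketch of a 64-bit add built from 32-bit parts with the same carry-recovery trick and the now-conditional inner add:

#include <cassert>
#include <cstdint>

static void add64(uint32_t aLo, uint32_t aHi, uint32_t bLo, uint32_t bHi,
                  uint32_t &lo, uint32_t &hi) {
    lo = aLo + bLo;                 // ADDu
    uint32_t carry = lo < aLo;      // SLTu: recover the carry bit
    uint32_t addCarry = carry;
    if (bHi != 0)                   // second ADDu only when RHS may be nonzero
        addCarry = carry + bHi;
    hi = aHi + addCarry;            // final ADDu
}

int main() {
    uint32_t lo, hi;
    add64(0xffffffffu, 0, 1, 0, lo, hi);   // carry propagates; bHi == 0 path
    assert(lo == 0 && hi == 1);
    add64(1, 2, 3, 4, lo, hi);
    assert(lo == 4 && hi == 6);
}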
|
||||
|
@ -2883,10 +2883,21 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
|
||||
unsigned Ws = MI->getOperand(1).getReg();
|
||||
unsigned Lane = MI->getOperand(2).getImm();
|
||||
|
||||
if (Lane == 0)
|
||||
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
|
||||
else {
|
||||
unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
|
||||
if (Lane == 0) {
|
||||
unsigned Wt = Ws;
|
||||
if (!Subtarget.useOddSPReg()) {
|
||||
// We must copy to an even-numbered MSA register so that the
|
||||
// single-precision sub-register is also guaranteed to be even-numbered.
|
||||
Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
|
||||
|
||||
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
|
||||
}
|
||||
|
||||
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
|
||||
} else {
|
||||
unsigned Wt = RegInfo.createVirtualRegister(
|
||||
Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
|
||||
&Mips::MSA128WEvensRegClass);
|
||||
|
||||
BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
|
||||
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
|
||||
@ -2948,7 +2959,9 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
|
||||
unsigned Wd_in = MI->getOperand(1).getReg();
|
||||
unsigned Lane = MI->getOperand(2).getImm();
|
||||
unsigned Fs = MI->getOperand(3).getReg();
|
||||
unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
|
||||
unsigned Wt = RegInfo.createVirtualRegister(
|
||||
Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
|
||||
&Mips::MSA128WEvensRegClass);
|
||||
|
||||
BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
|
||||
.addImm(0)
|
||||
|
@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
|
||||
unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
|
||||
unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
|
||||
|
||||
if (Amount == 0)
|
||||
return;
|
||||
|
||||
if (isInt<16>(Amount))// addi sp, sp, amount
|
||||
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
|
||||
else { // Expand immediate that doesn't fit in 16-bit.
|
||||
|
@ -2688,9 +2688,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
|
||||
unsigned ObjSize = ObjectVT.getStoreSize();
|
||||
unsigned ArgSize = ObjSize;
|
||||
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
|
||||
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
|
||||
CurArgIdx = Ins[ArgNo].OrigArgIndex;
|
||||
|
||||
if (Ins[ArgNo].isOrigArg()) {
|
||||
std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
|
||||
CurArgIdx = Ins[ArgNo].getOrigArgIndex();
|
||||
}
|
||||
/* Respect alignment of argument on the stack. */
|
||||
unsigned Align =
|
||||
CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
|
||||
@ -2704,6 +2705,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
|
||||
// FIXME the codegen can be much improved in some cases.
|
||||
// We do not have to keep everything in memory.
|
||||
if (Flags.isByVal()) {
|
||||
assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
|
||||
|
||||
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
|
||||
ObjSize = Flags.getByValSize();
|
||||
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
|
||||
@ -3064,9 +3067,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
|
||||
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
|
||||
unsigned ArgSize = ObjSize;
|
||||
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
|
||||
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
|
||||
CurArgIdx = Ins[ArgNo].OrigArgIndex;
|
||||
|
||||
if (Ins[ArgNo].isOrigArg()) {
|
||||
std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
|
||||
CurArgIdx = Ins[ArgNo].getOrigArgIndex();
|
||||
}
|
||||
unsigned CurArgOffset = ArgOffset;
|
||||
|
||||
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
|
||||
@ -3087,6 +3091,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
|
||||
// FIXME the codegen can be much improved in some cases.
|
||||
// We do not have to keep everything in memory.
|
||||
if (Flags.isByVal()) {
|
||||
assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
|
||||
|
||||
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
|
||||
ObjSize = Flags.getByValSize();
|
||||
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
|
||||
|
@ -97,6 +97,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
|
||||
"true",
|
||||
"Enable spilling of VGPRs to scratch memory">;
|
||||
|
||||
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
|
||||
"SGPRInitBug",
|
||||
"true",
|
||||
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;
|
||||
|
||||
class SubtargetFeatureFetchLimit <string Value> :
|
||||
SubtargetFeature <"fetch"#Value,
|
||||
"TexVTXClauseSize",
|
||||
|
@ -40,7 +40,8 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
|
||||
std::vector<Function*> FuncsToClone;
|
||||
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
|
||||
Function &F = *I;
|
||||
if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
|
||||
if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
|
||||
!F.hasFnAttribute(Attribute::NoInline))
|
||||
FuncsToClone.push_back(&F);
|
||||
}
|
||||
|
||||
@ -54,7 +55,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
|
||||
|
||||
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
|
||||
Function &F = *I;
|
||||
if (F.hasLocalLinkage()) {
|
||||
if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
|
||||
F.addFnAttr(Attribute::AlwaysInline);
|
||||
}
|
||||
}
|
||||
|
@ -343,6 +343,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
|
||||
ProgInfo.NumVGPR = MaxVGPR + 1;
|
||||
ProgInfo.NumSGPR = MaxSGPR + 1;
|
||||
|
||||
if (STM.hasSGPRInitBug()) {
|
||||
if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
|
||||
llvm_unreachable("Too many SGPRs used with the SGPR init bug");
|
||||
|
||||
ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
|
||||
}
|
||||
|
||||
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
|
||||
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
|
||||
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
|
||||
|
@ -439,6 +439,31 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
||||
break;
|
||||
}
|
||||
|
||||
case ISD::STORE: {
|
||||
// Handle i64 stores here for the same reason mentioned above for loads.
|
||||
StoreSDNode *ST = cast<StoreSDNode>(N);
|
||||
SDValue Value = ST->getValue();
|
||||
if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
|
||||
break;
|
||||
|
||||
SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
|
||||
MVT::v2i32, Value);
|
||||
SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
|
||||
ST->getBasePtr(), ST->getMemOperand());
|
||||
|
||||
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
|
||||
|
||||
if (NewValue.getOpcode() == ISD::BITCAST) {
|
||||
Select(NewStore.getNode());
|
||||
return SelectCode(NewValue.getNode());
|
||||
}
|
||||
|
||||
// getNode() may fold the bitcast if its input was another bitcast. If that
|
||||
// happens we should only select the new store.
|
||||
N = NewStore.getNode();
|
||||
break;
|
||||
}
|
||||
|
||||
case AMDGPUISD::REGISTER_LOAD: {
|
||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||
break;
|
||||
@ -761,6 +786,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
|
||||
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
|
||||
}
|
||||
|
||||
// We need to handle this here because tablegen doesn't support matching
|
||||
// instructions with multiple outputs.
|
||||
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
|
||||
SDLoc SL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
@ -770,19 +797,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
|
||||
unsigned Opc
|
||||
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
|
||||
|
||||
const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
|
||||
SDValue Ops[] = {
|
||||
Zero, // src0_modifiers
|
||||
N->getOperand(0), // src0
|
||||
Zero, // src1_modifiers
|
||||
N->getOperand(1), // src1
|
||||
Zero, // src2_modifiers
|
||||
N->getOperand(2), // src2
|
||||
False, // clamp
|
||||
Zero // omod
|
||||
};
|
||||
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
|
||||
SDValue Ops[8];
|
||||
|
||||
SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
|
||||
SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
|
||||
SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
|
||||
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
|
||||
}
|
||||
|
||||
|
@ -141,9 +141,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
|
||||
|
||||
setOperationAction(ISD::STORE, MVT::i64, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
|
||||
|
||||
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
|
||||
|
||||
@ -162,9 +159,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
// Custom lowering of vector stores is required for local address space
|
||||
// stores.
|
||||
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
|
||||
// XXX: Native v2i32 local address space stores are possible, but not
|
||||
// currently implemented.
|
||||
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
|
||||
|
||||
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
|
||||
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
|
||||
@ -832,11 +826,9 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
|
||||
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SmallVector<SDValue, 8> Args;
|
||||
SDValue A = Op.getOperand(0);
|
||||
SDValue B = Op.getOperand(1);
|
||||
|
||||
DAG.ExtractVectorElements(A, Args);
|
||||
DAG.ExtractVectorElements(B, Args);
|
||||
for (const SDUse &U : Op->ops())
|
||||
DAG.ExtractVectorElements(U.get(), Args);
|
||||
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
|
||||
}
|
||||
@ -881,9 +873,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
return LowerIntrinsicIABS(Op, DAG);
|
||||
case AMDGPUIntrinsic::AMDGPU_lrp:
|
||||
return LowerIntrinsicLRP(Op, DAG);
|
||||
case AMDGPUIntrinsic::AMDGPU_fract:
|
||||
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
|
||||
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
|
||||
|
||||
case AMDGPUIntrinsic::AMDGPU_clamp:
|
||||
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
|
||||
@ -913,10 +902,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
}
|
||||
|
||||
case Intrinsic::AMDGPU_div_fmas:
|
||||
// FIXME: Dropping bool parameter. Work is needed to support the implicit
|
||||
// read from VCC.
|
||||
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
|
||||
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
|
||||
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
|
||||
Op.getOperand(4));
|
||||
|
||||
case Intrinsic::AMDGPU_div_fixup:
|
||||
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
|
||||
|
@ -140,6 +140,12 @@ public:
|
||||
/// not exist. If Opcode is not a pseudo instruction, this is identity.
|
||||
int pseudoToMCOpcode(int Opcode) const;
|
||||
|
||||
/// \brief Return the descriptor of the target-specific machine instruction
|
||||
/// that corresponds to the specified pseudo or native opcode.
|
||||
const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
|
||||
return get(pseudoToMCOpcode(Opcode));
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Pure virtual funtions to be implemented by sub-classes.
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
@ -35,6 +35,11 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
|
||||
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
|
||||
>;
|
||||
|
||||
// float, float, float, vcc
|
||||
def AMDGPUFmasOp : SDTypeProfile<1, 4,
|
||||
[SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU DAG Nodes
|
||||
//
|
||||
@ -153,7 +158,7 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
|
||||
|
||||
// Special case divide FMA with scale and flags (src0 = Quotient,
|
||||
// src1 = Denominator, src2 = Numerator).
|
||||
def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
|
||||
def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
|
||||
|
||||
// Single or double precision division fixup.
|
||||
// Special case divide fixup and flags(src0 = Quotient, src1 =
|
||||
|
@ -164,10 +164,6 @@ class PrivateStore <SDPatternOperator op> : PrivateMemOp <
|
||||
(ops node:$value, node:$ptr), (op node:$value, node:$ptr)
|
||||
>;
|
||||
|
||||
def extloadi8_private : PrivateLoad <extloadi8>;
|
||||
def sextloadi8_private : PrivateLoad <sextloadi8>;
|
||||
def extloadi16_private : PrivateLoad <extloadi16>;
|
||||
def sextloadi16_private : PrivateLoad <sextloadi16>;
|
||||
def load_private : PrivateLoad <load>;
|
||||
|
||||
def truncstorei8_private : PrivateStore <truncstorei8>;
|
||||
@ -231,6 +227,9 @@ def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def extloadi8_private : PrivateLoad <az_extloadi8>;
|
||||
def sextloadi8_private : PrivateLoad <sextloadi8>;
|
||||
|
||||
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
||||
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
|
||||
}]>;
|
||||
@ -267,6 +266,9 @@ def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
|
||||
return isLocalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
def extloadi16_private : PrivateLoad <az_extloadi16>;
|
||||
def sextloadi16_private : PrivateLoad <sextloadi16>;
|
||||
|
||||
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
|
||||
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
|
||||
}]>;
|
||||
@ -649,17 +651,10 @@ class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
|
||||
(RcpInst $src)
|
||||
>;
|
||||
|
||||
multiclass RsqPat<Instruction RsqInst, ValueType vt> {
|
||||
def : Pat <
|
||||
(fdiv FP_ONE, (fsqrt vt:$src)),
|
||||
(RsqInst $src)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(AMDGPUrcp (fsqrt vt:$src)),
|
||||
(RsqInst $src)
|
||||
>;
|
||||
}
|
||||
class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
|
||||
(AMDGPUrcp (fsqrt vt:$src)),
|
||||
(RsqInst $src)
|
||||
>;
|
||||
|
||||
include "R600Instructions.td"
|
||||
include "R700Instructions.td"
|
||||
|
@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
|
||||
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
|
||||
def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
|
||||
FlatAddressSpace(false), EnableIRStructurizer(true),
|
||||
EnablePromoteAlloca(false), EnableIfCvt(true),
|
||||
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
|
||||
EnableVGPRSpilling(false),
|
||||
EnableVGPRSpilling(false),SGPRInitBug(false),
|
||||
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
||||
64 * 16, // Maximum stack alignment (long16)
|
||||
|
@ -45,6 +45,10 @@ public:
|
||||
VOLCANIC_ISLANDS,
|
||||
};
|
||||
|
||||
enum {
|
||||
FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
|
||||
};
|
||||
|
||||
private:
|
||||
std::string DevName;
|
||||
bool Is64bit;
|
||||
@ -66,6 +70,7 @@ private:
|
||||
bool CFALUBug;
|
||||
int LocalMemorySize;
|
||||
bool EnableVGPRSpilling;
|
||||
bool SGPRInitBug;
|
||||
|
||||
const DataLayout DL;
|
||||
AMDGPUFrameLowering FrameLowering;
|
||||
@ -203,6 +208,10 @@ public:
|
||||
return LocalMemorySize;
|
||||
}
|
||||
|
||||
bool hasSGPRInitBug() const {
|
||||
return SGPRInitBug;
|
||||
}
|
||||
|
||||
unsigned getAmdKernelCodeChipID() const;
|
||||
|
||||
bool enableMachineScheduler() const override {
|
||||
|
@ -46,7 +46,7 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1

defm : RsqPat<RECIPSQRT_IEEE_cm, f32>;
def : RsqPat<RECIPSQRT_IEEE_cm, f32>;

def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
@ -69,7 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
defm : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;

@ -291,6 +291,8 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printImmediate64(Op.getImm(), O);
else
llvm_unreachable("Invalid register class size");
} else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
printImmediate32(Op.getImm(), O);
} else {
// We hit this for the immediate instruction bits that don't yet have a
// custom printer.
@ -113,8 +113,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
// Volcanic Islands
//===----------------------------------------------------------------------===//

def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>;
def : ProcessorModel<"tonga", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureSGPRInitBug]
>;

def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
def : ProcessorModel<"iceland", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureSGPRInitBug]
>;

def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
@ -838,6 +838,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));

case AMDGPUIntrinsic::AMDGPU_fract:
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@ -1694,7 +1698,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
// XXX - I think PartOffset should give you this, but it seems to give the
// size of the register which isn't useful.

unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
unsigned Offset = 36 + VA.getLocMemOffset();

@ -1193,7 +1193,7 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;

def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
defm : RsqPat<RECIPSQRT_IEEE_r600, f32>;
def : RsqPat<RECIPSQRT_IEEE_r600, f32>;

def : FROUNDPat <CNDGE_r600, CNDGT_r600>;

@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass {

void insertElse(BranchInst *Term);

Value *handleLoopCondition(Value *Cond, PHINode *Broken);
Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L);

void handleLoop(BranchInst *Term);

@ -207,8 +207,17 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}

/// \brief Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) {
if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
llvm::Loop *L) {

// Only search through PHI nodes which are inside the loop. If we try this
// with PHI nodes that are outside of the loop, we end up inserting new PHI
// nodes outside of the loop which depend on values defined inside the loop.
// This will break the module with
// 'Instruction does not dominate all users!' errors.
PHINode *Phi = nullptr;
if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {

BasicBlock *Parent = Phi->getParent();
PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
Value *Ret = NewPhi;
@ -223,7 +232,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
}

Phi->setIncomingValue(i, BoolFalse);
Value *PhiArg = handleLoopCondition(Incoming, Broken);
Value *PhiArg = handleLoopCondition(Incoming, Broken, L);
NewPhi->addIncoming(PhiArg, From);
}

@ -253,7 +262,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)

} else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
TerminatorInst *Insert = Parent->getTerminator();
Instruction *Insert;
if (L->contains(Inst)) {
Insert = Parent->getTerminator();
} else {
Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
}
Value *Args[] = { Cond, Broken };
return CallInst::Create(IfBreak, Args, "", Insert);

@ -265,14 +279,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)

/// \brief Handle a back edge (loop)
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
BasicBlock *BB = Term->getParent();
llvm::Loop *L = LI->getLoopFor(BB);
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());

Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
Value *Arg = handleLoopCondition(Cond, Broken);
Value *Arg = handleLoopCondition(Cond, Broken, L);

BasicBlock *BB = Term->getParent();
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
PI != PE; ++PI) {

@ -35,7 +35,8 @@ enum {
SMRD = 1 << 16,
DS = 1 << 17,
MIMG = 1 << 18,
FLAT = 1 << 19
FLAT = 1 << 19,
WQM = 1 << 20
};
}

@ -209,7 +209,12 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
APInt Imm;

if (FoldingImm) {
const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg());
unsigned UseReg = UseOp.getReg();
const TargetRegisterClass *UseRC
= TargetRegisterInfo::isVirtualRegister(UseReg) ?
MRI.getRegClass(UseReg) :
TRI.getRegClass(UseReg);

Imm = APInt(64, OpToFold.getImm());

// Split 64-bit constants into 32-bits for folding.
@ -228,8 +233,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// In order to fold immediates into copies, we need to change the
// copy to a MOV.
if (UseMI->getOpcode() == AMDGPU::COPY) {
unsigned MovOp = TII->getMovOpcode(
MRI.getRegClass(UseMI->getOperand(0).getReg()));
unsigned DestReg = UseMI->getOperand(0).getReg();
const TargetRegisterClass *DestRC
= TargetRegisterInfo::isVirtualRegister(DestReg) ?
MRI.getRegClass(DestReg) :
TRI.getRegClass(DestReg);

unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
continue;

@ -89,8 +89,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v16i32, Custom);

setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);

setOperationAction(ISD::SELECT, MVT::i64, Custom);
@ -158,8 +156,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);

setTruncStoreAction(MVT::i32, MVT::i8, Custom);
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
@ -214,6 +210,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
}

setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);

setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
@ -314,9 +311,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple() || VT == MVT::Other)
return false;

// XXX - CI changes say "Support for unaligned memory accesses" but I don't
// see what for specifically. The wording everywhere else seems to be the
// same.
// TODO - CI+ supports unaligned memory accesses, but this requires driver
// support.

// XXX - The only mention I see of this in the ISA manual is for LDS direct
// reads the "byte address and must be dword aligned". Is it also true for the
@ -328,12 +324,18 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return Align % 4 == 0;
}

// Smaller than dword value must be aligned.
// FIXME: This should be allowed on CI+
if (VT.bitsLT(MVT::i32))
return false;

// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.
if (IsFast)
*IsFast = true;
return VT.bitsGT(MVT::i32);

return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}

EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
@ -448,7 +450,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// We REALLY want the ORIGINAL number of vertex elements here, e.g. a
// three or five element vertex only needs three or five registers,
// NOT four or eight.
Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();

for (unsigned j = 0; j != NumElements; ++j) {
@ -531,7 +533,7 @@ SDValue SITargetLowering::LowerFormalArguments(
Offset, Ins[i].Flags.isSExt());

const PointerType *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
// On SI local pointers are just offsets into LDS, so they are always
@ -566,7 +568,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (Arg.VT.isVector()) {

// Build a vector from the registers
Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();

SmallVector<SDValue, 4> Regs;
@ -919,6 +921,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));

case AMDGPUIntrinsic::AMDGPU_fract:
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));

default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
@ -1104,7 +1112,70 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
}

SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
if (DAG.getTarget().Options.UnsafeFPMath)
return LowerFastFDIV(Op, DAG);

SDLoc SL(Op);
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);

const SDValue One = DAG.getConstantFP(1.0, MVT::f64);

SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);

SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);

SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);

SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);

SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);

SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);

SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);

SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);

SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);

SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
NegDivScale0, Mul, DivScale1);

SDValue Scale;

if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.

const SDValue Hi = DAG.getConstant(1, MVT::i32);

// Figure out which scale to use for div_fmas.
SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);

SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);

SDValue Scale0Hi
= DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
SDValue Scale1Hi
= DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);

SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
} else {
Scale = DivScale1.getValue(1);
}

SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
Fma4, Fma3, Mul, Scale);

return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
}
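For reference, the FMA chain in LowerFDIV64 above is the standard Newton-Raphson reciprocal refinement applied to the div_scale-adjusted operands; summarizing the algebra (an editorial restatement of the code above, with x', y' the scaled operands):

\begin{aligned}
e_0 &= \mathrm{rcp}(y'), & e_1 &= e_0\,(2 - y' e_0) &&\text{(Rcp, Fma0, Fma1)}\\
e_2 &= e_1 + e_1\,(1 - y' e_1), & q &= x'\,e_2 &&\text{(Fma2, Fma3, Mul)}\\
r &= x' - y' q, & q' &= q + r\,e_2 &&\text{(Fma4, DIV\_FMAS)}
\end{aligned}

DIV_FIXUP then undoes the div_scale scaling and patches up the special cases.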
SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
@ -1125,11 +1196,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Store->getMemoryVT();

// These stores are legal.
if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
VT.isVector() && VT.getVectorNumElements() == 2 &&
VT.getVectorElementType() == MVT::i32)
return SDValue();

if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
if (VT.isVector() && VT.getVectorNumElements() > 4)
return ScalarizeVectorStore(Op, DAG);
@ -1524,6 +1590,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::UMAX:
case AMDGPUISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMin3Max3Combine(N, DCI);
break;
@ -82,6 +82,8 @@ private:
/// \brief Type of the last opcode.
InstType LastOpcodeType;

bool LastInstWritesM0;

/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);

@ -106,6 +108,9 @@ private:
/// \brief Resolve all operand dependencies to counter requirements
Counters handleOperands(MachineInstr &MI);

/// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);

public:
SIInsertWaits(TargetMachine &tm) :
MachineFunctionPass(ID),
@ -269,6 +274,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// Insert a NOP to break the clause.
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
.addImm(0);
LastInstWritesM0 = false;
}

if (TII->isSMRD(I->getOpcode()))
@ -362,6 +368,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
((Counts.Named.LGKM & 0x7) << 8));

LastOpcodeType = OTHER;
LastInstWritesM0 = false;
return true;
}
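The immediate assembled in insertWait() above packs the three hardware counters into one s_waitcnt operand. A sketch of the packing as a standalone helper (packWaitcnt is a hypothetical name; only the LGKM term is visible in the hunk above, and the VM and EXP fields are assumed from the same SI-era layout, VM in bits 3:0, EXP in 6:4, LGKM in 10:8):

static unsigned packWaitcnt(unsigned VM, unsigned EXP, unsigned LGKM) {
  // A field left at its maximum value means "do not wait on this counter".
  return (VM & 0xF) | ((EXP & 0x7) << 4) | ((LGKM & 0x7) << 8);
}
// Example: packWaitcnt(0, 7, 7) == 0x770 waits only on vector memory.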
@ -403,6 +410,30 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
return Result;
}

void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return;

// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
LastInstWritesM0 = false;
return;
}

// Set whether this instruction sets M0
LastInstWritesM0 = false;

unsigned NumOperands = I->getNumOperands();
for (unsigned i = 0; i < NumOperands; i++) {
const MachineOperand &Op = I->getOperand(i);

if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
LastInstWritesM0 = true;
}
}

// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@ -417,6 +448,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
WaitedOn = ZeroCounts;
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
LastInstWritesM0 = false;

memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
@ -433,7 +465,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
Changes |= insertWait(MBB, I, LastIssued);
else
Changes |= insertWait(MBB, I, handleOperands(*I));

pushInstruction(MBB, I);
handleSendMsg(MBB, I);
}

// Wait for everything at the end of the MBB
@ -38,6 +38,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> DS = 0;
field bits<1> MIMG = 0;
field bits<1> FLAT = 0;
field bits<1> WQM = 0;

// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
@ -64,6 +65,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{17} = DS;
let TSFlags{18} = MIMG;
let TSFlags{19} = FLAT;
let TSFlags{20} = WQM;

// Most instructions require adjustments after selection to satisfy
// operand requirements.
@ -295,18 +297,32 @@ class VOP1e <bits<8> op> : Enc32 {
}

class VOP2e <bits<6> op> : Enc32 {
bits<8> vdst;
bits<9> src0;
bits<8> src1;

bits<8> VDST;
bits<9> SRC0;
bits<8> VSRC1;

let Inst{8-0} = SRC0;
let Inst{16-9} = VSRC1;
let Inst{24-17} = VDST;
let Inst{8-0} = src0;
let Inst{16-9} = src1;
let Inst{24-17} = vdst;
let Inst{30-25} = op;
let Inst{31} = 0x0; //encoding
}

class VOP2_MADKe <bits<6> op> : Enc64 {

bits<8> vdst;
bits<9> src0;
bits<8> vsrc1;
bits<32> src2;

let Inst{8-0} = src0;
let Inst{16-9} = vsrc1;
let Inst{24-17} = vdst;
let Inst{30-25} = op;
let Inst{31} = 0x0; // encoding
let Inst{63-32} = src2;
}

class VOP3e <bits<9> op> : Enc64 {

bits<8> dst;
|
||||
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
|
||||
|
||||
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
VOP3Common <outs, ins, asm, pattern>, VOP3be<op>;
|
||||
|
||||
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
|
||||
VOPCCommon <ins, asm, pattern>, VOPCe <op>;
|
||||
|
||||
@ -585,9 +598,6 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
let SchedRW = [WriteLDS];
|
||||
}
|
||||
|
||||
class DS_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
DS <outs, ins, asm, pattern>, DSe<op>;
|
||||
|
||||
class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI<outs, ins, asm, pattern> {
|
||||
|
||||
|
@ -121,12 +121,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
|
||||
if (Load0->getOperand(0) != Load1->getOperand(0))
|
||||
return false;
|
||||
|
||||
const ConstantSDNode *Load0Offset =
|
||||
dyn_cast<ConstantSDNode>(Load0->getOperand(1));
|
||||
const ConstantSDNode *Load1Offset =
|
||||
dyn_cast<ConstantSDNode>(Load1->getOperand(1));
|
||||
|
||||
if (!Load0Offset || !Load1Offset)
|
||||
return false;
|
||||
|
||||
// Check chain.
|
||||
if (findChainOperand(Load0) != findChainOperand(Load1))
|
||||
return false;
|
||||
|
||||
Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
|
||||
Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
|
||||
Offset0 = Load0Offset->getZExtValue();
|
||||
Offset1 = Load1Offset->getZExtValue();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -333,6 +341,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
return;
|
||||
|
||||
} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
|
||||
if (DestReg == AMDGPU::VCC) {
|
||||
if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
} else {
|
||||
// FIXME: Hack until VReg_1 removed.
|
||||
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
|
||||
.addImm(0)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
@ -408,11 +431,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
|
||||
int NewOpc;
|
||||
|
||||
// Try to map original to commuted opcode
|
||||
if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
|
||||
NewOpc = AMDGPU::getCommuteRev(Opcode);
|
||||
// Check if the commuted (REV) opcode exists on the target.
|
||||
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
|
||||
return NewOpc;
|
||||
|
||||
// Try to map commuted to original opcode
|
||||
if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
|
||||
NewOpc = AMDGPU::getCommuteOrig(Opcode);
|
||||
// Check if the original (non-REV) opcode exists on the target.
|
||||
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
|
||||
return NewOpc;
|
||||
|
||||
return Opcode;
|
||||
@ -1121,6 +1148,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
||||
return false;
|
||||
}
|
||||
|
||||
int RegClass = Desc.OpInfo[i].RegClass;
|
||||
|
||||
switch (Desc.OpInfo[i].OperandType) {
|
||||
case MCOI::OPERAND_REGISTER:
|
||||
if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
|
||||
@ -1131,7 +1160,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
||||
case AMDGPU::OPERAND_REG_IMM32:
|
||||
break;
|
||||
case AMDGPU::OPERAND_REG_INLINE_C:
|
||||
if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
|
||||
if (isLiteralConstant(MI->getOperand(i))) {
|
||||
ErrInfo = "Illegal immediate value for operand.";
|
||||
return false;
|
||||
}
|
||||
@ -1152,7 +1181,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
||||
if (!MI->getOperand(i).isReg())
|
||||
continue;
|
||||
|
||||
int RegClass = Desc.OpInfo[i].RegClass;
|
||||
if (RegClass != -1) {
|
||||
unsigned Reg = MI->getOperand(i).getReg();
|
||||
if (TargetRegisterInfo::isVirtualRegister(Reg))
|
||||
@ -1197,31 +1225,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
|
||||
}
|
||||
}
|
||||
|
||||
// Verify SRC1 for VOP2 and VOPC
|
||||
if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
|
||||
const MachineOperand &Src1 = MI->getOperand(Src1Idx);
|
||||
if (Src1.isImm()) {
|
||||
ErrInfo = "VOP[2C] src1 cannot be an immediate.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify VOP3
|
||||
if (isVOP3(Opcode)) {
|
||||
if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
|
||||
ErrInfo = "VOP3 src0 cannot be a literal constant.";
|
||||
return false;
|
||||
}
|
||||
if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
|
||||
ErrInfo = "VOP3 src1 cannot be a literal constant.";
|
||||
return false;
|
||||
}
|
||||
if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
|
||||
ErrInfo = "VOP3 src2 cannot be a literal constant.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify misc. restrictions on specific instructions.
|
||||
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
|
||||
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
|
||||
@ -1292,6 +1295,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
|
||||
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
|
||||
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
|
||||
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
|
||||
case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2043,6 +2047,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_LSHL_B64:
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_LSHLREV_B64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_ASHR_I64:
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_ASHRREV_I64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
case AMDGPU::S_LSHR_B64:
|
||||
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
NewOpcode = AMDGPU::V_LSHRREV_B64;
|
||||
swapOperands(Inst);
|
||||
}
|
||||
break;
|
||||
|
||||
case AMDGPU::S_BFE_U64:
|
||||
case AMDGPU::S_BFM_B64:
|
||||
|
@ -204,6 +204,10 @@ public:
|
||||
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
|
||||
}
|
||||
|
||||
bool isWQM(uint16_t Opcode) const {
|
||||
return get(Opcode).TSFlags & SIInstrFlags::WQM;
|
||||
}
|
||||
|
||||
bool isInlineConstant(const APInt &Imm) const;
|
||||
bool isInlineConstant(const MachineOperand &MO) const;
|
||||
bool isLiteralConstant(const MachineOperand &MO) const;
|
||||
@ -243,7 +247,27 @@ public:
|
||||
/// the register class of its machine operand.
|
||||
/// to infer the correct register class base on the other operands.
|
||||
const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
|
||||
unsigned OpNo) const;\
|
||||
unsigned OpNo) const;
|
||||
|
||||
/// \brief Return the size in bytes of the operand OpNo on the given
|
||||
// instruction opcode.
|
||||
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
|
||||
const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
|
||||
|
||||
if (OpInfo.RegClass == -1) {
|
||||
// If this is an immediate operand, this must be a 32-bit literal.
|
||||
assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
|
||||
return 4;
|
||||
}
|
||||
|
||||
return RI.getRegClass(OpInfo.RegClass)->getSize();
|
||||
}
|
||||
|
||||
/// \brief This form should usually be preferred since it handles operands
|
||||
/// with unknown register classes.
|
||||
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
|
||||
return getOpRegClass(MI, OpNo)->getSize();
|
||||
}
|
||||
|
||||
/// \returns true if it is legal for the operand at index \p OpNo
|
||||
/// to read a VGPR.
|
||||
|
File diff suppressed because it is too large
@ -152,9 +152,11 @@ defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
>;

//defm S_FLBIT_I32_B64 : SOP1_32 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
//defm S_FLBIT_I32_I64 : SOP1_32 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
[(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
>;
defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
>;
@ -764,88 +766,88 @@ defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
//===----------------------------------------------------------------------===//

def DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
def DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
def DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
def DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
def DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
def DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
def DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
def DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
def DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
def DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
def DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
def DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
def DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
defm DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
defm DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
defm DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
defm DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
defm DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;

def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
defm DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
defm DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
defm DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
defm DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
defm DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;

let SubtargetPredicate = isCI in {
def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End isCI

def DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
def DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
def DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
def DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
def DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
def DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
def DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
def DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
def DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
def DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
def DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
def DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
defm DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
defm DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
defm DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
defm DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
defm DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
defm DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;

def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
defm DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
defm DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
defm DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
defm DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
defm DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
defm DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
defm DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
defm DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_f64", VReg_64, "ds_min_f64">;
def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_f64", VReg_64, "ds_max_f64">;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;

//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
@ -874,123 +876,120 @@ defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg

defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
defm DS_READ2_B64 : DS_Load2_Helper <0x00000077, "ds_read2_b64", VReg_128>;
defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000078, "ds_read2st64_b64", VReg_128>;

//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isSICI in {

//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "buffer_load_format_x", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "buffer_load_format_xy", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "buffer_load_format_xyz", []>;
defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "buffer_load_format_xyzw", VReg_128>;
//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "buffer_store_format_x", []>;
//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "buffer_store_format_xy", []>;
//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "buffer_store_format_xyz", []>;
//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "buffer_store_format_xyzw", []>;
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
0x00000008, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
0x00000009, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
mubuf<0x09, 0x11>, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
>;
defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
0x0000000a, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
>;
defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
0x0000000b, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
0x0000000c, "buffer_load_dword", VGPR_32, i32, global_load
mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
0x0000000d, "buffer_load_dwordx2", VReg_64, v2i32, global_load
mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
0x0000000e, "buffer_load_dwordx4", VReg_128, v4i32, global_load
mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load
>;

defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
0x00000018, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
>;

defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
0x0000001a, "buffer_store_short", VGPR_32, i32, truncstorei16_global
mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global
>;

defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
0x0000001c, "buffer_store_dword", VGPR_32, i32, global_store
mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store
>;

defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
0x0000001d, "buffer_store_dwordx2", VReg_64, v2i32, global_store
mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store
>;

defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
0x0000001e, "buffer_store_dwordx4", VReg_128, v4i32, global_store
mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "buffer_atomic_swap", []>;

defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
0x00000030, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "buffer_atomic_cmpswap", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>;
defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
0x00000032, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
>;
defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
0x00000033, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "buffer_atomic_rsub", []>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <mubuf<0x34>, "buffer_atomic_rsub", []>; // isn't on CI & VI
defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
0x00000035, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
>;
defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
0x00000036, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
>;
defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
0x00000037, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
>;
defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
0x00000038, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
>;
defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
0x00000039, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
>;
defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
0x0000003a, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
>;
defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
0x0000003b, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
>;
//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "buffer_atomic_inc", []>;
//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "buffer_atomic_dec", []>;
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "buffer_atomic_fcmpswap", []>;
//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "buffer_atomic_fmin", []>;
//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "buffer_atomic_fmax", []>;
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "buffer_atomic_swap_x2", []>;
//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "buffer_atomic_cmpswap_x2", []>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "buffer_atomic_add_x2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "buffer_atomic_sub_x2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "buffer_atomic_rsub_x2", []>;
//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "buffer_atomic_smin_x2", []>;
//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "buffer_atomic_umin_x2", []>;
//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "buffer_atomic_smax_x2", []>;
//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "buffer_atomic_umax_x2", []>;
//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "buffer_atomic_and_x2", []>;
//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "buffer_atomic_or_x2", []>;
//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "buffer_atomic_xor_x2", []>;
//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "buffer_atomic_inc_x2", []>;
//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "buffer_atomic_dec_x2", []>;
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "buffer_atomic_fcmpswap_x2", []>;
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "buffer_atomic_fmin_x2", []>;
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "buffer_atomic_fmax_x2", []>;
//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "buffer_wbinvl1_sc", []>;
//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "buffer_wbinvl1", []>;

} // End SubtargetPredicate = isSICI
//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>;
//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>;
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI
//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;
//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>;
//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>;
//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>;
//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>;
//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>;
//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", []>;
//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", []>;
//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", []>;
//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", []>;
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI
//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI
//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>;

//===----------------------------------------------------------------------===//
// MTBUF Instructions
@ -1037,63 +1036,63 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "image_sample">;
defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "image_sample_cl">;
defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">;
defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "image_sample_b">;
defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "image_sample_b_cl">;
defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "image_sample_c">;
defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "image_sample_c_cl">;
defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "image_sample_c_b">;
defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">;
defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "image_sample_o">;
defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "image_sample_cl_o">;
defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "image_sample_b_o">;
defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">;
defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "image_sample_c_o">;
defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">;
defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">;
defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">;
defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "image_gather4">;
defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "image_gather4_cl">;
defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">;
defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "image_gather4_b">;
defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "image_gather4_b_cl">;
defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "image_gather4_c">;
defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "image_gather4_c_cl">;
defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "image_gather4_c_b">;
defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">;
defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "image_gather4_o">;
defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "image_gather4_cl_o">;
defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "image_gather4_b_o">;
defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "image_gather4_c_o">;
defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">;
defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">;
defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">;
defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
|
||||
defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod">;
|
||||
defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
|
||||
defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
|
||||
defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
|
||||
defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
|
||||
@ -1445,53 +1444,37 @@ defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
|
||||
fminnum>;
|
||||
defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
|
||||
fmaxnum>;
|
||||
defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32,
|
||||
AMDGPUsmin
|
||||
>;
|
||||
defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32,
|
||||
AMDGPUsmax
|
||||
>;
|
||||
defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32,
|
||||
AMDGPUumin
|
||||
>;
|
||||
defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32,
|
||||
AMDGPUumax
|
||||
>;
|
||||
defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
|
||||
defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
|
||||
defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
|
||||
defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
|
||||
|
||||
// No non-Rev Op on VI
|
||||
defm V_LSHRREV_B32 : VOP2Inst <
|
||||
vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
|
||||
"v_lshr_b32", "v_lshrrev_b32"
|
||||
"v_lshr_b32"
|
||||
>;
|
||||
|
||||
// No non-Rev OP on VI
|
||||
defm V_ASHRREV_I32 : VOP2Inst <
|
||||
vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
|
||||
"v_ashr_i32", "v_ashrrev_i32"
|
||||
"v_ashr_i32"
|
||||
>;
|
||||
|
||||
// No non-Rev OP on VI
|
||||
defm V_LSHLREV_B32 : VOP2Inst <
|
||||
vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
|
||||
"v_lshl_b32", "v_lshlrev_b32"
|
||||
"v_lshl_b32"
|
||||
>;
|
||||
|
||||
defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32",
|
||||
VOP_I32_I32_I32, and>;
|
||||
defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32",
|
||||
VOP_I32_I32_I32, or
|
||||
>;
|
||||
defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32",
|
||||
VOP_I32_I32_I32, xor
|
||||
>;
|
||||
defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
|
||||
defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
|
||||
defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
|
||||
|
||||
defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
defm V_MADMK_F32 : VOP2Inst <vop2<0x20, 0x17>, "v_madmk_f32", VOP_F32_F32_F32>;
|
||||
defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_MADAK_F32 : VOP2Inst <vop2<0x21, 0x18>, "v_madak_f32", VOP_F32_F32_F32>;
|
||||
defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
|
||||
} // End isCommutable = 1
|
||||
|
||||
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
|
||||
@ -1503,9 +1486,7 @@ let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
|
||||
defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
|
||||
VOP_I32_I32_I32, add
|
||||
>;
|
||||
defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32",
|
||||
VOP_I32_I32_I32, sub
|
||||
>;
|
||||
defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
|
||||
|
||||
defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
|
||||
VOP_I32_I32_I32, null_frag, "v_sub_i32"
|
||||
@ -1513,10 +1494,10 @@ defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
|
||||
|
||||
let Uses = [VCC] in { // Carry-in comes from VCC
|
||||
defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
|
||||
VOP_I32_I32_I32_VCC, adde
|
||||
VOP_I32_I32_I32_VCC
|
||||
>;
|
||||
defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
|
||||
VOP_I32_I32_I32_VCC, sube
|
||||
VOP_I32_I32_I32_VCC
|
||||
>;
|
||||
defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
|
||||
VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
|
||||
@ -1529,47 +1510,41 @@ defm V_READLANE_B32 : VOP2SI_3VI_m <
|
||||
vop3 <0x001, 0x289>,
|
||||
"v_readlane_b32",
|
||||
(outs SReg_32:$vdst),
|
||||
(ins VGPR_32:$src0, SSrc_32:$vsrc1),
|
||||
"v_readlane_b32 $vdst, $src0, $vsrc1"
|
||||
(ins VGPR_32:$src0, SCSrc_32:$src1),
|
||||
"v_readlane_b32 $vdst, $src0, $src1"
|
||||
>;
|
||||
|
||||
defm V_WRITELANE_B32 : VOP2SI_3VI_m <
|
||||
vop3 <0x002, 0x28a>,
|
||||
"v_writelane_b32",
|
||||
(outs VGPR_32:$vdst),
|
||||
(ins SReg_32:$src0, SSrc_32:$vsrc1),
|
||||
"v_writelane_b32 $vdst, $src0, $vsrc1"
|
||||
(ins SReg_32:$src0, SCSrc_32:$src1),
|
||||
"v_writelane_b32 $vdst, $src0, $src1"
|
||||
>;
|
||||
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
|
||||
VOP_F32_F32_F32
|
||||
>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
|
||||
defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmin_legacy
|
||||
>;
|
||||
defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
|
||||
defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
|
||||
VOP_F32_F32_F32, AMDGPUfmax_legacy
|
||||
>;
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
|
||||
defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
|
||||
VOP_I32_I32_I32, sra
|
||||
>;
|
||||
|
||||
let hasPostISelHook = 1 in {
|
||||
defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
|
||||
}
|
||||
|
||||
defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
|
||||
defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
|
||||
defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
|
||||
} // End isCommutable = 1
|
||||
} // End let SubtargetPredicate = SICI
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
|
||||
VOP_F32_F32_F32
|
||||
>;
|
||||
} // End isCommutable = 1
|
||||
|
||||
defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
|
||||
AMDGPUbfm
|
||||
>;
|
||||
@ -1586,14 +1561,25 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
|
||||
VOP_F32_F32_I32, AMDGPUldexp
|
||||
>;
|
||||
|
||||
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
|
||||
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
|
||||
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
|
||||
VOP_I32_F32_F32, int_SI_packf16
|
||||
|
||||
defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
|
||||
VOP_I32_F32_I32>; // TODO: set "Uses = dst"
|
||||
|
||||
defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
|
||||
VOP_I32_F32_F32
|
||||
>;
|
||||
defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
|
||||
VOP_I32_F32_F32
|
||||
>;
|
||||
defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
|
||||
VOP_I32_F32_F32, int_SI_packf16
|
||||
>;
|
||||
defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
|
||||
VOP_I32_I32_I32
|
||||
>;
|
||||
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
|
||||
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP3 Instructions
|
||||
@ -1659,27 +1645,34 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
|
||||
VOP_I32_I32_I32_I32
|
||||
>;
|
||||
|
||||
defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
|
||||
defm V_MIN3_F32 : VOP3Inst <vop3<0x151, 0x1d0>, "v_min3_f32",
|
||||
VOP_F32_F32_F32_F32, AMDGPUfmin3>;
|
||||
|
||||
defm V_MIN3_I32 : VOP3Inst <vop3<0x152>, "v_min3_i32",
|
||||
defm V_MIN3_I32 : VOP3Inst <vop3<0x152, 0x1d1>, "v_min3_i32",
|
||||
VOP_I32_I32_I32_I32, AMDGPUsmin3
|
||||
>;
|
||||
defm V_MIN3_U32 : VOP3Inst <vop3<0x153>, "v_min3_u32",
|
||||
defm V_MIN3_U32 : VOP3Inst <vop3<0x153, 0x1d2>, "v_min3_u32",
|
||||
VOP_I32_I32_I32_I32, AMDGPUumin3
|
||||
>;
|
||||
defm V_MAX3_F32 : VOP3Inst <vop3<0x154>, "v_max3_f32",
|
||||
defm V_MAX3_F32 : VOP3Inst <vop3<0x154, 0x1d3>, "v_max3_f32",
|
||||
VOP_F32_F32_F32_F32, AMDGPUfmax3
|
||||
>;
|
||||
defm V_MAX3_I32 : VOP3Inst <vop3<0x155>, "v_max3_i32",
|
||||
defm V_MAX3_I32 : VOP3Inst <vop3<0x155, 0x1d4>, "v_max3_i32",
|
||||
VOP_I32_I32_I32_I32, AMDGPUsmax3
|
||||
>;
|
||||
defm V_MAX3_U32 : VOP3Inst <vop3<0x156>, "v_max3_u32",
|
||||
defm V_MAX3_U32 : VOP3Inst <vop3<0x156, 0x1d5>, "v_max3_u32",
|
||||
VOP_I32_I32_I32_I32, AMDGPUumax3
|
||||
>;
|
||||
//def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
|
||||
//def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
|
||||
//def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
|
||||
defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32",
|
||||
VOP_F32_F32_F32_F32
|
||||
>;
|
||||
defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32",
|
||||
VOP_I32_I32_I32_I32
|
||||
>;
|
||||
defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
|
||||
VOP_I32_I32_I32_I32
|
||||
>;
|
||||
|
||||
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
|
||||
//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
|
||||
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
|
||||
@ -1742,21 +1735,36 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
|
||||
|
||||
} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
|
||||
|
||||
let SchedRW = [WriteFloatFMA, WriteSALU] in {
|
||||
defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
|
||||
}
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
let SchedRW = [WriteDouble, WriteSALU] in {
|
||||
// Double precision division pre-scale.
|
||||
defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
|
||||
} // let SchedRW = [WriteDouble]
|
||||
|
||||
let isCommutable = 1 in {
|
||||
defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
|
||||
let isCommutable = 1, Uses = [VCC] in {
|
||||
|
||||
// v_div_fmas_f32:
|
||||
// result = src0 * src1 + src2
|
||||
// if (vcc)
|
||||
// result *= 2^32
|
||||
//
|
||||
defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
|
||||
VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
|
||||
>;
|
||||
|
||||
let SchedRW = [WriteDouble] in {
|
||||
defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
|
||||
// v_div_fmas_f64:
|
||||
// result = src0 * src1 + src2
|
||||
// if (vcc)
|
||||
// result *= 2^64
|
||||
//
|
||||
defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
|
||||
VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
|
||||
>;
|
||||
|
||||
} // End SchedRW = [WriteDouble]
|
||||
} // End isCommutable = 1
|
||||
|
||||
@ -1774,23 +1782,29 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
|
||||
// These instructions only exist on SI and CI
|
||||
let SubtargetPredicate = isSICI in {
|
||||
|
||||
defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
|
||||
VOP_I64_I64_I32, shl
|
||||
>;
|
||||
|
||||
defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
|
||||
VOP_I64_I64_I32, srl
|
||||
>;
|
||||
|
||||
defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
|
||||
VOP_I64_I64_I32, sra
|
||||
>;
|
||||
defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>;
|
||||
defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>;
|
||||
defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>;
|
||||
|
||||
defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
|
||||
VOP_F32_F32_F32_F32>;
|
||||
|
||||
} // End SubtargetPredicate = isSICI
|
||||
|
||||
let SubtargetPredicate = isVI in {
|
||||
|
||||
defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64",
|
||||
VOP_I64_I32_I64
|
||||
>;
|
||||
defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64",
|
||||
VOP_I64_I32_I64
|
||||
>;
|
||||
defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
|
||||
VOP_I64_I32_I64
|
||||
>;
|
||||
|
||||
} // End SubtargetPredicate = isVI
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pseudo Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1809,8 +1823,8 @@ def SGPR_USE : InstSI <(outs),(ins), "", []>;
|
||||
// SI pseudo instructions. These are used by the CFG structurizer pass
|
||||
// and should be lowered to ISA instructions prior to codegen.
|
||||
|
||||
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
|
||||
Uses = [EXEC], Defs = [EXEC] in {
|
||||
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
|
||||
let Uses = [EXEC], Defs = [EXEC] in {
|
||||
|
||||
let isBranch = 1, isTerminator = 1 in {
|
||||
|
||||
@ -1867,15 +1881,18 @@ def SI_END_CF : InstSI <
|
||||
[(int_SI_end_cf i64:$saved)]
|
||||
>;
|
||||
|
||||
} // End Uses = [EXEC], Defs = [EXEC]
|
||||
|
||||
let Uses = [EXEC], Defs = [EXEC,VCC] in {
|
||||
def SI_KILL : InstSI <
|
||||
(outs),
|
||||
(ins VSrc_32:$src),
|
||||
"si_kill $src",
|
||||
[(int_AMDGPU_kill f32:$src)]
|
||||
>;
|
||||
} // End Uses = [EXEC], Defs = [EXEC,VCC]
|
||||
|
||||
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
|
||||
// Uses = [EXEC], Defs = [EXEC]
|
||||
|
||||
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
|
||||
|
||||
@ -2020,16 +2037,12 @@ def : Pat <
|
||||
(SI_KILL 0xbf800000)
|
||||
>;
|
||||
|
||||
let Predicates = [isSICI] in {
|
||||
|
||||
/* int_SI_vs_load_input */
|
||||
def : Pat<
|
||||
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
|
||||
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
} // End Predicates = [isSICI]
|
||||
|
||||
/* int_SI_export */
|
||||
def : Pat <
|
||||
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
|
||||
@ -2156,9 +2169,13 @@ def : Pat <
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [UnsafeFPMath] in {
|
||||
def : RcpPat<V_RCP_F64_e32, f64>;
|
||||
defm : RsqPat<V_RSQ_F64_e32, f64>;
|
||||
defm : RsqPat<V_RSQ_F32_e32, f32>;
|
||||
|
||||
//def : RcpPat<V_RCP_F64_e32, f64>;
|
||||
//defm : RsqPat<V_RSQ_F64_e32, f64>;
|
||||
//defm : RsqPat<V_RSQ_F32_e32, f32>;
|
||||
|
||||
def : RsqPat<V_RSQ_F32_e32, f32>;
|
||||
def : RsqPat<V_RSQ_F64_e32, f64>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2675,13 +2692,6 @@ def : Pat <
|
||||
(V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
|
||||
>;
|
||||
|
||||
def : Pat<
|
||||
(fdiv f64:$src0, f64:$src1),
|
||||
(V_MUL_F64 0 /* src0_modifiers */, $src0,
|
||||
0 /* src1_modifiers */, (V_RCP_F64_e32 $src1),
|
||||
0 /* clamp */, 0 /* omod */)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(int_AMDGPU_cube v4f32:$src),
|
||||
(REG_SEQUENCE VReg_128,
|
||||
@ -2716,16 +2726,12 @@ class Ext32Pat <SDNode ext> : Pat <
|
||||
def : Ext32Pat <zext>;
|
||||
def : Ext32Pat <anyext>;
|
||||
|
||||
let Predicates = [isSICI] in {
|
||||
|
||||
// Offset in an 32Bit VGPR
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, i32:$voff),
|
||||
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
} // End Predicates = [isSICI]
|
||||
|
||||
// The multiplication scales from [0,1] to the unsigned integer range
|
||||
def : Pat <
|
||||
(AMDGPUurecip i32:$src0),
|
||||
@ -2907,7 +2913,6 @@ class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
|
||||
(Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
|
||||
>;
|
||||
|
||||
let Predicates = [isSICI] in {
|
||||
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
|
||||
def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
|
||||
def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
|
||||
@ -2915,7 +2920,6 @@ def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
|
||||
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
|
||||
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
|
||||
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
|
||||
} // End Predicates = [isSICI]
|
||||
|
||||
// BUFFER_LOAD_DWORD*, addr64=0
|
||||
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
|
||||
@ -2954,14 +2958,12 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
|
||||
>;
|
||||
}
|
||||
|
||||
let Predicates = [isSICI] in {
|
||||
defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
|
||||
BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
|
||||
defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
|
||||
BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
|
||||
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
|
||||
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
|
||||
} // End Predicates = [isSICI]
|
||||
|
||||
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
|
||||
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
|
||||
@ -2969,13 +2971,11 @@ class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
|
||||
(Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
|
||||
>;
|
||||
|
||||
let Predicates = [isSICI] in {
|
||||
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
|
||||
def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
|
||||
def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
|
||||
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
|
||||
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
|
||||
} // End Predicates = [isSICI]
|
||||
|
||||
/*
|
||||
class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
|
||||
@ -3245,6 +3245,12 @@ def : Pat <
|
||||
(V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 (trunc i64:$a)),
|
||||
(V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1),
|
||||
(EXTRACT_SUBREG $a, sub0)), 1)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i32 (bswap i32:$a)),
|
||||
(V_BFI_B32 (S_MOV_B32 0x00ff00ff),
|
||||
@ -3257,6 +3263,28 @@ def : Pat <
|
||||
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Fract Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Predicates = [isCI] in {
|
||||
|
||||
// Convert (x - floor(x)) to fract(x)
|
||||
def : Pat <
|
||||
(f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
|
||||
(f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
|
||||
(V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
// Convert (x + (-floor(x))) to fract(x)
|
||||
def : Pat <
|
||||
(f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
|
||||
(f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
|
||||
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
|
||||
>;
|
||||
|
||||
} // End Predicates = [isCI]
|
||||
|
||||
//============================================================================//
|
||||
// Miscellaneous Optimization Patterns
|
||||
//============================================================================//
|
||||
|
@ -88,7 +88,8 @@ private:
|
||||
void Kill(MachineInstr &MI);
|
||||
void Branch(MachineInstr &MI);
|
||||
|
||||
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
|
||||
void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
|
||||
void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
|
||||
void IndirectSrc(MachineInstr &MI);
|
||||
void IndirectDst(MachineInstr &MI);
|
||||
|
||||
@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
|
||||
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
|
||||
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
|
||||
unsigned Idx = MI.getOperand(3).getReg();
|
||||
|
||||
if (AMDGPU::SReg_32RegClass.contains(Idx)) {
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
|
||||
.addReg(Idx);
|
||||
if (Offset) {
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
|
||||
.addReg(Idx)
|
||||
.addImm(Offset);
|
||||
} else {
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
|
||||
.addReg(Idx);
|
||||
}
|
||||
MBB.insert(I, MovRel);
|
||||
} else {
|
||||
|
||||
@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
|
||||
.addReg(AMDGPU::VCC);
|
||||
|
||||
if (Offset) {
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
|
||||
.addReg(AMDGPU::M0)
|
||||
.addImm(Offset);
|
||||
}
|
||||
// Do the actual move
|
||||
MBB.insert(I, MovRel);
|
||||
|
||||
@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
/// \param @VecReg The register which holds element zero of the vector
|
||||
/// being addressed into.
|
||||
/// \param[out] @Reg The base register to use in the indirect addressing instruction.
|
||||
/// \param[in,out] @Offset As an input, this is the constant offset part of the
|
||||
// indirect Index. e.g. v0 = v[VecReg + Offset]
|
||||
// As an output, this is a constant value that needs
|
||||
// to be added to the value stored in M0.
|
||||
void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
|
||||
unsigned &Reg,
|
||||
int &Offset) {
|
||||
unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
|
||||
if (!SubReg)
|
||||
SubReg = VecReg;
|
||||
|
||||
const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
|
||||
int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
|
||||
|
||||
if (RegIdx < 0) {
|
||||
Offset = RegIdx;
|
||||
RegIdx = 0;
|
||||
} else {
|
||||
Offset = 0;
|
||||
}
|
||||
|
||||
Reg = RC->getRegister(RegIdx);
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
|
||||
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
|
||||
|
||||
unsigned Dst = MI.getOperand(0).getReg();
|
||||
unsigned Vec = MI.getOperand(2).getReg();
|
||||
unsigned Off = MI.getOperand(4).getImm();
|
||||
unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
|
||||
if (!SubReg)
|
||||
SubReg = Vec;
|
||||
int Off = MI.getOperand(4).getImm();
|
||||
unsigned Reg;
|
||||
|
||||
computeIndirectRegAndOffset(Vec, Reg, Off);
|
||||
|
||||
MachineInstr *MovRel =
|
||||
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
|
||||
.addReg(SubReg + Off)
|
||||
.addReg(Reg)
|
||||
.addReg(AMDGPU::M0, RegState::Implicit)
|
||||
.addReg(Vec, RegState::Implicit);
|
||||
|
||||
LoadM0(MI, MovRel);
|
||||
LoadM0(MI, MovRel, Off);
|
||||
}
|
||||
|
||||
void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
|
||||
@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
unsigned Dst = MI.getOperand(0).getReg();
|
||||
unsigned Off = MI.getOperand(4).getImm();
|
||||
int Off = MI.getOperand(4).getImm();
|
||||
unsigned Val = MI.getOperand(5).getReg();
|
||||
unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
|
||||
if (!SubReg)
|
||||
SubReg = Dst;
|
||||
unsigned Reg;
|
||||
|
||||
computeIndirectRegAndOffset(Dst, Reg, Off);
|
||||
|
||||
MachineInstr *MovRel =
|
||||
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
|
||||
.addReg(SubReg + Off, RegState::Define)
|
||||
.addReg(Reg, RegState::Define)
|
||||
.addReg(Val)
|
||||
.addReg(AMDGPU::M0, RegState::Implicit)
|
||||
.addReg(Dst, RegState::Implicit);
|
||||
|
||||
LoadM0(MI, MovRel);
|
||||
LoadM0(MI, MovRel, Off);
|
||||
}
|
||||
|
||||
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
@ -447,7 +486,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
Next = std::next(I);
|
||||
|
||||
MachineInstr &MI = *I;
|
||||
if (TII->isDS(MI.getOpcode()))
|
||||
if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
|
||||
NeedWQM = true;
|
||||
|
||||
// Flat uses m0 in case it needs to access LDS.
|
||||
@ -513,12 +552,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
case AMDGPU::SI_INDIRECT_DST_V16:
|
||||
IndirectDst(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::V_INTERP_P1_F32:
|
||||
case AMDGPU::V_INTERP_P2_F32:
|
||||
case AMDGPU::V_INTERP_MOV_F32:
|
||||
NeedWQM = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -14,7 +14,6 @@
|
||||
|
||||
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
@ -47,13 +46,31 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
Reserved.set(AMDGPU::VGPR255);
|
||||
Reserved.set(AMDGPU::VGPR254);
|
||||
|
||||
// Tonga and Iceland can only allocate a fixed number of SGPRs due
|
||||
// to a hw bug.
|
||||
if (ST.hasSGPRInitBug()) {
|
||||
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
|
||||
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
|
||||
// Assume XNACK_MASK is unused.
|
||||
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
|
||||
|
||||
for (unsigned i = Limit; i < NumSGPRs; ++i) {
|
||||
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
|
||||
MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
|
||||
|
||||
for (; R.isValid(); ++R)
|
||||
Reserved.set(*R);
|
||||
}
|
||||
}
|
||||
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
|
||||
|
||||
// FIXME: We should adjust the max number of waves based on LDS size.
|
||||
unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
|
||||
unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
|
||||
ST.getMaxWavesPerCU());
|
||||
unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
|
||||
|
||||
for (regclass_iterator I = regclass_begin(), E = regclass_end();
|
||||
@ -204,7 +221,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
|
||||
}
|
||||
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
|
||||
BuildMI(*MBB, MI, DL,
|
||||
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
|
||||
Spill.VGPR)
|
||||
.addReg(SubReg)
|
||||
.addImm(Spill.Lane);
|
||||
|
||||
@ -236,7 +255,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
if (isM0)
|
||||
SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
|
||||
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
|
||||
BuildMI(*MBB, MI, DL,
|
||||
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
|
||||
SubReg)
|
||||
.addReg(Spill.VGPR)
|
||||
.addImm(Spill.Lane)
|
||||
.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
|
||||
@ -245,7 +266,22 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
.addReg(SubReg);
|
||||
}
|
||||
}
|
||||
TII->insertNOPs(MI, 3);
|
||||
|
||||
// TODO: only do this when it is needed
|
||||
switch (ST.getGeneration()) {
|
||||
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
|
||||
// "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
|
||||
TII->insertNOPs(MI, 3);
|
||||
break;
|
||||
case AMDGPUSubtarget::SEA_ISLANDS:
|
||||
break;
|
||||
default: // VOLCANIC_ISLANDS and later
|
||||
// "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
|
||||
// and later. This also applies to VALUs which write VCC, but we're
|
||||
// unlikely to see VMEM use VCC.
|
||||
TII->insertNOPs(MI, 4);
|
||||
}
|
||||
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
@ -490,14 +526,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
|
||||
}
|
||||
}
|
||||
|
||||
unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
|
||||
switch(WaveCount) {
|
||||
case 10: return 48;
|
||||
case 9: return 56;
|
||||
case 8: return 64;
|
||||
case 7: return 72;
|
||||
case 6: return 80;
|
||||
case 5: return 96;
|
||||
default: return 103;
|
||||
unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
|
||||
unsigned WaveCount) const {
|
||||
if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
|
||||
switch (WaveCount) {
|
||||
case 10: return 80;
|
||||
case 9: return 80;
|
||||
case 8: return 96;
|
||||
default: return 102;
|
||||
}
|
||||
} else {
|
||||
switch(WaveCount) {
|
||||
case 10: return 48;
|
||||
case 9: return 56;
|
||||
case 8: return 64;
|
||||
case 7: return 72;
|
||||
case 6: return 80;
|
||||
case 5: return 96;
|
||||
default: return 103;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
|
||||
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
|
||||
namespace llvm {
|
||||
@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
|
||||
|
||||
/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
|
||||
/// concurrent waves.
|
||||
unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
|
||||
unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
|
||||
unsigned WaveCount) const;
|
||||
|
||||
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
|
||||
const TargetRegisterClass *RC) const;
|
||||
|
@ -209,7 +209,9 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256
|
||||
|
||||
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
|
||||
|
||||
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
|
||||
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
|
||||
let Size = 32;
|
||||
}
|
||||
|
||||
class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
|
||||
let OperandNamespace = "AMDGPU";
|
||||
|
@ -136,6 +136,32 @@ class VOP3e_vi <bits<10> op> : Enc64 {
|
||||
let Inst{63} = src2_modifiers{0};
|
||||
}
|
||||
|
||||
class VOP3be_vi <bits<10> op> : Enc64 {
|
||||
bits<8> vdst;
|
||||
bits<2> src0_modifiers;
|
||||
bits<9> src0;
|
||||
bits<2> src1_modifiers;
|
||||
bits<9> src1;
|
||||
bits<2> src2_modifiers;
|
||||
bits<9> src2;
|
||||
bits<7> sdst;
|
||||
bits<2> omod;
|
||||
bits<1> clamp;
|
||||
|
||||
let Inst{7-0} = vdst;
|
||||
let Inst{14-8} = sdst;
|
||||
let Inst{15} = clamp;
|
||||
let Inst{25-16} = op;
|
||||
let Inst{31-26} = 0x34; //encoding
|
||||
let Inst{40-32} = src0;
|
||||
let Inst{49-41} = src1;
|
||||
let Inst{58-50} = src2;
|
||||
let Inst{60-59} = omod;
|
||||
let Inst{61} = src0_modifiers{0};
|
||||
let Inst{62} = src1_modifiers{0};
|
||||
let Inst{63} = src2_modifiers{0};
|
||||
}
|
||||
|
||||
class EXPe_vi : EXPe {
|
||||
let Inst{31-26} = 0x31; //encoding
|
||||
}
|
||||
|
@ -9,18 +9,6 @@
|
||||
// Instruction definitions for VI and newer.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let SubtargetPredicate = isVI in {
|
||||
|
||||
defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <
|
||||
0x14, "buffer_load_dword", VGPR_32, i32, global_load
|
||||
>;
|
||||
|
||||
defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
|
||||
0x03, "buffer_load_format_xyzw", VReg_128
|
||||
>;
|
||||
|
||||
} // End SubtargetPredicate = isVI
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SMEM Patterns
|
||||
@ -28,37 +16,10 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
|
||||
|
||||
let Predicates = [isVI] in {
|
||||
|
||||
// 1. Offset as 8bit DWORD immediate
|
||||
// 1. Offset as 20bit DWORD immediate
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
|
||||
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MUBUF Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Offset in an 32Bit VGPR
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, i32:$voff),
|
||||
(BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
// Offset in an 32Bit VGPR
|
||||
def : Pat <
|
||||
(SIload_constant v4i32:$sbase, i32:$voff),
|
||||
(BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
/* int_SI_vs_load_input */
|
||||
def : Pat<
|
||||
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
|
||||
(BUFFER_LOAD_FORMAT_XYZW_VI_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_VI_OFFSET,
|
||||
BUFFER_LOAD_DWORD_VI_OFFEN,
|
||||
BUFFER_LOAD_DWORD_VI_IDXEN,
|
||||
BUFFER_LOAD_DWORD_VI_BOTHEN>;
|
||||
|
||||
} // End Predicates = [isVI]
|
||||
|
@ -392,12 +392,25 @@ static bool usesTheStack(const MachineFunction &MF) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
|
||||
unsigned &CallOp,
|
||||
const char *&Symbol) {
|
||||
CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32;
|
||||
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
DebugLoc DL) {
|
||||
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
|
||||
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
|
||||
bool Is64Bit = STI.is64Bit();
|
||||
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
|
||||
const X86RegisterInfo *RegInfo =
|
||||
static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
|
||||
|
||||
if (STI.is64Bit()) {
|
||||
unsigned CallOp;
|
||||
if (Is64Bit)
|
||||
CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
|
||||
else
|
||||
CallOp = X86::CALLpcrel32;
|
||||
|
||||
const char *Symbol;
|
||||
if (Is64Bit) {
|
||||
if (STI.isTargetCygMing()) {
|
||||
Symbol = "___chkstk_ms";
|
||||
} else {
|
||||
@ -407,6 +420,37 @@ void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
|
||||
Symbol = "_alloca";
|
||||
else
|
||||
Symbol = "_chkstk";
|
||||
|
||||
MachineInstrBuilder CI;
|
||||
|
||||
// All current stack probes take AX and SP as input, clobber flags, and
|
||||
// preserve all registers. x86_64 probes leave RSP unmodified.
|
||||
if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
|
||||
// For the large code model, we have to call through a register. Use R11,
|
||||
// as it is scratch in all supported calling conventions.
|
||||
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
|
||||
.addExternalSymbol(Symbol);
|
||||
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
|
||||
} else {
|
||||
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
|
||||
}
|
||||
|
||||
unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
|
||||
unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
|
||||
CI.addReg(AX, RegState::Implicit)
|
||||
.addReg(SP, RegState::Implicit)
|
||||
.addReg(AX, RegState::Define | RegState::Implicit)
|
||||
.addReg(SP, RegState::Define | RegState::Implicit)
|
||||
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
|
||||
|
||||
if (Is64Bit) {
|
||||
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
|
||||
// themselves. It also does not clobber %rax so we can reuse it when
|
||||
// adjusting %rsp.
|
||||
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
|
||||
.addReg(X86::RSP)
|
||||
.addReg(X86::RAX);
|
||||
}
|
||||
}
|
||||
|
||||
/// emitPrologue - Push callee-saved registers onto the stack, which
|
||||
@ -739,11 +783,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
// increments is necessary to ensure that the guard pages used by the OS
|
||||
// virtual memory manager are allocated in correct sequence.
|
||||
if (NumBytes >= StackProbeSize && UseStackProbe) {
|
||||
const char *StackProbeSymbol;
|
||||
unsigned CallOp;
|
||||
|
||||
getStackProbeFunction(STI, CallOp, StackProbeSymbol);
|
||||
|
||||
// Check whether EAX is livein for this function.
|
||||
bool isEAXAlive = isEAXLiveIn(MF);
|
||||
|
||||
@ -772,22 +811,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
|
||||
BuildMI(MBB, MBBI, DL,
|
||||
TII.get(CallOp))
|
||||
.addExternalSymbol(StackProbeSymbol)
|
||||
.addReg(StackPtr, RegState::Define | RegState::Implicit)
|
||||
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
// Save a pointer to the MI where we set AX.
|
||||
MachineBasicBlock::iterator SetRAX = MBBI;
|
||||
--SetRAX;
|
||||
|
||||
// Call __chkstk, __chkstk_ms, or __alloca.
|
||||
emitStackProbeCall(MF, MBB, MBBI, DL);
|
||||
|
||||
// Apply the frame setup flag to all inserted instrs.
|
||||
for (; SetRAX != MBBI; ++SetRAX)
|
||||
SetRAX->setFlag(MachineInstr::FrameSetup);
|
||||
|
||||
if (Is64Bit) {
|
||||
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
|
||||
// themself. It also does not clobber %rax so we can reuse it when
|
||||
// adjusting %rsp.
|
||||
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addReg(X86::RAX)
|
||||
.setMIFlag(MachineInstr::FrameSetup);
|
||||
}
|
||||
if (isEAXAlive) {
|
||||
// Restore EAX
|
||||
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
|
||||
|
@ -27,9 +27,11 @@ public:
|
||||
explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
|
||||
: TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
|
||||
|
||||
static void getStackProbeFunction(const X86Subtarget &STI,
|
||||
unsigned &CallOp,
|
||||
const char *&Symbol);
|
||||
/// Emit a call to the target's stack probe function. This is required for all
|
||||
/// large stack allocations on Windows. The caller is required to materialize
|
||||
/// the number of bytes to probe in RAX/EAX.
|
||||
static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI, DebugLoc DL);
|
||||
|
||||
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "X86ISelLowering.h"
|
||||
#include "Utils/X86ShuffleDecode.h"
|
||||
#include "X86CallingConv.h"
|
||||
#include "X86FrameLowering.h"
|
||||
#include "X86InstrBuilder.h"
|
||||
#include "X86MachineFunctionInfo.h"
|
||||
#include "X86TargetMachine.h"
|
||||
@ -10094,12 +10095,12 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
|
||||
VT.getVectorNumElements() / 2);
|
||||
// Check for patterns which can be matched with a single insert of a 128-bit
|
||||
// subvector.
|
||||
if (isShuffleEquivalent(Mask, 0, 1, 0, 1) ||
|
||||
isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
|
||||
bool OnlyUsesV1 = isShuffleEquivalent(Mask, 0, 1, 0, 1);
|
||||
if (OnlyUsesV1 || isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
|
||||
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
|
||||
DAG.getIntPtrConstant(0));
|
||||
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
|
||||
Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
|
||||
OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0));
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
|
||||
}
|
||||
if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
|
||||
@ -10112,7 +10113,15 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
|
||||
|
||||
// Otherwise form a 128-bit permutation.
|
||||
// FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
|
||||
unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4;
|
||||
int MaskLO = Mask[0];
|
||||
if (MaskLO == SM_SentinelUndef)
|
||||
MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
|
||||
|
||||
int MaskHI = Mask[2];
|
||||
if (MaskHI == SM_SentinelUndef)
|
||||
MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
|
||||
|
||||
unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
|
||||
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
|
||||
DAG.getConstant(PermMask, MVT::i8));
|
||||
}
|
||||
@ -17172,6 +17181,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
|
||||
switch (IntNo) {
|
||||
default: return SDValue(); // Don't custom lower most intrinsics.
|
||||
|
||||
case Intrinsic::x86_avx2_permd:
|
||||
case Intrinsic::x86_avx2_permps:
|
||||
// Operands intentionally swapped. Mask is last operand to intrinsic,
|
||||
// but second operand for node/instruction.
|
||||
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
|
||||
Op.getOperand(2), Op.getOperand(1));
|
||||
|
||||
case Intrinsic::x86_avx512_mask_valign_q_512:
|
||||
case Intrinsic::x86_avx512_mask_valign_d_512:
|
||||
// Vector source operands are swapped.
|
||||
@ -21076,47 +21092,7 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
|
||||
|
||||
assert(!Subtarget->isTargetMachO());
|
||||
|
||||
// The lowering is pretty easy: we're just emitting the call to _alloca. The
|
||||
// non-trivial part is impdef of ESP.
|
||||
|
||||
if (Subtarget->isTargetWin64()) {
|
||||
if (Subtarget->isTargetCygMing()) {
|
||||
// ___chkstk(Mingw64):
|
||||
// Clobbers R10, R11, RAX and EFLAGS.
|
||||
// Updates RSP.
|
||||
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
|
||||
.addExternalSymbol("___chkstk")
|
||||
.addReg(X86::RAX, RegState::Implicit)
|
||||
.addReg(X86::RSP, RegState::Implicit)
|
||||
.addReg(X86::RAX, RegState::Define | RegState::Implicit)
|
||||
.addReg(X86::RSP, RegState::Define | RegState::Implicit)
|
||||
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
|
||||
} else {
|
||||
// __chkstk(MSVCRT): does not update stack pointer.
|
||||
// Clobbers R10, R11 and EFLAGS.
|
||||
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
|
||||
.addExternalSymbol("__chkstk")
|
||||
.addReg(X86::RAX, RegState::Implicit)
|
||||
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
|
||||
// RAX has the offset to be subtracted from RSP.
|
||||
BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
|
||||
.addReg(X86::RSP)
|
||||
.addReg(X86::RAX);
|
||||
}
|
||||
} else {
|
||||
const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() ||
|
||||
Subtarget->isTargetWindowsItanium())
|
||||
? "_chkstk"
|
||||
: "_alloca";
|
||||
|
||||
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
|
||||
.addExternalSymbol(StackProbeSymbol)
|
||||
.addReg(X86::EAX, RegState::Implicit)
|
||||
.addReg(X86::ESP, RegState::Implicit)
|
||||
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
|
||||
.addReg(X86::ESP, RegState::Define | RegState::Implicit)
|
||||
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
|
||||
}
|
||||
X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
|
||||
|
||||
MI->eraseFromParent(); // The pseudo instruction is gone now.
|
||||
return BB;
|
||||
@ -25558,45 +25534,51 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
|
||||
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
|
||||
if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
|
||||
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
|
||||
LHS.getValueType(), RHS, LHS.getOperand(1));
|
||||
return DAG.getSetCC(SDLoc(N), N->getValueType(0),
|
||||
addV, DAG.getConstant(0, addV.getValueType()), CC);
|
||||
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS,
|
||||
LHS.getOperand(1));
|
||||
return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
|
||||
DAG.getConstant(0, addV.getValueType()), CC);
|
||||
}
|
||||
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
|
||||
if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
|
||||
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
|
||||
RHS.getValueType(), LHS, RHS.getOperand(1));
|
||||
return DAG.getSetCC(SDLoc(N), N->getValueType(0),
|
||||
addV, DAG.getConstant(0, addV.getValueType()), CC);
|
||||
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS,
|
||||
RHS.getOperand(1));
|
||||
return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
|
||||
DAG.getConstant(0, addV.getValueType()), CC);
|
||||
}
|
||||
|
||||
if (VT.getScalarType() == MVT::i1) {
|
||||
bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
|
||||
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
|
||||
bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
|
||||
if (!IsSEXT0 && !IsVZero0)
|
||||
return SDValue();
|
||||
bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
|
||||
(RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
|
||||
if (VT.getScalarType() == MVT::i1 &&
|
||||
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
|
||||
bool IsSEXT0 =
|
||||
(LHS.getOpcode() == ISD::SIGN_EXTEND) &&
|
||||
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
|
||||
bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
|
||||
|
||||
if (!IsSEXT1 && !IsVZero1)
|
||||
return SDValue();
|
||||
if (!IsSEXT0 || !IsVZero1) {
|
||||
// Swap the operands and update the condition code.
|
||||
std::swap(LHS, RHS);
|
||||
CC = ISD::getSetCCSwappedOperands(CC);
|
||||
|
||||
IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
|
||||
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
|
||||
IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
|
||||
}
|
||||
|
||||
if (IsSEXT0 && IsVZero1) {
|
||||
assert(VT == LHS.getOperand(0).getValueType() && "Uexpected operand type");
|
||||
if (CC == ISD::SETEQ)
|
||||
assert(VT == LHS.getOperand(0).getValueType() &&
|
||||
"Uexpected operand type");
|
||||
if (CC == ISD::SETGT)
|
||||
return DAG.getConstant(0, VT);
|
||||
if (CC == ISD::SETLE)
|
||||
return DAG.getConstant(1, VT);
|
||||
if (CC == ISD::SETEQ || CC == ISD::SETGE)
|
||||
return DAG.getNOT(DL, LHS.getOperand(0), VT);
|
||||
|
||||
assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
|
||||
"Unexpected condition code!");
|
||||
return LHS.getOperand(0);
|
||||
}
|
||||
if (IsSEXT1 && IsVZero0) {
|
||||
assert(VT == RHS.getOperand(0).getValueType() && "Uexpected operand type");
|
||||
if (CC == ISD::SETEQ)
|
||||
return DAG.getNOT(DL, RHS.getOperand(0), VT);
|
||||
return RHS.getOperand(0);
|
||||
}
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
|
@ -279,7 +279,8 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
|
||||
}
|
||||
|
||||
let isCall = 1, isCodeGenOnly = 1 in
|
||||
// __chkstk(MSVC): clobber R10, R11 and EFLAGS.
|
||||
// __chkstk(MSVC): clobber R10, R11 and EFLAGS
|
||||
// ___chkstk_ms(Mingw64): clobber R10, R11 and EFLAGS
|
||||
// ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
|
||||
let Defs = [RAX, R10, R11, RSP, EFLAGS],
|
||||
Uses = [RSP] in {
|
||||
|
@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
||||
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
|
||||
X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
|
||||
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
|
||||
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
|
||||
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
|
||||
|
@ -47,6 +47,8 @@ using namespace llvm;
|
||||
static cl::opt<std::string>
|
||||
DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
|
||||
cl::ValueRequired);
|
||||
static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body",
|
||||
cl::init(false), cl::Hidden);
|
||||
|
||||
GCOVOptions GCOVOptions::getDefault() {
|
||||
GCOVOptions Options;
|
||||
@ -312,7 +314,7 @@ namespace {
|
||||
class GCOVFunction : public GCOVRecord {
|
||||
public:
|
||||
GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
|
||||
bool UseCfgChecksum)
|
||||
bool UseCfgChecksum, bool ExitBlockBeforeBody)
|
||||
: SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
|
||||
ReturnBlock(1, os) {
|
||||
this->os = os;
|
||||
@ -322,11 +324,13 @@ namespace {
|
||||
|
||||
uint32_t i = 0;
|
||||
for (auto &BB : *F) {
|
||||
// Skip index 1 (0, 2, 3, 4, ...) because that's assigned to the
|
||||
// ReturnBlock.
|
||||
bool first = i == 0;
|
||||
Blocks.insert(std::make_pair(&BB, GCOVBlock(i++ + !first, os)));
|
||||
// Skip index 1 if it's assigned to the ReturnBlock.
|
||||
if (i == 1 && ExitBlockBeforeBody)
|
||||
++i;
|
||||
Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os)));
|
||||
}
|
||||
if (!ExitBlockBeforeBody)
|
||||
ReturnBlock.Number = i;
|
||||
|
||||
std::string FunctionNameAndLine;
|
||||
raw_string_ostream FNLOS(FunctionNameAndLine);
|
||||
@ -469,7 +473,7 @@ static bool functionHasLines(Function *F) {
|
||||
if (Loc.isUnknown()) continue;
|
||||
|
||||
// Artificial lines such as calls to the global constructors.
|
||||
if (Loc.getLine() == 0) continue;
|
||||
if (Loc.getLine() == 0) continue;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -513,7 +517,8 @@ void GCOVProfiler::emitProfileNotes() {
|
||||
EntryBlock.splitBasicBlock(It);
|
||||
|
||||
Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
|
||||
Options.UseCfgChecksum));
|
||||
Options.UseCfgChecksum,
|
||||
DefaultExitBlockBeforeBody));
|
||||
GCOVFunction &Func = *Funcs.back();
|
||||
|
||||
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
|
||||
|
@ -2183,12 +2183,16 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
|
||||
// Handle the floating point versions of equality comparisons too.
|
||||
if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
|
||||
(isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
|
||||
// Floating point -0.0 and 0.0 compare equal, so we can't
|
||||
// propagate a constant based on that comparison.
|
||||
|
||||
// Floating point -0.0 and 0.0 compare equal, so we can only
|
||||
// propagate values if we know that we have a constant and that
|
||||
// its value is non-zero.
|
||||
|
||||
// FIXME: We should do this optimization if 'no signed zeros' is
|
||||
// applicable via an instruction-level fast-math-flag or some other
|
||||
// indicator that relaxed FP semantics are being used.
|
||||
if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero())
|
||||
|
||||
if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
|
||||
Worklist.push_back(std::make_pair(Op0, Op1));
|
||||
}
|
||||
|
||||
|
@ -288,14 +288,11 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,

IntegerType *IT = cast<IntegerType>(IVOperand->getType());
Value *OtherOperand = nullptr;
int OtherOperandIdx = -1;
if (BO->getOperand(0) == IVOperand) {
OtherOperand = BO->getOperand(1);
OtherOperandIdx = 1;
} else {
assert(BO->getOperand(1) == IVOperand && "only other use!");
OtherOperand = BO->getOperand(0);
OtherOperandIdx = 0;
}

bool Changed = false;

@ -1,11 +1,11 @@
This is a set of individual patches, which contain all the customizations to
llvm/clang currently in the FreeBSD base system. These can be applied in
alphabetical order to a pristine llvm/clang 3.6.0 source tree, for example by
alphabetical order to a pristine llvm/clang 3.6.1 source tree, for example by
doing:

svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/final llvm-3.6.0
svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_360/final llvm-3.6.0/tools/clang
cd llvm-3.6.0
svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_361/final llvm-3.6.1
svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_361/final llvm-3.6.1/tools/clang
cd llvm-3.6.1
for p in /usr/src/contrib/llvm/patches/patch-*.diff; do
patch -p0 -f -F0 -E -i $p -s || break
done

@ -44,7 +44,7 @@ Index: tools/clang/include/clang/Sema/Sema.h
===================================================================
--- tools/clang/include/clang/Sema/Sema.h
+++ tools/clang/include/clang/Sema/Sema.h
@@ -8566,6 +8566,7 @@ class Sema {
@@ -8567,6 +8567,7 @@ class Sema {
FST_Strftime,
FST_Strfmon,
FST_Kprintf,
@ -230,7 +230,7 @@ Index: tools/clang/lib/Sema/SemaChecking.cpp
===================================================================
--- tools/clang/lib/Sema/SemaChecking.cpp
+++ tools/clang/lib/Sema/SemaChecking.cpp
@@ -2584,6 +2584,7 @@ Sema::FormatStringType Sema::GetFormatStringType(c
@@ -2603,6 +2603,7 @@ Sema::FormatStringType Sema::GetFormatStringType(c
.Case("strftime", FST_Strftime)
.Case("strfmon", FST_Strfmon)
.Cases("kprintf", "cmn_err", "vcmn_err", "zcmn_err", FST_Kprintf)
@ -238,7 +238,7 @@ Index: tools/clang/lib/Sema/SemaChecking.cpp
.Default(FST_Unknown);
}

@@ -3365,6 +3366,43 @@ CheckPrintfHandler::HandlePrintfSpecifier(const an
@@ -3384,6 +3385,43 @@ CheckPrintfHandler::HandlePrintfSpecifier(const an
CoveredArgs.set(argIndex);
}

@ -282,7 +282,7 @@ Index: tools/clang/lib/Sema/SemaChecking.cpp
// Check for using an Objective-C specific conversion specifier
// in a non-ObjC literal.
if (!ObjCContext && CS.isObjCArg()) {
@@ -3988,7 +4026,8 @@ void Sema::CheckFormatString(const StringLiteral *
@@ -4007,7 +4045,8 @@ void Sema::CheckFormatString(const StringLiteral *
return;
}

@ -292,7 +292,7 @@ Index: tools/clang/lib/Sema/SemaChecking.cpp
CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
numDataArgs, (Type == FST_NSString),
Str, HasVAListArg, Args, format_idx,
@@ -3996,7 +4035,8 @@ void Sema::CheckFormatString(const StringLiteral *
@@ -4015,7 +4054,8 @@ void Sema::CheckFormatString(const StringLiteral *

if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen,
getLangOpts(),

@ -596,7 +596,7 @@ Index: lib/Target/X86/X86FrameLowering.cpp
static unsigned getLEArOpcode(unsigned IsLP64) {
return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
@@ -1848,100 +1865,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi
@@ -1882,100 +1899,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi
#endif
}

@ -697,7 +697,7 @@ Index: lib/Target/X86/X86FrameLowering.cpp
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -1956,7 +1879,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -1990,7 +1913,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
@ -706,7 +706,7 @@ Index: lib/Target/X86/X86FrameLowering.cpp
I = MBB.erase(I);

if (!reserveCallFrame) {
@@ -1976,24 +1899,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -2010,24 +1933,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;

MachineInstr *New = nullptr;
@ -740,7 +740,7 @@ Index: lib/Target/X86/X86FrameLowering.cpp
unsigned Opc = getADDriOpcode(IsLP64, Amount);
New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr).addImm(Amount);
@@ -2011,13 +1928,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -2045,13 +1962,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
return;
}

@ -761,7 +761,7 @@ Index: lib/Target/X86/X86FrameLowering.h
===================================================================
--- lib/Target/X86/X86FrameLowering.h
+++ lib/Target/X86/X86FrameLowering.h
@@ -64,6 +64,8 @@ class X86FrameLowering : public TargetFrameLowerin
@@ -66,6 +66,8 @@ class X86FrameLowering : public TargetFrameLowerin

bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;

File diff suppressed because it is too large
@ -67,7 +67,7 @@ Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
}
}

@@ -7696,11 +7696,8 @@ void SelectionDAGISel::LowerArguments(const Functi
@@ -7697,11 +7697,8 @@ void SelectionDAGISel::LowerArguments(const Functi
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
@ -80,7 +80,7 @@ Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Flags.setOrigAlign(OriginalAlignment);

MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7715,6 +7712,8 @@ void SelectionDAGISel::LowerArguments(const Functi
@@ -7716,6 +7713,8 @@ void SelectionDAGISel::LowerArguments(const Functi
MyFlags.Flags.setOrigAlign(1);
Ins.push_back(MyFlags);
}
@ -287,7 +287,7 @@ Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -11280,7 +11280,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas
@@ -11285,7 +11285,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas
return (Members > 0 && Members <= 4);
}

@ -298,7 +298,7 @@ Index: lib/Target/ARM/ARMISelLowering.cpp
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
if (getEffectiveCallingConv(CallConv, isVarArg) !=
@@ -11289,7 +11291,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse
@@ -11294,7 +11296,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse

HABaseType Base = HA_UNKNOWN;
uint64_t Members = 0;

@ -1,55 +0,0 @@
Pull in r230058 from upstream llvm trunk (by Benjamin Kramer):

LoopRotate: When reconstructing loop simplify form don't split edges
from indirectbrs.

Yet another chapter in the endless story. While this looks like we
leave the loop in a non-canonical state this replicates the logic in
LoopSimplify so it doesn't diverge from the canonical form in any way.

PR21968

This fixes a "Cannot split critical edge from IndirectBrInst" assertion
failure when building the devel/radare2 port.

Introduced here: https://svnweb.freebsd.org/changeset/base/279161

Index: lib/Transforms/Scalar/LoopRotation.cpp
===================================================================
--- lib/Transforms/Scalar/LoopRotation.cpp
+++ lib/Transforms/Scalar/LoopRotation.cpp
@@ -535,6 +535,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool Simplifi
Loop *PredLoop = LI->getLoopFor(*PI);
if (!PredLoop || PredLoop->contains(Exit))
continue;
+ if (isa<IndirectBrInst>((*PI)->getTerminator()))
+ continue;
SplitLatchEdge |= L->getLoopLatch() == *PI;
BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
ExitSplit->moveBefore(Exit);
Index: test/Transforms/LoopRotate/crash.ll
===================================================================
--- test/Transforms/LoopRotate/crash.ll
+++ test/Transforms/LoopRotate/crash.ll
@@ -153,3 +153,21 @@ entry:
"5": ; preds = %"3", %entry
ret void
}
+
+; PR21968
+define void @test8(i1 %C, i8* %P) #0 {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ br i1 %C, label %l_bad, label %for.body
+
+for.body: ; preds = %for.cond
+ indirectbr i8* %P, [label %for.inc, label %l_bad]
+
+for.inc: ; preds = %for.body
+ br label %for.cond
+
+l_bad: ; preds = %for.body, %for.cond
+ ret void
+}

@ -1,811 +0,0 @@
Pull in r231227 from upstream llvm trunk (by Kristof Beyls):

Fix PR22408 - LLVM producing AArch64 TLS relocations that GNU linkers
cannot handle yet.

As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the
GNU linkers ld.bfd and ld.gold currently only support a subset of the
whole range of AArch64 ELF TLS relocations. Furthermore, they assume
that some of the code sequences to access thread-local variables are
produced in a very specific sequence. When the sequence is not as the
linker expects, it can silently mis-relaxe/mis-optimize the
instructions.
Even if that wouldn't be the case, it's good to produce the exact
sequence, as that ensures that linkers can perform optimizing
relaxations.

This patch:

* implements support for 16MiB TLS area size instead of 4GiB TLS area
size. Ideally clang would grow an -mtls-size option to allow
support for both, but that's not part of this patch.
* by default doesn't produce local dynamic access patterns, as even
modern ld.bfd and ld.gold linkers do not support the associated
relocations. An option (-aarch64-elf-ldtls-generation) is added to
enable generation of local dynamic code sequence, but is off by
default.
* makes sure that the exact expected code sequence for local dynamic
and general dynamic accesses is produced, by making use of a new
pseudo instruction. The patch also removes two
(AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing
AArch64-specific pseudo SDNode instructions that are superseded by
the new one (TLSDESC_CALLSEQ).

Introduced here: https://svnweb.freebsd.org/changeset/base/280865

Index: lib/Target/AArch64/AArch64AsmPrinter.cpp
===================================================================
--- lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//

+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
@@ -494,12 +496,47 @@ void AArch64AsmPrinter::EmitInstruction(const Mach
EmitToStreamer(OutStreamer, TmpInst);
return;
}
- case AArch64::TLSDESC_BLR: {
- MCOperand Callee, Sym;
- MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
- MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
+ case AArch64::TLSDESC_CALLSEQ: {
+ /// lower this to:
+ /// adrp x0, :tlsdesc:var
+ /// ldr x1, [x0, #:tlsdesc_lo12:var]
+ /// add x0, x0, #:tlsdesc_lo12:var
+ /// .tlsdesccall var
+ /// blr x1
+ /// (TPIDR_EL0 offset now in x0)
+ const MachineOperand &MO_Sym = MI->getOperand(0);
+ MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+ MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+ MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ MCInstLowering.lowerOperand(MO_Sym, Sym);
+ MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+ MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);

- // First emit a relocation-annotation. This expands to no code, but requests
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(SymTLSDesc);
+ EmitToStreamer(OutStreamer, Adrp);
+
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(SymTLSDescLo12);
+ Ldr.addOperand(MCOperand::CreateImm(0));
+ EmitToStreamer(OutStreamer, Ldr);
+
+ MCInst Add;
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(SymTLSDescLo12);
+ Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(OutStreamer, Add);
+
+ // Emit a relocation-annotation. This expands to no code, but requests
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
@@ -506,12 +543,10 @@ void AArch64AsmPrinter::EmitInstruction(const Mach
TLSDescCall.addOperand(Sym);
EmitToStreamer(OutStreamer, TLSDescCall);

- // Other than that it's just a normal indirect call to the function loaded
- // from the descriptor.
- MCInst BLR;
- BLR.setOpcode(AArch64::BLR);
- BLR.addOperand(Callee);
- EmitToStreamer(OutStreamer, BLR);
+ MCInst Blr;
+ Blr.setOpcode(AArch64::BLR);
+ Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ EmitToStreamer(OutStreamer, Blr);

return;
}
Index: lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
===================================================================
--- lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
- case AArch64::TLSDESC_BLR:
+ case AArch64::TLSDESC_CALLSEQ:
// Make sure it's a local dynamic access.
- if (!I->getOperand(1).isSymbol() ||
- strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ if (!I->getOperand(0).isSymbol() ||
+ strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
break;

if (TLSBaseAddrReg)
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -64,10 +64,18 @@ EnableAArch64ExtrGeneration("aarch64-extr-generati

static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));

+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+ "aarch64-elf-ldtls-generation", cl::Hidden,
+ cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+ cl::init(false));

+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
@@ -760,7 +768,7 @@ const char *AArch64TargetLowering::getTargetNodeNa
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -3049,61 +3057,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
+/// is a function pointer to carry out the resolution.
///
-/// Thus, the ideal call sequence on AArch64 is:
+/// The sequence is:
+/// adrp x0, :tlsdesc:var
+/// ldr x1, [x0, #:tlsdesc_lo12:var]
+/// add x0, x0, #:tlsdesc_lo12:var
+/// .tlsdesccall var
+/// blr x1
+/// (TPIDR_EL0 offset now in x0)
///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
-///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
-///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled, to enable the linker to
+/// optimize/relax this sequence.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+/// above sequence, and expanded really late in the compilation flow, to ensure
+/// the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();

- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);

- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
-
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector<SDValue, 6> Ops;
+ SmallVector<SDValue, 2> Ops;
Ops.push_back(Chain);
- Ops.push_back(Func);
Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);

- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
- Glue = Chain.getValue(1);
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+ SDValue Glue = Chain.getValue(1);

return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
@@ -3114,9 +3095,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
"ELF TLS only supported in small memory model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add -mtls-size command line option and make it control the 16MiB
+ // vs. 4GiB code sequence generation.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+ if (Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+ }

SDValue TPOff;
EVT PtrVT = getPointerTy();
@@ -3127,17 +3117,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD

if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
+ SDValue TPWithOff_lo =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ SDValue TPWithOff =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+ LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3152,19 +3145,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();

- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3173,40 +3153,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD

// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);

// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

- SDValue DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
} else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3214,7 +3177,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);

// Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");

Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -29,9 +29,9 @@ enum {
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.

- // Almost the same as a normal call node, except that a TLSDesc relocation is
- // needed so the linker can relax it correctly if possible.
- TLSDESC_CALL,
+ // Produces the full sequence of instructions for getting the thread pointer
+ // offset of a variable into X0, using the TLSDesc model.
+ TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
@@ -399,8 +399,8 @@ class AArch64TargetLowering : public TargetLowerin
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
+ SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -96,6 +96,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCis

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
+// number of operands (the variable)
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
@@ -229,11 +242,12 @@ def AArch64Prefetch : SDNode<"AArch64ISD::P
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
- SDT_AArch64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;

+
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;

@@ -1049,15 +1063,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym)
let AsmString = ".tlsdesccall $sym";
}

-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;

-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
Index: lib/Target/AArch64/AArch64MCInstLower.cpp
===================================================================
--- lib/Target/AArch64/AArch64MCInstLower.cpp
+++ lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -22,9 +22,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

+extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
+
AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}

@@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandEL
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
Model = Printer.TM.getTLSModel(GV);
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration &&
+ Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+
} else {
assert(MO.isSymbol() &&
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
"unexpected external TLS symbol");
+ // The general dynamic access sequence is used to get the
+ // address of _TLS_MODULE_BASE_.
Model = TLSModel::GeneralDynamic;
}
switch (Model) {
@@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandEL
RefFlags |= AArch64MCExpr::VK_G1;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
RefFlags |= AArch64MCExpr::VK_G0;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+ RefFlags |= AArch64MCExpr::VK_HI12;

if (MO.getTargetFlags() & AArch64II::MO_NC)
RefFlags |= AArch64MCExpr::VK_NC;
Index: lib/Target/AArch64/Utils/AArch64BaseInfo.h
===================================================================
--- lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1229,7 +1229,7 @@ namespace AArch64II {

MO_NO_FLAG,

- MO_FRAGMENT = 0x7,
+ MO_FRAGMENT = 0xf,

/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -1257,26 +1257,31 @@ namespace AArch64II {
/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G0 = 6,

+ /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+ /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left-
+ /// by-12-bits instruction.
+ MO_HI12 = 7,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
- MO_GOT = 8,
+ MO_GOT = 0x10,

/// MO_NC - Indicates whether the linker is expected to check the symbol
/// reference for overflow. For example in an ADRP/ADD pair of relocations
/// the ADRP usually does check, but not the ADD.
- MO_NC = 0x10,
+ MO_NC = 0x20,

/// MO_TLS - Indicates that the operand being accessed is some kind of
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20,
+ MO_TLS = 0x40,

/// MO_CONSTPOOL - This flag indicates that a symbol operand represents
/// the address of a constant pool entry for the symbol, rather than the
/// address of the symbol itself.
- MO_CONSTPOOL = 0x40
+ MO_CONSTPOOL = 0x80
};
} // end namespace AArch64II

Index: test/CodeGen/AArch64/arm64-tls-dynamics.ll
===================================================================
--- test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOLD %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-NOLD-RELOC %s

@general_dynamic_var = external thread_local global i32

@@ -9,22 +11,34 @@ define i32 @test_generaldynamic() {
%val = load i32* @general_dynamic_var
ret i32 %val

- ; FIXME: the adrp instructions are redundant (if harmless).
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NEXT: .tlsdesccall general_dynamic_var
; CHECK-NEXT: blr [[CALLEE]]

+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall general_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+
+
; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
; CHECK: ldr w0, [x[[TP]], x0]
+; CHECK-NOLD: mrs x[[TP:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: ldr w0, [x[[TP]], x0]

; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL

+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}

define i32* @test_generaldynamic_addr() {
@@ -32,12 +46,10 @@ define i32* @test_generaldynamic_addr() {

ret i32* @general_dynamic_var

- ; FIXME: the adrp instructions are redundant (if harmless).
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NEXT: .tlsdesccall general_dynamic_var
; CHECK-NEXT: blr [[CALLEE]]

; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
@@ -44,9 +56,15 @@ define i32* @test_generaldynamic_addr() {
; CHECK: add x0, [[TP]], x0

; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}

@local_dynamic_var = external thread_local(localdynamic) global i32
@@ -58,54 +76,71 @@ define i32 @test_localdynamic() {
ret i32 %val

; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
+; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var
+; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var
+; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK: ldr w0, [x[[TPIDR]], x[[TPOFF]]]

-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: ldr w0, [x[[TPIDR]], x0]

-; CHECK: add x[[TPREL:[0-9]+]], x0, [[DTP_OFFSET]]

-; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
-
-; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]]
-
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC

+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}

define i32* @test_localdynamic_addr() {
; CHECK-LABEL: test_localdynamic_addr:

- ret i32* @local_dynamic_var
-
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
+; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var
+; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var
+; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK: add x0, x[[TPIDR]], x[[TPOFF]]

-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: add x0, x[[TPIDR]], x0
+ ret i32* @local_dynamic_var

-; CHECK: add [[TPREL:x[0-9]+]], x0, [[DTP_OFFSET]]
-
-; CHECK: mrs [[TPIDR:x[0-9]+]], TPIDR_EL0
-
-; CHECK: add x0, [[TPIDR]], [[TPREL]]
-
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC

+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
}

; The entire point of the local-dynamic access model is to have a single call to
@@ -122,11 +157,10 @@ define i32 @test_localdynamic_deduplicate() {
%sum = add i32 %val, %val2
ret i32 %sum

-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK: adrp x[[DTPREL_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[DTPREL_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]

; CHECK-NOT: _TLS_MODULE_BASE_
Index: test/CodeGen/AArch64/arm64-tls-execs.ll
===================================================================
--- test/CodeGen/AArch64/arm64-tls-execs.ll
+++ test/CodeGen/AArch64/arm64-tls-execs.ll
@@ -38,14 +38,13 @@ define i32 @test_local_exec() {
; CHECK-LABEL: test_local_exec:
%val = load i32* @local_exec_var

-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92]
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
-; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
+; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK: ldr w0, [x[[R3]]]

-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
-
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
ret i32 %val
}

@@ -53,11 +52,11 @@ define i32* @test_local_exec_addr() {
; CHECK-LABEL: test_local_exec_addr:
ret i32* @local_exec_var

-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
-; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK: ret

-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
}

@ -1,77 +0,0 @@
Pull in r229911 from upstream llvm trunk (by Benjamin Kramer):

MC: Allow multiple comma-separated expressions on the .uleb128 directive.

For compatiblity with GNU as. Binutils documents this as
'.uleb128 expressions'. Subtle, isn't it?

Introduced here: http://svnweb.freebsd.org/changeset/base/281775

Index: lib/MC/MCParser/AsmParser.cpp
===================================================================
--- lib/MC/MCParser/AsmParser.cpp
+++ lib/MC/MCParser/AsmParser.cpp
@@ -3636,22 +3636,28 @@ bool AsmParser::parseDirectiveSpace(StringRef IDVa
}

/// parseDirectiveLEB128
-/// ::= (.sleb128 | .uleb128) expression
+/// ::= (.sleb128 | .uleb128) [ expression (, expression)* ]
bool AsmParser::parseDirectiveLEB128(bool Signed) {
checkForValidSection();
const MCExpr *Value;

- if (parseExpression(Value))
- return true;
+ for (;;) {
+ if (parseExpression(Value))
+ return true;

- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
+ if (Signed)
+ getStreamer().EmitSLEB128Value(Value);
+ else
+ getStreamer().EmitULEB128Value(Value);

- if (Signed)
- getStreamer().EmitSLEB128Value(Value);
- else
- getStreamer().EmitULEB128Value(Value);
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;

+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+
return false;
}

Index: test/MC/ELF/uleb.s
===================================================================
--- test/MC/ELF/uleb.s
+++ test/MC/ELF/uleb.s
@@ -11,16 +11,17 @@ foo:
.uleb128 128
.uleb128 16383
.uleb128 16384
+ .uleb128 23, 42

// ELF_32: Name: .text
// ELF_32: SectionData (
-// ELF_32: 0000: 00017F80 01FF7F80 8001
+// ELF_32: 0000: 00017F80 01FF7F80 8001172A
// ELF_32: )
// ELF_64: Name: .text
// ELF_64: SectionData (
-// ELF_64: 0000: 00017F80 01FF7F80 8001
+// ELF_64: 0000: 00017F80 01FF7F80 8001172A
// ELF_64: )
// MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// MACHO_32: ('_section_data', '00017f80 01ff7f80 8001')
+// MACHO_32: ('_section_data', '00017f80 01ff7f80 8001172a')
// MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// MACHO_64: ('_section_data', '00017f80 01ff7f80 8001')
+// MACHO_64: ('_section_data', '00017f80 01ff7f80 8001172a')

@ -6972,6 +6972,11 @@ def note_neon_vector_initializer_non_portable_q : Note<
"vcombine_%0%1(vcreate_%0%1(), vcreate_%0%1()) to initialize from integer "
"constants">;

def err_builtin_longjmp_unsupported : Error<
"__builtin_longjmp is not supported for the current target">;
def err_builtin_setjmp_unsupported : Error<
"__builtin_setjmp is not supported for the current target">;

def err_builtin_longjmp_invalid_val : Error<
"argument to __builtin_longjmp must be a constant 1">;
def err_builtin_requires_language : Error<"'%0' is only available in %1">;

Some files were not shown because too many files have changed in this diff