MF head @ r283796

ngie 2015-05-31 01:02:05 +00:00
commit a31ce0ad92
653 changed files with 62016 additions and 16378 deletions

View File

@ -1276,8 +1276,8 @@ _sed= usr.bin/sed
.endif
.if ${BOOTSTRAPPING} < 1000002
_m4= lib/libohash \
usr.bin/m4
_libohash= lib/libohash
_m4= usr.bin/m4
${_bt}-usr.bin/m4: ${_bt}-lib/libohash
.endif
@ -1363,6 +1363,15 @@ _kerberos5_bootstrap_tools= \
.ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g}
.endif
.if ${MK_MANDOCDB} != "no"
_libohash?= lib/libohash
_makewhatis= lib/libsqlite3 \
usr.bin/mandoc
${_bt}-usr.bin/mandoc: ${_bt}-lib/libohash ${_bt}-lib/libsqlite3
.else
_makewhatis=usr.bin/makewhatis
.endif
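# Note: the "?=" above makes the two libohash assignments cooperate: if
# the m4 bootstrap block earlier already set _libohash (BOOTSTRAPPING <
# 1000002), the mandocdb case leaves it alone, so lib/libohash is listed
# only once via ${_libohash} in the bootstrap-tools target below.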
# Rebuild up-to-date libmd for xinstall
${_bt}-usr.bin/xinstall: ${_bt}-lib/libmd
@ -1383,7 +1392,8 @@ bootstrap-tools: .PHONY
${_awk} \
${_cat} \
usr.bin/lorder \
usr.bin/makewhatis \
${_libohash} \
${_makewhatis} \
usr.bin/rpcgen \
${_sed} \
${_yacc} \
@ -1755,6 +1765,7 @@ _cddl_lib= cddl/lib
cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L
cddl/lib/libzfs__L: lib/libgeom__L
cddl/lib/libctf__L: lib/libz__L
.endif
# cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built
# on select architectures though (see cddl/lib/Makefile)
.if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386" || \
@ -1762,7 +1773,6 @@ cddl/lib/libctf__L: lib/libz__L
${MACHINE_CPUARCH} == "arm"
_prebuild_libs+= lib/libproc lib/librtld_db
.endif
.endif
.if ${MK_CRYPT} != "no"
.if ${MK_OPENSSL} != "no"
@ -1806,9 +1816,10 @@ kerberos5/lib/libroken__L: lib/libcrypt__L
kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L
kerberos5/lib/libheimbase__L: lib/libthr__L
kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L
lib/libsqlite3__L: lib/libthr__L
.endif
lib/libsqlite3__L: lib/libthr__L
.if ${MK_GSSAPI} != "no"
_lib_libgssapi= lib/libgssapi
.endif
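# Note: the lib/libsqlite3 -> lib/libthr dependency moved out of the
# conditional block above, apparently because it must also hold when
# Kerberos support is disabled: mandoc's makewhatis now links against
# libsqlite3 as well.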

View File

@ -38,6 +38,71 @@
# xargs -n1 | sort | uniq -d;
# done
# 20150528: PCI IOV device driver methods moved to a separate kobj interface.
OLD_FILES+=usr/share/man/man9/PCI_ADD_VF.9
OLD_FILES+=usr/share/man/man9/PCI_INIT_IOV.9
OLD_FILES+=usr/share/man/man9/PCI_UNINIT_IOV.9
# 20150525: new clang import which bumps version from 3.6.0 to 3.6.1.
OLD_FILES+=usr/lib/clang/3.6.0/include/__stddef_max_align_t.h
OLD_FILES+=usr/lib/clang/3.6.0/include/__wmmintrin_aes.h
OLD_FILES+=usr/lib/clang/3.6.0/include/__wmmintrin_pclmul.h
OLD_FILES+=usr/lib/clang/3.6.0/include/adxintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/altivec.h
OLD_FILES+=usr/lib/clang/3.6.0/include/ammintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/arm_acle.h
OLD_FILES+=usr/lib/clang/3.6.0/include/arm_neon.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx2intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512bwintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512erintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512fintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512vlbwintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avx512vlintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/avxintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/bmi2intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/bmiintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/cpuid.h
OLD_FILES+=usr/lib/clang/3.6.0/include/emmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/f16cintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/fma4intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/fmaintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/ia32intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/immintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/lzcntintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/mm3dnow.h
OLD_FILES+=usr/lib/clang/3.6.0/include/mm_malloc.h
OLD_FILES+=usr/lib/clang/3.6.0/include/mmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/module.modulemap
OLD_FILES+=usr/lib/clang/3.6.0/include/nmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/pmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/popcntintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/prfchwintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/rdseedintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/rtmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/shaintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/smmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/tbmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/tmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/wmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/x86intrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/xmmintrin.h
OLD_FILES+=usr/lib/clang/3.6.0/include/xopintrin.h
OLD_DIRS+=usr/lib/clang/3.6.0/include
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.asan-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.asan-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.asan_cxx-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.asan_cxx-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.profile-arm.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.profile-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.profile-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.san-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.san-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.ubsan-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.ubsan-x86_64.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.ubsan_cxx-i386.a
OLD_FILES+=usr/lib/clang/3.6.0/lib/freebsd/libclang_rt.ubsan_cxx-x86_64.a
OLD_DIRS+=usr/lib/clang/3.6.0/lib/freebsd
OLD_DIRS+=usr/lib/clang/3.6.0/lib
OLD_DIRS+=usr/lib/clang/3.6.0
# 20150521
OLD_FILES+=usr/bin/demandoc
OLD_FILES+=usr/share/man/man1/demandoc.1.gz

View File

@ -31,6 +31,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 11.x IS SLOW:
disable the most expensive debugging functionality run
"ln -s 'abort:false,junk:false' /etc/malloc.conf".)
20150525:
Clang and llvm have been upgraded to the 3.6.1 release. Please see the
20141231 entry below for information about prerequisites and upgrading,
if you are not already using 3.5.0 or higher.
20150521:
TI platform code switched to using vendor DTS files and this update
may break existing systems running on Beaglebone, Beaglebone Black,

View File

@ -29,7 +29,7 @@
.\" @(#)ps.1 8.3 (Berkeley) 4/18/94
.\" $FreeBSD$
.\"
.Dd May 22, 2015
.Dd May 27, 2015
.Dt PS 1
.Os
.Sh NAME
@ -350,6 +350,9 @@ the include file
.In sys/proc.h :
.Bl -column P2_INHERIT_PROTECTED 0x00000001
.It Dv "P2_INHERIT_PROTECTED" Ta No "0x00000001" Ta "New children get P_PROTECTED"
.It Dv "P2_NOTRACE" Ta No "0x00000002" Ta "No ptrace(2) attach or coredumps"
.It Dv "P2_NOTRACE_EXEC" Ta No "0x00000004" Ta "Keep P2_NOPTRACE on exec(2)"
.It Dv "P2_AST_SU" Ta No "0x00000008" Ta "Handles SU ast for kthreads"
.El
.It Cm label
The MAC label of the process.

View File

@ -21,7 +21,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd October 8, 2014
.Dd May 24, 2015
.Dt LOCKSTAT 1
.Os
.Sh NAME
@ -29,7 +29,7 @@
.Nd report kernel lock and profiling statistics
.Sh SYNOPSIS
.Nm
.Op Fl ACEHI
.Op Fl ACEHIV
.Op Fl e Ar event-list
.Op Fl i Ar rate
.Op Fl b | t | h | s Ar depth
@ -90,6 +90,10 @@ to
is restricted to super-user by default.
.Sh OPTIONS
The following options are supported:
.Bl -tag -width indent
.It Fl V
Print the D program used to gather the requested data.
.El
.Ss Event Selection
If no event selection options are specified, the default is
.Fl C .

View File

@ -299,6 +299,8 @@ usage(void)
{
(void) fprintf(stderr,
"Usage: lockstat [options] command [args]\n"
"\nGeneral options:\n\n"
" -V print the corresponding D program\n"
"\nEvent selection options:\n\n"
" -C watch contention events [on by default]\n"
" -E watch error events [off by default]\n"

View File

@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: _elftc.h 3175 2015-03-27 17:21:24Z emaste $
* $Id: _elftc.h 3209 2015-05-17 13:40:46Z kaiwang27 $
*/
/**
@ -342,12 +342,13 @@ struct name { \
#if defined(__GLIBC__) || defined(__linux__)
#ifndef _GNU_SOURCE
/*
* GLIBC-based systems have a global 'char *' pointer referencing
* the executable's name.
*/
extern const char *program_invocation_short_name;
#endif /* !_GNU_SOURCE */
#define ELFTC_GETPROGNAME() program_invocation_short_name

View File

@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: elfdefinitions.h 3178 2015-03-30 18:29:13Z emaste $
* $Id: elfdefinitions.h 3198 2015-05-14 18:36:19Z emaste $
*/
/*
@ -565,6 +565,7 @@ _ELF_DEFINE_EM(EM_SPARC, 2, "SPARC") \
_ELF_DEFINE_EM(EM_386, 3, "Intel 80386") \
_ELF_DEFINE_EM(EM_68K, 4, "Motorola 68000") \
_ELF_DEFINE_EM(EM_88K, 5, "Motorola 88000") \
_ELF_DEFINE_EM(EM_IAMCU, 6, "Intel MCU") \
_ELF_DEFINE_EM(EM_860, 7, "Intel 80860") \
_ELF_DEFINE_EM(EM_MIPS, 8, "MIPS I Architecture") \
_ELF_DEFINE_EM(EM_S370, 9, "IBM System/370 Processor") \

View File

@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: elfcopy.h 3173 2015-03-27 16:46:13Z emaste $
* $Id: elfcopy.h 3221 2015-05-24 23:42:43Z kaiwang27 $
*/
#include <sys/queue.h>
@ -237,6 +237,7 @@ struct elfcopy {
uint64_t *secndx; /* section index map. */
uint64_t *symndx; /* symbol index map. */
unsigned char *v_rel; /* symbols needed by relocation. */
unsigned char *v_grp; /* symbols referred to by section group. */
unsigned char *v_secsym; /* sections with section symbol. */
STAILQ_HEAD(, segment) v_seg; /* list of segments. */
STAILQ_HEAD(, sec_action) v_sac;/* list of section operations. */

View File

@ -39,7 +39,7 @@
#include "elfcopy.h"
ELFTC_VCSID("$Id: main.c 3174 2015-03-27 17:13:41Z emaste $");
ELFTC_VCSID("$Id: main.c 3216 2015-05-23 21:16:36Z kaiwang27 $");
enum options
{
@ -404,8 +404,19 @@ create_elf(struct elfcopy *ecp)
* Insert SHDR table into the internal section list as a "pseudo"
* section, so later it will get sorted and resynced just as "normal"
* sections.
*
* Under FreeBSD, Binutils objcopy always puts the section header
* table after all the sections. We want to do the same here.
*
* Note, however, that the behaviour still differs from Binutils:
* elfcopy checks the FreeBSD OSABI tag to decide whether it needs to
* move the section header table, while Binutils is probably configured
* this way when it is compiled on FreeBSD.
*/
shtab = insert_shtab(ecp, 0);
if (oeh.e_ident[EI_OSABI] == ELFOSABI_FREEBSD)
shtab = insert_shtab(ecp, 1);
else
shtab = insert_shtab(ecp, 0);
/*
* Resync section offsets in the output object. This is needed
@ -485,6 +496,11 @@ free_elf(struct elfcopy *ecp)
free(sec);
}
}
if (ecp->secndx != NULL) {
free(ecp->secndx);
ecp->secndx = NULL;
}
}
/* Create a temporary file. */

View File

@ -34,7 +34,7 @@
#include "elfcopy.h"
ELFTC_VCSID("$Id: sections.c 3185 2015-04-11 08:56:34Z kaiwang27 $");
ELFTC_VCSID("$Id: sections.c 3220 2015-05-24 23:42:39Z kaiwang27 $");
static void add_gnu_debuglink(struct elfcopy *ecp);
static uint32_t calc_crc32(const char *p, size_t len, uint32_t crc);
@ -56,6 +56,7 @@ static void print_data(const char *d, size_t sz);
static void print_section(struct section *s);
static void *read_section(struct section *s, size_t *size);
static void update_reloc(struct elfcopy *ecp, struct section *s);
static void update_section_group(struct elfcopy *ecp, struct section *s);
int
is_remove_section(struct elfcopy *ecp, const char *name)
@ -552,6 +553,14 @@ copy_content(struct elfcopy *ecp)
(s->type == SHT_REL || s->type == SHT_RELA))
filter_reloc(ecp, s);
/*
* The section indices in the SHT_GROUP section need
* to be updated since we might have stripped some
* sections and changed section numbering.
*/
if (s->type == SHT_GROUP)
update_section_group(ecp, s);
if (is_modify_section(ecp, s->name))
modify_section(ecp, s);
@ -571,6 +580,71 @@ copy_content(struct elfcopy *ecp)
}
}
/*
* Update a section group section. The section indices in the SHT_GROUP
* section need updating after the section numbering has changed.
*/
static void
update_section_group(struct elfcopy *ecp, struct section *s)
{
GElf_Shdr ish;
Elf_Data *id;
uint32_t *ws, *wd;
uint64_t n;
size_t ishnum;
int i, j;
if (!elf_getshnum(ecp->ein, &ishnum))
errx(EXIT_FAILURE, "elf_getshnum failed: %s",
elf_errmsg(-1));
if (gelf_getshdr(s->is, &ish) == NULL)
errx(EXIT_FAILURE, "gelf_getehdr() failed: %s",
elf_errmsg(-1));
if ((id = elf_getdata(s->is, NULL)) == NULL)
errx(EXIT_FAILURE, "elf_getdata() failed: %s",
elf_errmsg(-1));
if (ish.sh_size == 0)
return;
if (ish.sh_entsize == 0)
ish.sh_entsize = 4;
ws = id->d_buf;
/* We only support COMDAT sections. */
#ifndef GRP_COMDAT
#define GRP_COMDAT 0x1
#endif
if ((*ws & GRP_COMDAT) == 0)
return;
if ((s->buf = malloc(ish.sh_size)) == NULL)
err(EXIT_FAILURE, "malloc failed");
s->sz = ish.sh_size;
wd = s->buf;
/* Copy the flag word as-is. */
*wd = *ws;
/* Update the section indices. */
n = ish.sh_size / ish.sh_entsize;
for(i = 1, j = 1; (uint64_t)i < n; i++) {
if (ws[i] != SHN_UNDEF && ws[i] < ishnum &&
ecp->secndx[ws[i]] != 0)
wd[j++] = ecp->secndx[ws[i]];
else
s->sz -= 4;
}
s->nocopy = 1;
}
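/*
 * For reference, a SHT_GROUP section's data is just an array of 32-bit
 * words: word 0 is the flag word (GRP_COMDAT), and each remaining word
 * is the section header index of a group member.  A minimal standalone
 * sketch of walking that layout (illustrative only, not part of
 * elfcopy; assumes <stdio.h> and <stdint.h> are available):
 */
static void
print_group_members(const uint32_t *buf, size_t nwords)
{
	size_t i;

	if (nwords == 0 || (buf[0] & GRP_COMDAT) == 0)
		return;			/* only COMDAT groups are handled */
	for (i = 1; i < nwords; i++)	/* word 0 is the flag word */
		printf("member section index: %u\n", (unsigned)buf[i]);
}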
/*
* Filter relocation entries, only keep those entries whose
* symbol is in the keep list.
@ -1028,8 +1102,11 @@ copy_shdr(struct elfcopy *ecp, struct section *s, const char *name, int copy,
osh.sh_flags |= SHF_WRITE;
if (sec_flags & SF_CODE)
osh.sh_flags |= SHF_EXECINSTR;
} else
} else {
osh.sh_flags = ish.sh_flags;
if (ish.sh_type == SHT_REL || ish.sh_type == SHT_RELA)
osh.sh_flags |= SHF_INFO_LINK;
}
}
if (name == NULL)

View File

@ -33,7 +33,7 @@
#include "elfcopy.h"
ELFTC_VCSID("$Id: symbols.c 3191 2015-05-04 17:07:01Z jkoshy $");
ELFTC_VCSID("$Id: symbols.c 3222 2015-05-24 23:47:23Z kaiwang27 $");
/* Symbol table buffer structure. */
struct symbuf {
@ -77,7 +77,8 @@ static int is_weak_symbol(unsigned char st_info);
static int lookup_exact_string(hash_head *hash, const char *buf,
const char *s);
static int generate_symbols(struct elfcopy *ecp);
static void mark_symbols(struct elfcopy *ecp, size_t sc);
static void mark_reloc_symbols(struct elfcopy *ecp, size_t sc);
static void mark_section_group_symbols(struct elfcopy *ecp, size_t sc);
static int match_wildcard(const char *name, const char *pattern);
uint32_t str_hash(const char *s);
@ -160,6 +161,10 @@ is_needed_symbol(struct elfcopy *ecp, int i, GElf_Sym *s)
if (BIT_ISSET(ecp->v_rel, i))
return (1);
/* Symbols referred to by COMDAT sections are needed. */
if (BIT_ISSET(ecp->v_grp, i))
return (1);
/*
* For relocatable files (.o files), global and weak symbols
* are needed.
@ -207,7 +212,10 @@ is_remove_symbol(struct elfcopy *ecp, size_t sc, int i, GElf_Sym *s,
return (1);
if (ecp->v_rel == NULL)
mark_symbols(ecp, sc);
mark_reloc_symbols(ecp, sc);
if (ecp->v_grp == NULL)
mark_section_group_symbols(ecp, sc);
if (is_needed_symbol(ecp, i, s))
return (0);
@ -233,7 +241,7 @@ is_remove_symbol(struct elfcopy *ecp, size_t sc, int i, GElf_Sym *s,
* Mark symbols referred to by relocation entries.
*/
static void
mark_symbols(struct elfcopy *ecp, size_t sc)
mark_reloc_symbols(struct elfcopy *ecp, size_t sc)
{
const char *name;
Elf_Data *d;
@ -311,6 +319,49 @@ mark_symbols(struct elfcopy *ecp, size_t sc)
elf_errmsg(elferr));
}
static void
mark_section_group_symbols(struct elfcopy *ecp, size_t sc)
{
const char *name;
Elf_Scn *s;
GElf_Shdr sh;
size_t indx;
int elferr;
ecp->v_grp = calloc((sc + 7) / 8, 1);
if (ecp->v_grp == NULL)
err(EXIT_FAILURE, "calloc failed");
if (elf_getshstrndx(ecp->ein, &indx) == 0)
errx(EXIT_FAILURE, "elf_getshstrndx failed: %s",
elf_errmsg(-1));
s = NULL;
while ((s = elf_nextscn(ecp->ein, s)) != NULL) {
if (gelf_getshdr(s, &sh) != &sh)
errx(EXIT_FAILURE, "elf_getshdr failed: %s",
elf_errmsg(-1));
if (sh.sh_type != SHT_GROUP)
continue;
if ((name = elf_strptr(ecp->ein, indx, sh.sh_name)) == NULL)
errx(EXIT_FAILURE, "elf_strptr failed: %s",
elf_errmsg(-1));
if (is_remove_section(ecp, name))
continue;
if (sh.sh_info > 0 && sh.sh_info < sc)
BIT_SET(ecp->v_grp, sh.sh_info);
else if (sh.sh_info != 0)
warnx("invalid symbox index");
}
elferr = elf_errno();
if (elferr != 0)
errx(EXIT_FAILURE, "elf_nextscn failed: %s",
elf_errmsg(elferr));
}
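/*
 * Note: for SHT_GROUP sections the ELF gABI defines sh_info as the
 * symbol table index of the group's signature symbol (with sh_link
 * naming the symbol table itself), which is why sh_info is the bit
 * set in v_grp above.
 */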
static int
generate_symbols(struct elfcopy *ecp)
{
@ -351,6 +402,8 @@ generate_symbols(struct elfcopy *ecp)
ecp->symtab->buf = sy_buf;
ecp->strtab->buf = st_buf;
gsym = NULL;
/*
* Create bit vector v_secsym, which is used to mark sections
* that already have corresponding STT_SECTION symbols.
@ -384,7 +437,7 @@ generate_symbols(struct elfcopy *ecp)
/* Symbol table should exist if this function is called. */
if (symndx == 0) {
warnx("can't find .strtab section");
return (0);
goto clean;
}
/* Locate .symtab of input object. */
@ -413,7 +466,6 @@ generate_symbols(struct elfcopy *ecp)
* output object, it is used by update_reloc() later to update
* relocation information.
*/
gsym = NULL;
sc = ish.sh_size / ish.sh_entsize;
if (sc > 0) {
ecp->symndx = calloc(sc, sizeof(*ecp->symndx));
@ -427,7 +479,7 @@ generate_symbols(struct elfcopy *ecp)
if (elferr != 0)
errx(EXIT_FAILURE, "elf_getdata failed: %s",
elf_errmsg(elferr));
return (0);
goto clean;
}
} else
return (0);
@ -523,7 +575,7 @@ generate_symbols(struct elfcopy *ecp)
* check if that only local symbol is the reserved symbol.
*/
if (sy_buf->nls <= 1 && sy_buf->ngs == 0)
return (0);
goto clean;
/*
* Create STT_SECTION symbols for sections that do not already
@ -550,6 +602,7 @@ generate_symbols(struct elfcopy *ecp)
sym.st_value = s->vma;
sym.st_size = 0;
sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
sym.st_other = STV_DEFAULT;
/*
* Don't let add_to_symtab() touch sym.st_shndx.
* In this case, we know the index already.
@ -583,6 +636,12 @@ generate_symbols(struct elfcopy *ecp)
}
return (1);
clean:
free(gsym);
free_symtab(ecp);
return (0);
}
void
@ -624,7 +683,9 @@ create_symtab(struct elfcopy *ecp)
if (((ecp->flags & SYMTAB_INTACT) == 0) && !generate_symbols(ecp)) {
TAILQ_REMOVE(&ecp->v_sec, ecp->symtab, sec_list);
TAILQ_REMOVE(&ecp->v_sec, ecp->strtab, sec_list);
free(ecp->symtab->buf);
free(ecp->symtab);
free(ecp->strtab->buf);
free(ecp->strtab);
ecp->symtab = NULL;
ecp->strtab = NULL;
@ -697,6 +758,23 @@ free_symtab(struct elfcopy *ecp)
}
}
}
if (ecp->symndx != NULL) {
free(ecp->symndx);
ecp->symndx = NULL;
}
if (ecp->v_rel != NULL) {
free(ecp->v_rel);
ecp->v_rel = NULL;
}
if (ecp->v_grp != NULL) {
free(ecp->v_grp);
ecp->v_grp = NULL;
}
if (ecp->v_secsym != NULL) {
free(ecp->v_secsym);
ecp->v_secsym = NULL;
}
}
void

View File

@ -26,7 +26,7 @@
#include "_libdwarf.h"
ELFTC_VCSID("$Id: libdwarf_reloc.c 3149 2015-02-15 19:00:06Z emaste $");
ELFTC_VCSID("$Id: libdwarf_reloc.c 3198 2015-05-14 18:36:19Z emaste $");
Dwarf_Unsigned
_dwarf_get_reloc_type(Dwarf_P_Debug dbg, int is64)
@ -75,6 +75,7 @@ _dwarf_get_reloc_size(Dwarf_Debug dbg, Dwarf_Unsigned rel_type)
return (4);
break;
case EM_386:
case EM_IAMCU:
if (rel_type == R_386_32)
return (4);
break;

View File

@ -36,7 +36,7 @@
#include "_libelftc.h"
ELFTC_VCSID("$Id: libelftc_dem_gnu3.c 3194 2015-05-05 17:55:16Z emaste $");
ELFTC_VCSID("$Id: libelftc_dem_gnu3.c 3212 2015-05-17 13:40:55Z kaiwang27 $");
/**
* @file cpp_demangle.c
@ -50,7 +50,7 @@ ELFTC_VCSID("$Id: libelftc_dem_gnu3.c 3194 2015-05-05 17:55:16Z emaste $");
enum type_qualifier {
TYPE_PTR, TYPE_REF, TYPE_CMX, TYPE_IMG, TYPE_EXT, TYPE_RST, TYPE_VAT,
TYPE_CST
TYPE_CST, TYPE_VEC
};
struct vector_type_qualifier {
@ -84,6 +84,7 @@ struct cpp_demangle_data {
int func_type;
const char *cur; /* current mangled name ptr */
const char *last_sname; /* last source name */
int push_head;
};
#define CPP_DEMANGLE_TRY_LIMIT 128
@ -112,6 +113,8 @@ static int cpp_demangle_read_array(struct cpp_demangle_data *);
static int cpp_demangle_read_encoding(struct cpp_demangle_data *);
static int cpp_demangle_read_expr_primary(struct cpp_demangle_data *);
static int cpp_demangle_read_expression(struct cpp_demangle_data *);
static int cpp_demangle_read_expression_flat(struct cpp_demangle_data *,
char **);
static int cpp_demangle_read_expression_binary(struct cpp_demangle_data *,
const char *, size_t);
static int cpp_demangle_read_expression_unary(struct cpp_demangle_data *,
@ -123,8 +126,12 @@ static int cpp_demangle_read_function(struct cpp_demangle_data *, int *,
static int cpp_demangle_local_source_name(struct cpp_demangle_data *ddata);
static int cpp_demangle_read_local_name(struct cpp_demangle_data *);
static int cpp_demangle_read_name(struct cpp_demangle_data *);
static int cpp_demangle_read_name_flat(struct cpp_demangle_data *,
char**);
static int cpp_demangle_read_nested_name(struct cpp_demangle_data *);
static int cpp_demangle_read_number(struct cpp_demangle_data *, long *);
static int cpp_demangle_read_number_as_string(struct cpp_demangle_data *,
char **);
static int cpp_demangle_read_nv_offset(struct cpp_demangle_data *);
static int cpp_demangle_read_offset(struct cpp_demangle_data *);
static int cpp_demangle_read_offset_number(struct cpp_demangle_data *);
@ -138,6 +145,8 @@ static int cpp_demangle_read_tmpl_arg(struct cpp_demangle_data *);
static int cpp_demangle_read_tmpl_args(struct cpp_demangle_data *);
static int cpp_demangle_read_tmpl_param(struct cpp_demangle_data *);
static int cpp_demangle_read_type(struct cpp_demangle_data *, int);
static int cpp_demangle_read_type_flat(struct cpp_demangle_data *,
char **);
static int cpp_demangle_read_uqname(struct cpp_demangle_data *);
static int cpp_demangle_read_v_offset(struct cpp_demangle_data *);
static char *decode_fp_to_double(const char *, size_t);
@ -156,8 +165,6 @@ static int vector_type_qualifier_init(struct vector_type_qualifier *);
static int vector_type_qualifier_push(struct vector_type_qualifier *,
enum type_qualifier);
static int cpp_demangle_gnu3_push_head;
/**
* @brief Decode the input string by IA-64 C++ ABI style.
*
@ -190,7 +197,6 @@ cpp_demangle_gnu3(const char *org)
if (!cpp_demangle_data_init(&ddata, org + 2))
return (NULL);
cpp_demangle_gnu3_push_head = 0;
rtn = NULL;
if (!cpp_demangle_read_encoding(&ddata))
@ -277,6 +283,7 @@ cpp_demangle_data_init(struct cpp_demangle_data *d, const char *cur)
d->func_type = 0;
d->cur = cur;
d->last_sname = NULL;
d->push_head = 0;
return (1);
@ -309,7 +316,6 @@ cpp_demangle_push_fp(struct cpp_demangle_data *ddata,
fp = ddata->cur;
while (*ddata->cur != 'E')
++ddata->cur;
++ddata->cur;
if ((f = decoder(fp, ddata->cur - fp)) == NULL)
return (0);
@ -320,6 +326,8 @@ cpp_demangle_push_fp(struct cpp_demangle_data *ddata,
free(f);
++ddata->cur;
return (rtn);
}
@ -331,7 +339,7 @@ cpp_demangle_push_str(struct cpp_demangle_data *ddata, const char *str,
if (ddata == NULL || str == NULL || len == 0)
return (0);
if (cpp_demangle_gnu3_push_head > 0)
if (ddata->push_head > 0)
return (vector_str_push(&ddata->output_tmp, str, len));
return (vector_str_push(&ddata->output, str, len));
@ -403,7 +411,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, "*", 1))
goto clean;
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
if (!cpp_demangle_push_subst_v(ddata,
&subst_v))
goto clean;
}
break;
@ -414,7 +423,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, "&", 1))
goto clean;
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
if (!cpp_demangle_push_subst_v(ddata,
&subst_v))
goto clean;
}
break;
@ -425,7 +435,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " complex", 8))
goto clean;
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
if (!cpp_demangle_push_subst_v(ddata,
&subst_v))
goto clean;
}
break;
@ -434,23 +445,26 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (!cpp_demangle_push_str(ddata, " imaginary", 10))
goto clean;
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " imaginary", 10))
if (!vector_str_push(&subst_v, " imaginary",
10))
goto clean;
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
if (!cpp_demangle_push_subst_v(ddata,
&subst_v))
goto clean;
}
break;
case TYPE_EXT:
if (e_idx > v->ext_name.size - 1)
if (v->ext_name.size == 0 ||
e_idx > v->ext_name.size - 1)
goto clean;
if ((e_len = strlen(v->ext_name.container[e_idx])) == 0)
if ((e_len = strlen(v->ext_name.container[e_idx])) ==
0)
goto clean;
if ((buf = malloc(sizeof(char) * (e_len + 1))) == NULL)
if ((buf = malloc(e_len + 2)) == NULL)
goto clean;
memcpy(buf, " ", 1);
memcpy(buf + 1, v->ext_name.container[e_idx], e_len);
snprintf(buf, e_len + 2, " %s",
v->ext_name.container[e_idx]);
if (!cpp_demangle_push_str(ddata, buf, e_len + 1)) {
free(buf);
@ -463,7 +477,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
free(buf);
goto clean;
}
if (!cpp_demangle_push_subst_v(ddata, &subst_v)) {
if (!cpp_demangle_push_subst_v(ddata,
&subst_v)) {
free(buf);
goto clean;
}
@ -478,7 +493,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " restrict", 9))
goto clean;
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
if (!cpp_demangle_push_subst_v(ddata,
&subst_v))
goto clean;
}
break;
@ -489,7 +505,8 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " volatile", 9))
goto clean;
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
if (!cpp_demangle_push_subst_v(ddata,
&subst_v))
goto clean;
}
break;
@ -500,11 +517,42 @@ cpp_demangle_push_type_qualifier(struct cpp_demangle_data *ddata,
if (type_str != NULL) {
if (!vector_str_push(&subst_v, " const", 6))
goto clean;
if (!cpp_demangle_push_subst_v(ddata, &subst_v))
if (!cpp_demangle_push_subst_v(ddata,
&subst_v))
goto clean;
}
break;
case TYPE_VEC:
if (v->ext_name.size == 0 ||
e_idx > v->ext_name.size - 1)
goto clean;
if ((e_len = strlen(v->ext_name.container[e_idx])) ==
0)
goto clean;
if ((buf = malloc(e_len + 12)) == NULL)
goto clean;
snprintf(buf, e_len + 12, " __vector(%s)",
v->ext_name.container[e_idx]);
if (!cpp_demangle_push_str(ddata, buf, e_len + 11)) {
free(buf);
goto clean;
}
if (type_str != NULL) {
if (!vector_str_push(&subst_v, buf,
e_len + 11)) {
free(buf);
goto clean;
}
if (!cpp_demangle_push_subst_v(ddata,
&subst_v)) {
free(buf);
goto clean;
}
}
free(buf);
++e_idx;
break;
};
--idx;
}
@ -655,10 +703,14 @@ cpp_demangle_read_expr_primary(struct cpp_demangle_data *ddata)
switch (*ddata->cur) {
case 'b':
if (*(ddata->cur + 2) != 'E')
return (0);
switch (*(++ddata->cur)) {
case '0':
ddata->cur += 2;
return (cpp_demangle_push_str(ddata, "false", 5));
case '1':
ddata->cur += 2;
return (cpp_demangle_push_str(ddata, "true", 4));
default:
return (0);
@ -707,7 +759,8 @@ cpp_demangle_read_expr_primary(struct cpp_demangle_data *ddata)
++ddata->cur;
}
++ddata->cur;
return (cpp_demangle_push_str(ddata, num, ddata->cur - num));
return (cpp_demangle_push_str(ddata, num,
ddata->cur - num - 1));
default:
return (0);
@ -998,6 +1051,38 @@ cpp_demangle_read_expression(struct cpp_demangle_data *ddata)
return (0);
}
static int
cpp_demangle_read_expression_flat(struct cpp_demangle_data *ddata, char **str)
{
struct vector_str *output;
size_t i, p_idx, idx, exp_len;
char *exp;
output = ddata->push_head > 0 ? &ddata->output_tmp :
&ddata->output;
p_idx = output->size;
if (!cpp_demangle_read_expression(ddata))
return (0);
if ((exp = vector_str_substr(output, p_idx, output->size - 1,
&exp_len)) == NULL)
return (0);
idx = output->size;
for (i = p_idx; i < idx; ++i) {
if (!vector_str_pop(output)) {
free(exp);
return (0);
}
}
*str = exp;
return (1);
}
static int
cpp_demangle_read_expression_binary(struct cpp_demangle_data *ddata,
const char *name, size_t len)
@ -1127,12 +1212,65 @@ cpp_demangle_read_function(struct cpp_demangle_data *ddata, int *ext_c,
static int
cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
{
char *name, *type, *num_str;
long offset;
int rtn;
if (ddata == NULL || *ddata->cur == '\0')
return (0);
/* special name */
switch (SIMPLE_HASH(*ddata->cur, *(ddata->cur + 1))) {
case SIMPLE_HASH('G', 'A'):
if (!cpp_demangle_push_str(ddata, "hidden alias for ", 17))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
return (cpp_demangle_read_encoding(ddata));
case SIMPLE_HASH('G', 'R'):
if (!cpp_demangle_push_str(ddata, "reference temporary #", 21))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
if (!cpp_demangle_read_name_flat(ddata, &name))
return (0);
rtn = 0;
if (!cpp_demangle_read_number_as_string(ddata, &num_str))
goto clean1;
if (!cpp_demangle_push_str(ddata, num_str, strlen(num_str)))
goto clean2;
if (!cpp_demangle_push_str(ddata, " for ", 5))
goto clean2;
if (!cpp_demangle_push_str(ddata, name, strlen(name)))
goto clean2;
rtn = 1;
clean2:
free(num_str);
clean1:
free(name);
return (rtn);
case SIMPLE_HASH('G', 'T'):
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
switch (*ddata->cur) {
case 'n':
if (!cpp_demangle_push_str(ddata,
"non-transaction clone for ", 26))
return (0);
break;
case 't':
default:
if (!cpp_demangle_push_str(ddata,
"transaction clone for ", 22))
return (0);
}
++ddata->cur;
return (cpp_demangle_read_encoding(ddata));
case SIMPLE_HASH('G', 'V'):
/* sentry object for one-time init */
if (!cpp_demangle_push_str(ddata, "guard variable for ", 20))
@ -1154,14 +1292,49 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
return (0);
return (cpp_demangle_read_encoding(ddata));
case SIMPLE_HASH('T', 'C'):
/* construction vtable */
if (!cpp_demangle_push_str(ddata, "construction vtable for ",
24))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
if (!cpp_demangle_read_type_flat(ddata, &type))
return (0);
rtn = 0;
if (!cpp_demangle_read_number(ddata, &offset))
goto clean3;
if (*ddata->cur++ != '_')
goto clean3;
if (!cpp_demangle_read_type(ddata, 0))
goto clean3;
if (!cpp_demangle_push_str(ddata, "-in-", 4))
goto clean3;
if (!cpp_demangle_push_str(ddata, type, strlen(type)))
goto clean3;
rtn = 1;
clean3:
free(type);
return (rtn);
case SIMPLE_HASH('T', 'D'):
/* typeinfo common proxy */
break;
case SIMPLE_HASH('T', 'F'):
/* typeinfo fn */
if (!cpp_demangle_push_str(ddata, "typeinfo fn for ", 16))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
return (cpp_demangle_read_type(ddata, 0));
case SIMPLE_HASH('T', 'h'):
/* virtual function non-virtual override thunk */
if (cpp_demangle_push_str(ddata,
"virtual function non-virtual override ", 38) == 0)
if (!cpp_demangle_push_str(ddata,
"virtual function non-virtual override ", 38))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
@ -1170,24 +1343,51 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
return (0);
return (cpp_demangle_read_encoding(ddata));
case SIMPLE_HASH('T', 'I'):
/* typeinfo structure */
/* FALLTHROUGH */
case SIMPLE_HASH('T', 'S'):
/* RTTI name (NTBS) */
if (!cpp_demangle_push_str(ddata, "typeinfo for ", 14))
case SIMPLE_HASH('T', 'H'):
/* TLS init function */
if (!cpp_demangle_push_str(ddata, "TLS init function for ",
22))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
return (cpp_demangle_read_type(ddata, 1));
break;
case SIMPLE_HASH('T', 'I'):
/* typeinfo structure */
if (!cpp_demangle_push_str(ddata, "typeinfo for ", 13))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
return (cpp_demangle_read_type(ddata, 0));
case SIMPLE_HASH('T', 'J'):
/* java class */
if (!cpp_demangle_push_str(ddata, "java Class for ", 15))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
return (cpp_demangle_read_type(ddata, 0));
case SIMPLE_HASH('T', 'S'):
/* RTTI name (NTBS) */
if (!cpp_demangle_push_str(ddata, "typeinfo name for ", 18))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
return (cpp_demangle_read_type(ddata, 0));
case SIMPLE_HASH('T', 'T'):
/* VTT table */
if (!cpp_demangle_push_str(ddata, "VTT for ", 8))
return (0);
ddata->cur += 2;
return (cpp_demangle_read_type(ddata, 1));
if (*ddata->cur == '\0')
return (0);
return (cpp_demangle_read_type(ddata, 0));
case SIMPLE_HASH('T', 'v'):
/* virtual function virtual override thunk */
@ -1208,7 +1408,17 @@ cpp_demangle_read_encoding(struct cpp_demangle_data *ddata)
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
return (cpp_demangle_read_type(ddata, 1));
return (cpp_demangle_read_type(ddata, 0));
case SIMPLE_HASH('T', 'W'):
/* TLS wrapper function */
if (!cpp_demangle_push_str(ddata, "TLS wrapper function for ",
25))
return (0);
ddata->cur += 2;
if (*ddata->cur == '\0')
return (0);
break;
};
return (cpp_demangle_read_name(ddata));
@ -1270,8 +1480,7 @@ cpp_demangle_read_name(struct cpp_demangle_data *ddata)
if (ddata == NULL || *ddata->cur == '\0')
return (0);
output = cpp_demangle_gnu3_push_head > 0 ?
&ddata->output_tmp : &ddata->output;
output = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
subst_str = NULL;
@ -1326,6 +1535,38 @@ clean:
return (rtn);
}
static int
cpp_demangle_read_name_flat(struct cpp_demangle_data *ddata, char **str)
{
struct vector_str *output;
size_t i, p_idx, idx, name_len;
char *name;
output = ddata->push_head > 0 ? &ddata->output_tmp :
&ddata->output;
p_idx = output->size;
if (!cpp_demangle_read_name(ddata))
return (0);
if ((name = vector_str_substr(output, p_idx, output->size - 1,
&name_len)) == NULL)
return (0);
idx = output->size;
for (i = p_idx; i < idx; ++i) {
if (!vector_str_pop(output)) {
free(name);
return (0);
}
}
*str = name;
return (1);
}
static int
cpp_demangle_read_nested_name(struct cpp_demangle_data *ddata)
{
@ -1355,8 +1596,7 @@ cpp_demangle_read_nested_name(struct cpp_demangle_data *ddata)
++ddata->cur;
}
output = cpp_demangle_gnu3_push_head > 0 ?
&ddata->output_tmp : &ddata->output;
output = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
if (!vector_str_init(&v))
return (0);
@ -1452,6 +1692,24 @@ cpp_demangle_read_number(struct cpp_demangle_data *ddata, long *rtn)
return (1);
}
static int
cpp_demangle_read_number_as_string(struct cpp_demangle_data *ddata, char **str)
{
long n;
if (!cpp_demangle_read_number(ddata, &n)) {
*str = NULL;
return (0);
}
if (asprintf(str, "%ld", n) < 0) {
*str = NULL;
return (0);
}
return (1);
}
static int
cpp_demangle_read_nv_offset(struct cpp_demangle_data *ddata)
{
@ -1581,9 +1839,18 @@ static int
cpp_demangle_read_sname(struct cpp_demangle_data *ddata)
{
long len;
int err;
if (ddata == NULL || cpp_demangle_read_number(ddata, &len) == 0 ||
len <= 0 || cpp_demangle_push_str(ddata, ddata->cur, len) == 0)
len <= 0)
return (0);
if (len == 12 && (memcmp("_GLOBAL__N_1", ddata->cur, 12) == 0))
err = cpp_demangle_push_str(ddata, "(anonymous namespace)", 21);
else
err = cpp_demangle_push_str(ddata, ddata->cur, len);
if (err == 0)
return (0);
assert(ddata->output.size > 0);
@ -1732,8 +1999,7 @@ cpp_demangle_read_subst_std(struct cpp_demangle_data *ddata)
ddata->cur += 2;
output = cpp_demangle_gnu3_push_head > 0 ?
&ddata->output_tmp : &ddata->output;
output = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
p_idx = output->size;
if (!cpp_demangle_read_uqname(ddata))
@ -1783,8 +2049,7 @@ cpp_demangle_read_subst_stdtmpl(struct cpp_demangle_data *ddata,
if (ddata == NULL || str == NULL || len == 0)
return (0);
output = cpp_demangle_gnu3_push_head > 0 ? &ddata->output_tmp :
&ddata->output;
output = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
p_idx = output->size;
substr = NULL;
@ -1852,8 +2117,7 @@ cpp_demangle_read_tmpl_args(struct cpp_demangle_data *ddata)
return (0);
limit = 0;
v = cpp_demangle_gnu3_push_head > 0 ?
&ddata->output_tmp : &ddata->output;
v = ddata->push_head > 0 ? &ddata->output_tmp : &ddata->output;
for (;;) {
idx = v->size;
if (!cpp_demangle_read_tmpl_arg(ddata))
@ -1936,14 +2200,14 @@ cpp_demangle_read_type(struct cpp_demangle_data *ddata, int delimit)
size_t p_idx, type_str_len;
int extern_c, is_builtin;
long len;
char *type_str;
char *type_str, *exp_str, *num_str;
if (ddata == NULL)
return (0);
output = &ddata->output;
if (!strncmp(ddata->output.container[ddata->output.size - 1], ">", 1)) {
cpp_demangle_gnu3_push_head++;
ddata->push_head++;
output = &ddata->output_tmp;
} else if (delimit == 1) {
if (ddata->paren == false) {
@ -1978,7 +2242,7 @@ cpp_demangle_read_type(struct cpp_demangle_data *ddata, int delimit)
extern_c = 0;
is_builtin = 1;
p_idx = output->size;
type_str = NULL;
type_str = exp_str = num_str = NULL;
again:
/* builtin type */
switch (*ddata->cur) {
@ -2024,6 +2288,82 @@ again:
++ddata->cur;
goto rtn;
case 'D':
++ddata->cur;
switch (*ddata->cur) {
case 'd':
/* IEEE 754r decimal floating point (64 bits) */
if (!cpp_demangle_push_str(ddata, "decimal64", 9))
goto clean;
++ddata->cur;
break;
case 'e':
/* IEEE 754r decimal floating point (128 bits) */
if (!cpp_demangle_push_str(ddata, "decimal128", 10))
goto clean;
++ddata->cur;
break;
case 'f':
/* IEEE 754r decimal floating point (32 bits) */
if (!cpp_demangle_push_str(ddata, "decimal32", 9))
goto clean;
++ddata->cur;
break;
case 'h':
/* IEEE 754r half-precision floating point (16 bits) */
if (!cpp_demangle_push_str(ddata, "half", 4))
goto clean;
++ddata->cur;
break;
case 'i':
/* char32_t */
if (!cpp_demangle_push_str(ddata, "char32_t", 8))
goto clean;
++ddata->cur;
break;
case 'n':
/* std::nullptr_t (i.e., decltype(nullptr)) */
if (!cpp_demangle_push_str(ddata, "decltype(nullptr)",
17))
goto clean;
++ddata->cur;
break;
case 's':
/* char16_t */
if (!cpp_demangle_push_str(ddata, "char16_t", 8))
goto clean;
++ddata->cur;
break;
case 'v':
/* gcc vector_size extension. */
++ddata->cur;
if (*ddata->cur == '_') {
++ddata->cur;
if (!cpp_demangle_read_expression_flat(ddata,
&exp_str))
goto clean;
if (!vector_str_push(&v.ext_name, exp_str,
strlen(exp_str)))
goto clean;
} else {
if (!cpp_demangle_read_number_as_string(ddata,
&num_str))
goto clean;
if (!vector_str_push(&v.ext_name, num_str,
strlen(num_str)))
goto clean;
}
if (*ddata->cur != '_')
goto clean;
++ddata->cur;
if (!vector_type_qualifier_push(&v, TYPE_VEC))
goto clean;
goto again;
default:
goto clean;
}
goto rtn;
case 'e':
/* long double */
if (!cpp_demangle_push_str(ddata, "long double", 11))
@ -2118,7 +2458,7 @@ again:
case 'o':
/* unsigned __int128 */
if (!cpp_demangle_push_str(ddata, "unsigned _;int128", 17))
if (!cpp_demangle_push_str(ddata, "unsigned __int128", 17))
goto clean;
++ddata->cur;
goto rtn;
@ -2189,6 +2529,8 @@ again:
if (!vector_str_push(&v.ext_name, ddata->cur, len))
return (0);
ddata->cur += len;
if (!vector_type_qualifier_push(&v, TYPE_EXT))
goto clean;
goto again;
case 'v':
@ -2253,14 +2595,16 @@ rtn:
goto clean;
free(type_str);
free(exp_str);
free(num_str);
vector_type_qualifier_dest(&v);
if (cpp_demangle_gnu3_push_head > 0) {
if (ddata->push_head > 0) {
if (*ddata->cur == 'I' && cpp_demangle_read_tmpl_args(ddata)
== 0)
return (0);
if (--cpp_demangle_gnu3_push_head > 0)
if (--ddata->push_head > 0)
return (1);
if (!vector_str_push(&ddata->output_tmp, " ", 1))
@ -2284,11 +2628,45 @@ rtn:
return (1);
clean:
free(type_str);
free(exp_str);
free(num_str);
vector_type_qualifier_dest(&v);
return (0);
}
static int
cpp_demangle_read_type_flat(struct cpp_demangle_data *ddata, char **str)
{
struct vector_str *output;
size_t i, p_idx, idx, type_len;
char *type;
output = ddata->push_head > 0 ? &ddata->output_tmp :
&ddata->output;
p_idx = output->size;
if (!cpp_demangle_read_type(ddata, 0))
return (0);
if ((type = vector_str_substr(output, p_idx, output->size - 1,
&type_len)) == NULL)
return (0);
idx = output->size;
for (i = p_idx; i < idx; ++i) {
if (!vector_str_pop(output)) {
free(type);
return (0);
}
}
*str = type;
return (1);
}
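/*
 * Illustrative manglings handled by the code above (output follows the
 * strings pushed by this demangler; the names are made up):
 *
 *   _Z3fooDv4_f             => foo(float __vector(4))
 *   _ZN12_GLOBAL__N_13barEv => (anonymous namespace)::bar()
 */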
/*
* Read an unqualified-name: an unqualified name is an operator-name, a
* ctor-dtor-name, or a source-name.

View File

@ -1,3 +1,3 @@
# $Id: os.Linux.mk 994 2010-06-13 10:39:19Z jkoshy $
# $Id: os.Linux.mk 3210 2015-05-17 13:40:49Z kaiwang27 $
CFLAGS+= -Wall
CFLAGS+= -Wall -D_GNU_SOURCE

View File

@ -22,7 +22,7 @@
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
.\" $Id: readelf.1 3195 2015-05-12 17:22:19Z emaste $
.\" $Id: readelf.1 3219 2015-05-24 23:42:34Z kaiwang27 $
.\"
.Dd September 13, 2012
.Os
@ -90,7 +90,7 @@ sections in the ELF object.
.It Fl e | Fl -headers
Print all program, file and section headers in the ELF object.
.It Fl g | Fl -section-groups
This option is recognized, but is ignored.
Print the contents of the section groups in the ELF object.
.It Fl h | Fl -file-header
Print the file header of the ELF object.
.It Fl l | Fl -program-headers

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2009-2014 Kai Wang
* Copyright (c) 2009-2015 Kai Wang
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -46,7 +46,7 @@
#include "_elftc.h"
ELFTC_VCSID("$Id: readelf.c 3189 2015-04-20 17:02:01Z emaste $");
ELFTC_VCSID("$Id: readelf.c 3223 2015-05-25 20:37:57Z emaste $");
/*
* readelf(1) options.
@ -302,6 +302,7 @@ static void dump_gnu_hash(struct readelf *re, struct section *s);
static void dump_hash(struct readelf *re);
static void dump_phdr(struct readelf *re);
static void dump_ppc_attributes(uint8_t *p, uint8_t *pe);
static void dump_section_groups(struct readelf *re);
static void dump_symtab(struct readelf *re, int i);
static void dump_symtabs(struct readelf *re);
static uint8_t *dump_unknown_tag(uint64_t tag, uint8_t *p);
@ -445,6 +446,7 @@ elf_machine(unsigned int mach)
case EM_SPARC: return "Sun SPARC";
case EM_386: return "Intel i386";
case EM_68K: return "Motorola 68000";
case EM_IAMCU: return "Intel MCU";
case EM_88K: return "Motorola 88000";
case EM_860: return "Intel i860";
case EM_MIPS: return "MIPS R3000 Big-Endian only";
@ -1050,6 +1052,7 @@ r_type(unsigned int mach, unsigned int type)
switch(mach) {
case EM_NONE: return "";
case EM_386:
case EM_IAMCU:
switch(type) {
case 0: return "R_386_NONE";
case 1: return "R_386_32";
@ -2381,6 +2384,7 @@ dwarf_reg(unsigned int mach, unsigned int reg)
switch (mach) {
case EM_386:
case EM_IAMCU:
switch (reg) {
case 0: return "eax";
case 1: return "ecx";
@ -4047,6 +4051,61 @@ dump_liblist(struct readelf *re)
#undef Elf_Lib
static void
dump_section_groups(struct readelf *re)
{
struct section *s;
const char *symname;
Elf_Data *d;
uint32_t *w;
int i, j, elferr;
size_t n;
for (i = 0; (size_t) i < re->shnum; i++) {
s = &re->sl[i];
if (s->type != SHT_GROUP)
continue;
(void) elf_errno();
if ((d = elf_getdata(s->scn, NULL)) == NULL) {
elferr = elf_errno();
if (elferr != 0)
warnx("elf_getdata failed: %s",
elf_errmsg(elferr));
continue;
}
if (d->d_size <= 0)
continue;
w = d->d_buf;
/* We only support COMDAT sections. */
#ifndef GRP_COMDAT
#define GRP_COMDAT 0x1
#endif
if ((*w++ & GRP_COMDAT) == 0)
return;
if (s->entsize == 0)
s->entsize = 4;
symname = get_symbol_name(re, s->link, s->info);
n = s->sz / s->entsize;
if (n-- < 1)
return;
printf("\nCOMDAT group section [%5d] `%s' [%s] contains %ju"
" sections:\n", i, s->name, symname, (uintmax_t)n);
printf(" %-10.10s %s\n", "[Index]", "Name");
for (j = 0; (size_t) j < n; j++, w++) {
if (*w >= re->shnum) {
warnx("invalid section index: %u", *w);
continue;
}
printf(" [%5u] %s\n", *w, re->sl[*w].name);
}
}
}
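/*
 * Illustrative output (section and symbol names are hypothetical; the
 * layout follows the printf calls above):
 *
 *   COMDAT group section [    5] `.group' [_ZN3FooC2Ev] contains 2 sections:
 *      [Index]    Name
 *      [    6] .text._ZN3FooC2Ev
 *      [    7] .rela.text._ZN3FooC2Ev
 */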
static uint8_t *
dump_unknown_tag(uint64_t tag, uint8_t *p)
{
@ -6838,6 +6897,8 @@ dump_elf(struct readelf *re)
dump_phdr(re);
if (re->options & RE_SS)
dump_shdr(re);
if (re->options & RE_G)
dump_section_groups(re);
if (re->options & RE_D)
dump_dynamic(re);
if (re->options & RE_R)
@ -7311,7 +7372,7 @@ Usage: %s [options] file...\n\
-c | --archive-index Print the archive symbol table for archives.\n\
-d | --dynamic Print the contents of SHT_DYNAMIC sections.\n\
-e | --headers Print all headers in the object.\n\
-g | --section-groups (accepted, but ignored)\n\
-g | --section-groups Print the contents of the section groups.\n\
-h | --file-header Print the file header for the object.\n\
-l | --program-headers Print the PHDR table for the object.\n\
-n | --notes Print the contents of SHT_NOTE sections.\n\
@ -7365,8 +7426,8 @@ main(int argc, char **argv)
re->options |= RE_AA;
break;
case 'a':
re->options |= RE_AA | RE_D | RE_H | RE_II | RE_L |
RE_R | RE_SS | RE_S | RE_VV;
re->options |= RE_AA | RE_D | RE_G | RE_H | RE_II |
RE_L | RE_R | RE_SS | RE_S | RE_VV;
break;
case 'c':
re->options |= RE_C;

View File

@ -134,6 +134,8 @@ namespace ISD {
/// Index original Function's argument.
unsigned OrigArgIndex;
/// Sentinel value for implicit machine-level input arguments.
static const unsigned NoArgIndex = UINT_MAX;
/// Offset in bytes of current input value relative to the beginning of
/// original argument. E.g. if the argument was split into four 32-bit
@ -147,6 +149,15 @@ namespace ISD {
VT = vt.getSimpleVT();
ArgVT = argvt;
}
bool isOrigArg() const {
return OrigArgIndex != NoArgIndex;
}
unsigned getOrigArgIndex() const {
assert(OrigArgIndex != NoArgIndex && "Implicit machine-level argument");
return OrigArgIndex;
}
};
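// Callers can now guard their argument-index bookkeeping on the new
// sentinel, e.g.:
//   if (Ins[i].isOrigArg()) {
//     std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
//     CurArgIdx = Ins[i].getOrigArgIndex();
//   }
// which is exactly the pattern the AArch64/ARM LowerFormalArguments
// hunks adopt later in this commit.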
/// OutputArg - This struct carries flags and a value for a

View File

@ -2806,6 +2806,11 @@ public:
virtual bool useLoadStackGuardNode() const {
return false;
}
/// Returns true if arguments should be sign-extended in lib calls.
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
return IsSigned;
}
};
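// Sketch of the intent: a target whose ABI keeps narrow integer values
// sign-extended in wider registers (regardless of the IR signedness)
// can override this hook to return true for those types; makeLibCall,
// updated later in this commit, consults it when choosing the sext/zext
// attributes for libcall arguments and the return value.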
/// Given an LLVM IR type and return type attributes, compute the return value

View File

@ -1776,9 +1776,12 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
<< *IsomorphicInc << '\n');
Value *NewInc = OrigInc;
if (OrigInc->getType() != IsomorphicInc->getType()) {
Instruction *IP = isa<PHINode>(OrigInc)
? (Instruction*)L->getHeader()->getFirstInsertionPt()
: OrigInc->getNextNode();
Instruction *IP = nullptr;
if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
IP = PN->getParent()->getFirstInsertionPt();
else
IP = OrigInc->getNextNode();
IRBuilder<> Builder(IP);
Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
NewInc = Builder.

View File

@ -75,10 +75,9 @@ MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
I != E; ++I) {
unsigned MappedDef = *I;
// Source of copy is no longer available for propagation.
if (AvailCopyMap.erase(MappedDef)) {
for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
AvailCopyMap.erase(*SR);
}
AvailCopyMap.erase(MappedDef);
for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR)
AvailCopyMap.erase(*SR);
}
}
}

View File

@ -1160,13 +1160,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
// Early exit if this basic block is in an optnone function.
AttributeSet FnAttrs =
DAG.getMachineFunction().getFunction()->getAttributes();
if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeNone))
return;
// Add all the dag nodes to the worklist.
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
@ -2788,9 +2781,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
Constant = APInt::getAllOnesValue(BitWidth);
for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
if (SplatBitSize % BitWidth == 0) {
Constant = APInt::getAllOnesValue(BitWidth);
for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
}
}
}
@ -11043,7 +11040,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
} else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
// If the input vector is too large, try to split it.
// We don't support having two input vectors that are too large.
if (VecIn2.getNode())
// If the zero vector was used, we cannot split the vector,
// since we'd need 3 inputs.
if (UsesZeroVector || VecIn2.getNode())
return SDValue();
if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
@ -11055,7 +11054,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy()));
VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
DAG.getConstant(0, TLI.getVectorIdxTy()));
UsesZeroVector = false;
} else
return SDValue();
}

View File

@ -497,7 +497,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
OI != E; ++OI) {
const Value *Idx = *OI;
if (auto *StTy = dyn_cast<StructType>(Ty)) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
@ -518,8 +518,8 @@ bool FastISel::selectGetElementPtr(const User *I) {
if (CI->isZero())
continue;
// N = N + Offset
TotalOffs +=
DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue();
TotalOffs += DL.getTypeAllocSize(Ty) * IdxN;
if (TotalOffs >= MaxOffs) {
N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (!N) // Unhandled operand. Halt "fast" selection and bail.

View File

@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(0));
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
&Op, 1, false, dl).first;
&Op, 1, Signed, dl).first;
}

View File

@ -1423,9 +1423,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// If one or more successors has been unscheduled, then the current
// node is no longer available.
if (!TrySU->isAvailable)
if (!TrySU->isAvailable || !TrySU->NodeQueueId)
CurSU = AvailableQueue->pop();
else {
// Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
}

View File

@ -3399,30 +3399,21 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Ty = StTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS);
unsigned PtrSize = PtrTy.getSizeInBits();
APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty));
// If this is a constant subscript, handle it quickly.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
uint64_t Offs =
DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
EVT PTy = TLI.getPointerTy(AS);
unsigned PtrBits = PTy.getSizeInBits();
if (PtrBits < 64)
OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
DAG.getConstant(Offs, MVT::i64));
else
OffsVal = DAG.getConstant(Offs, PTy);
N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
OffsVal);
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
SDValue OffsVal = DAG.getConstant(Offs, PtrTy);
N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal);
continue;
}
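// Worked example: with a 32-bit pointer type, ElementSize is a 32-bit
// APInt (4 for i32 elements) and a constant index of -1 is sextOrTrunc'd
// to 32 bits before the multiply, so Offs comes out as 0xFFFFFFFC (-4)
// directly in pointer width; the removed code computed a 64-bit offset
// first and then had to truncate it.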
// N = N + Idx * ElementSize;
APInt ElementSize =
APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty));
SDValue IdxN = getValue(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend
@ -5727,6 +5718,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Skip the first return-type Attribute to get to params.
Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
Args.push_back(Entry);
// If we have an explicit sret argument that is an Instruction (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
if (Entry.isSRet && isa<Instruction>(V))
isTailCall = false;
}
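// For instance (illustrative IR):
//   %buf = alloca %struct.S
//   call void @f(%struct.S* sret %buf)
// Here the sret argument points at the caller's own stack frame, so
// the call must not be emitted as a tail call.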
// Check if target-independent constraints permit a tail call here.
@ -7353,6 +7349,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
// sret demotion isn't compatible with tail-calls, since the sret argument
// points into the caller's stack frame.
CLI.IsTailCall = false;
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
@ -7638,7 +7638,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0);
ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}

View File

@ -96,18 +96,19 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
Entry.isSExt = isSigned;
Entry.isZExt = !isSigned;
Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned);
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
.setSExtResult(isSigned).setZExtResult(!isSigned);
.setSExtResult(signExtend).setZExtResult(!signExtend);
return LowerCallTo(CLI);
}

View File

@ -177,25 +177,30 @@ bool RuntimeDyldMachO::isCompatibleFile(const object::ObjectFile &Obj) const {
}
template <typename Impl>
void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &ObjImg,
void RuntimeDyldMachOCRTPBase<Impl>::finalizeLoad(const ObjectFile &Obj,
ObjSectionToIDMap &SectionMap) {
unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
ObjSectionToIDMap::iterator i, e;
for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) {
const SectionRef &Section = i->first;
for (const auto &Section : Obj.sections()) {
StringRef Name;
Section.getName(Name);
if (Name == "__eh_frame")
EHFrameSID = i->second;
else if (Name == "__text")
TextSID = i->second;
// Force emission of the __text, __eh_frame, and __gcc_except_tab sections
// if they're present. Otherwise call down to the impl to handle other
// sections that have already been emitted.
if (Name == "__text")
TextSID = findOrEmitSection(Obj, Section, true, SectionMap);
else if (Name == "__eh_frame")
EHFrameSID = findOrEmitSection(Obj, Section, false, SectionMap);
else if (Name == "__gcc_except_tab")
ExceptTabSID = i->second;
else
impl().finalizeSection(ObjImg, i->second, Section);
ExceptTabSID = findOrEmitSection(Obj, Section, true, SectionMap);
else {
auto I = SectionMap.find(Section);
if (I != SectionMap.end())
impl().finalizeSection(Obj, I->second, Section);
}
}
UnregisteredEHFrameSections.push_back(
EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID));
@ -238,7 +243,8 @@ unsigned char *RuntimeDyldMachOCRTPBase<Impl>::processFDE(unsigned char *P,
}
static int64_t computeDelta(SectionEntry *A, SectionEntry *B) {
int64_t ObjDistance = A->ObjAddress - B->ObjAddress;
int64_t ObjDistance =
static_cast<int64_t>(A->ObjAddress) - static_cast<int64_t>(B->ObjAddress);
int64_t MemDistance = A->LoadAddress - B->LoadAddress;
return ObjDistance - MemDistance;
}
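// The static_casts matter here: ObjAddress is an unsigned field, so,
// computed in the unsigned type, A->ObjAddress - B->ObjAddress would
// wrap to a huge positive value whenever B lies above A instead of
// producing the negative delta the caller expects.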

View File

@ -1120,27 +1120,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return ConstantInt::get(CI1->getContext(), C1V | C2V);
case Instruction::Xor:
return ConstantInt::get(CI1->getContext(), C1V ^ C2V);
case Instruction::Shl: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
return ConstantInt::get(CI1->getContext(), C1V.shl(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
case Instruction::LShr: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
return ConstantInt::get(CI1->getContext(), C1V.lshr(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
case Instruction::AShr: {
uint32_t shiftAmt = C2V.getZExtValue();
if (shiftAmt < C1V.getBitWidth())
return ConstantInt::get(CI1->getContext(), C1V.ashr(shiftAmt));
else
return UndefValue::get(C1->getType()); // too big shift is undef
}
case Instruction::Shl:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.shl(C2V));
return UndefValue::get(C1->getType()); // too big shift is undef
case Instruction::LShr:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V));
return UndefValue::get(C1->getType()); // too big shift is undef
case Instruction::AShr:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V));
return UndefValue::get(C1->getType()); // too big shift is undef
}
}
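// Why the rewrite matters (sketch): the removed code truncated the
// shift amount to uint32_t via getZExtValue(), so a wide shift amount
// such as 2^32 wrapped to 0 and passed the range check; APInt::ult()
// compares at full precision, so over-wide shifts now correctly fold
// to undef.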

View File

@ -263,10 +263,12 @@ bool GCOVFunction::readGCDA(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
// required to combine the edge counts that are contained in the GCDA file.
for (uint32_t BlockNo = 0; Count > 0; ++BlockNo) {
// The last block is always reserved for exit block
if (BlockNo >= Blocks.size()-1) {
if (BlockNo >= Blocks.size()) {
errs() << "Unexpected number of edges (in " << Name << ").\n";
return false;
}
if (BlockNo == Blocks.size() - 1)
errs() << "(" << Name << ") has arcs from exit block.\n";
GCOVBlock &Block = *Blocks[BlockNo];
for (size_t EdgeNo = 0, End = Block.getNumDstEdges(); EdgeNo < End;
++EdgeNo) {

View File

@ -333,23 +333,12 @@ void Memory::InvalidateInstructionCache(const void *Addr,
for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize)
asm volatile("icbi 0, %0" : : "r"(Line));
asm volatile("isync");
# elif (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \
defined(__GNUC__)
// FIXME: Can we safely always call this for __GNUC__ everywhere?
const char *Start = static_cast<const char *>(Addr);
const char *End = Start + Len;
__clear_cache(const_cast<char *>(Start), const_cast<char *>(End));
# elif defined(__mips__)
const char *Start = static_cast<const char *>(Addr);
# if defined(ANDROID)
// The declaration of "cacheflush" in Android bionic:
// extern int cacheflush(long start, long end, long flags);
const char *End = Start + Len;
long LStart = reinterpret_cast<long>(const_cast<char *>(Start));
long LEnd = reinterpret_cast<long>(const_cast<char *>(End));
cacheflush(LStart, LEnd, BCACHE);
# else
cacheflush(const_cast<char *>(Start), Len, BCACHE);
# endif
# endif
#endif // end apple
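// Editor's note: hedged sketch of the unified path above. __clear_cache is
// the GCC/Clang builtin for invalidating the instruction cache over a byte
// range; on MIPS it expands to the same kernel assistance that the removed
// cacheflush(2) branches provided, which is why they could be dropped.
#include <cstddef>
extern "C" void __clear_cache(char *Begin, char *End);
static void invalidateICache(const void *Addr, size_t Len) {
  char *Start = const_cast<char *>(static_cast<const char *>(Addr));
  __clear_cache(Start, Start + Len);
}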

View File

@ -10,9 +10,15 @@
#ifdef HAVE___CHKSTK
EXPLICIT_SYMBOL(__chkstk)
#endif
#ifdef HAVE___CHKSTK_MS
EXPLICIT_SYMBOL(__chkstk_ms)
#endif
#ifdef HAVE____CHKSTK
EXPLICIT_SYMBOL(___chkstk)
#endif
#ifdef HAVE____CHKSTK_MS
EXPLICIT_SYMBOL(___chkstk_ms)
#endif
#ifdef HAVE___MAIN
EXPLICIT_SYMBOL(__main) // FIXME: Don't call it.
#endif

View File

@ -2031,18 +2031,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[i].OrigArgIndex;
// Get type of the original argument.
EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
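// Editor's note: the same guard recurs in the ARM, Mips and PPC hunks
// below; a self-contained model of the invariant. Type legalization can
// synthesize Ins[] entries with no originating IR argument, so an index
// into the original argument list is only valid when isOrigArg() is true.
#include <cassert>
#include <vector>
struct InArg {
  static const unsigned NoArgIndex = ~0u;   // mirrors ISD::InputArg's sentinel
  unsigned OrigArgIndex = NoArgIndex;
  bool isOrigArg() const { return OrigArgIndex != NoArgIndex; }
  unsigned getOrigArgIndex() const { assert(isOrigArg()); return OrigArgIndex; }
};
static unsigned walkArgs(const std::vector<InArg> &Ins) {
  unsigned CurArgIdx = 0;
  for (const InArg &A : Ins)
    if (A.isOrigArg())                      // skip synthesized entries
      CurArgIdx = A.getOrigArgIndex();
  return CurArgIdx;
}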

View File

@ -3092,8 +3092,11 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
if (Ins[VA.getValNo()].isOrigArg()) {
std::advance(CurOrigArg,
Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
}
// Arguments stored in registers.
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
@ -3173,7 +3176,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
int index = ArgLocs[i].getValNo();
int index = VA.getValNo();
// Some Ins[] entries become multiple ArgLoc[] entries.
// Process them only once.
@ -3186,6 +3189,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
assert(Ins[index].isOrigArg() &&
"Byval arguments cannot be implicit");
unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
ByValStoreOffset = RoundUpToAlignment(ByValStoreOffset, Flags.getByValAlign());

View File

@ -3667,43 +3667,44 @@ bool MipsAsmParser::parseDirectiveModule() {
return false;
}
if (Lexer.is(AsmToken::Identifier)) {
StringRef Option = Parser.getTok().getString();
Parser.Lex();
if (Option == "oddspreg") {
getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}
return false;
} else if (Option == "nooddspreg") {
if (!isABI_O32()) {
Error(L, "'.module nooddspreg' requires the O32 ABI");
return false;
}
getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}
return false;
} else if (Option == "fp") {
return parseDirectiveModuleFP();
}
return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
StringRef Option;
if (Parser.parseIdentifier(Option)) {
reportParseError("expected .module option identifier");
return false;
}
return false;
if (Option == "oddspreg") {
getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}
return false; // parseDirectiveModule has finished successfully.
} else if (Option == "nooddspreg") {
if (!isABI_O32()) {
Error(L, "'.module nooddspreg' requires the O32 ABI");
return false;
}
getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
// If this is not the end of the statement, report an error.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
reportParseError("unexpected token, expected end of statement");
return false;
}
return false; // parseDirectiveModule has finished successfully.
} else if (Option == "fp") {
return parseDirectiveModuleFP();
} else {
return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
}
}
/// parseDirectiveModuleFP

View File

@ -259,6 +259,11 @@ static DecodeStatus DecodeCacheOp(MCInst &Inst,
uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@ -304,6 +309,10 @@ static DecodeStatus DecodeFMem3(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeFMemCop2R6(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@ -1118,6 +1127,23 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst,
return MCDisassembler::Success;
}
static DecodeStatus DecodeCacheOpR6(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
int Offset = fieldFromInstruction(Insn, 7, 9);
unsigned Hint = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 21, 5);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
Inst.addOperand(MCOperand::CreateReg(Base));
Inst.addOperand(MCOperand::CreateImm(Offset));
Inst.addOperand(MCOperand::CreateImm(Hint));
return MCDisassembler::Success;
}
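// Editor's note: for readers outside the disassembler, a minimal stand-in
// for the fieldFromInstruction() helper used above: it extracts Width bits
// of the instruction encoding starting at bit position Pos.
static unsigned fieldFromInsn(unsigned Insn, unsigned Pos, unsigned Width) {
  return (Insn >> Pos) & ((1u << Width) - 1); // assumes Width < 32
}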
static DecodeStatus DecodeSyncI(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@ -1354,6 +1380,23 @@ static DecodeStatus DecodeFMem3(MCInst &Inst,
return MCDisassembler::Success;
}
static DecodeStatus DecodeFMemCop2R6(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
int Offset = SignExtend32<11>(Insn & 0x07ff);
unsigned Reg = fieldFromInstruction(Insn, 16, 5);
unsigned Base = fieldFromInstruction(Insn, 11, 5);
Reg = getReg(Decoder, Mips::COP2RegClassID, Reg);
Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
Inst.addOperand(MCOperand::CreateReg(Reg));
Inst.addOperand(MCOperand::CreateReg(Base));
Inst.addOperand(MCOperand::CreateImm(Offset));
return MCDisassembler::Success;
}
static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
unsigned Insn,
uint64_t Address,

View File

@ -58,15 +58,15 @@ def MipsInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
def FeatureNoABICalls : SubtargetFeature<"noabicalls", "NoABICalls", "true",
"Disable SVR4-style position-independent code.">;
"Disable SVR4-style position-independent code">;
def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
"General Purpose Registers are 64-bit wide.">;
"General Purpose Registers are 64-bit wide">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
"Support 64-bit FP registers.">;
"Support 64-bit FP registers">;
def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
"Support for FPXX.">;
"Support for FPXX">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
"IEEE 754-2008 NaN encoding.">;
"IEEE 754-2008 NaN encoding">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureO32 : SubtargetFeature<"o32", "ABI", "MipsABIInfo::O32()",
@ -81,7 +81,7 @@ def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
"Disable odd numbered single-precision "
"registers">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
"true", "Enable vector FPU instructions">;
def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
"Mips I ISA Support [highly experimental]">;
def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",

View File

@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
if (Amount == 0)
return;
if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
BuildAddiuSpImm(MBB, I, Amount);
else

View File

@ -379,7 +379,6 @@ class JMP_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
list<dag> Pattern = [];
bit isTerminator = 1;
bit hasDelaySlot = 0;
string DecoderMethod = "DecodeSimm16";
}
class JIALC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", calloffset16,
@ -550,6 +549,7 @@ class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
list<dag> Pattern = [];
string DecoderMethod = "DecodeCacheOpR6";
}
class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
@ -561,6 +561,7 @@ class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayLoad = 1;
string DecoderMethod = "DecodeFMemCop2R6";
}
class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>;
@ -572,6 +573,7 @@ class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayStore = 1;
string DecoderMethod = "DecodeFMemCop2R6";
}
class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>;

View File

@ -132,8 +132,8 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
continue;
}
assert(Ins[i].OrigArgIndex < MF.getFunction()->arg_size());
std::advance(FuncArg, Ins[i].OrigArgIndex);
assert(Ins[i].getOrigArgIndex() < MF.getFunction()->arg_size());
std::advance(FuncArg, Ins[i].getOrigArgIndex());
OriginalArgWasF128.push_back(
originalTypeIsF128(FuncArg->getType(), nullptr));

View File

@ -123,7 +123,7 @@ def CC_MipsN_SoftFloat : CallingConv<[
]>;
def CC_MipsN : CallingConv<[
CCIfType<[i8, i16, i32],
CCIfType<[i8, i16, i32, i64],
CCIfSubtargetNot<"isLittle()",
CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
@ -159,6 +159,10 @@ def CC_MipsN : CallingConv<[
// N32/64 variable arguments.
// All arguments are passed in integer registers.
def CC_MipsN_VarArg : CallingConv<[
CCIfType<[i8, i16, i32, i64],
CCIfSubtargetNot<"isLittle()",
CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
// All integers are promoted to 64-bit.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,

View File

@ -619,6 +619,33 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue ValueIfTrue = N->getOperand(0), ValueIfFalse = N->getOperand(2);
ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(ValueIfFalse);
if (!FalseC || FalseC->getZExtValue())
return SDValue();
// Since RHS (False) is 0, we swap the order of the True/False operands
// (obviously also inverting the condition) so that we can
// take advantage of conditional moves using the $0 register.
// Example:
// return (a != 0) ? x : 0;
// load $reg, x
// movz $reg, $0, a
unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ? MipsISD::CMovFP_F :
MipsISD::CMovFP_T;
SDValue FCC = N->getOperand(1), Glue = N->getOperand(3);
return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(),
ValueIfFalse, FCC, ValueIfTrue, Glue);
}
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
@ -752,6 +779,9 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performDivRemCombine(N, DAG, DCI, Subtarget);
case ISD::SELECT:
return performSELECTCombine(N, DAG, DCI, Subtarget);
case MipsISD::CMovFP_F:
case MipsISD::CMovFP_T:
return performCMovFPCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@ -2039,7 +2069,7 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
DAG.getConstant(0x20, MVT::i32));
DAG.getConstant(VT.getSizeInBits(), MVT::i32));
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond,
DAG.getConstant(0, VT), ShiftLeftLo);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or);
@ -2078,11 +2108,12 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL,
DL, VT, Hi, Shamt);
SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt,
DAG.getConstant(0x20, MVT::i32));
SDValue Shift31 = DAG.getNode(ISD::SRA, DL, VT, Hi, DAG.getConstant(31, VT));
DAG.getConstant(VT.getSizeInBits(), MVT::i32));
SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi,
DAG.getConstant(VT.getSizeInBits() - 1, VT));
Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or);
Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond,
IsSRA ? Shift31 : DAG.getConstant(0, VT), ShiftRightHi);
IsSRA ? Ext : DAG.getConstant(0, VT), ShiftRightHi);
SDValue Ops[2] = {Lo, Hi};
return DAG.getMergeValues(Ops, DL);
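// Editor's note: a plain scalar model of lowerShiftRightParts above,
// assuming a 64-bit value split into 32-bit halves and Shamt < 64. The fix
// replaces the hard-coded 0x20 mask and the literal 31 with values derived
// from VT.getSizeInBits(); 'Ext' is the all-sign-bits word used for SRA.
#include <cstdint>
static void shiftRightParts(uint32_t Lo, uint32_t Hi, unsigned Shamt,
                            bool IsSRA, uint32_t &OutLo, uint32_t &OutHi) {
  const unsigned Bits = 32;                  // VT.getSizeInBits()
  bool Big = (Shamt & Bits) != 0;            // Cond: shift amount >= 32
  unsigned S = Shamt & (Bits - 1);           // hardware shifts use shamt mod 32
  uint32_t Or = (Lo >> S) | (S ? Hi << (Bits - S) : 0);
  uint32_t HiShifted = IsSRA ? uint32_t(int32_t(Hi) >> S) : Hi >> S;
  uint32_t Ext = uint32_t(int32_t(Hi) >> (Bits - 1)); // sign bit replicated
  OutLo = Big ? HiShifted : Or;
  OutHi = Big ? (IsSRA ? Ext : 0) : HiShifted;
}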
@ -2902,13 +2933,16 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(FuncArg, Ins[i].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[i].OrigArgIndex;
if (Ins[i].isOrigArg()) {
std::advance(FuncArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
}
EVT ValVT = VA.getValVT();
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool IsRegLoc = VA.isRegLoc();
if (Flags.isByVal()) {
assert(Ins[i].isOrigArg() && "Byval arguments cannot be implicit");
unsigned FirstByValReg, LastByValReg;
unsigned ByValIdx = CCInfo.getInRegsParamsProcessed();
CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg);
@ -3029,6 +3063,15 @@ MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_Mips);
}
bool
MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
if (Subtarget.hasMips3() && Subtarget.abiUsesSoftFloat()) {
if (Type == MVT::i32)
return true;
}
return IsSigned;
}
SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,

View File

@ -472,6 +472,8 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
// Inline asm support
ConstraintType
getConstraintType(const std::string &Constraint) const override;

View File

@ -458,42 +458,42 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;
def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
MADDS_FM<4, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
MADDS_FM<4, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
MADDS_FM<5, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
MADDS_FM<5, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
MADDS_FM<6, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
MADDS_FM<6, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
MADDS_FM<7, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
MADDS_FM<7, 0>, INSN_MIPS4_32R2_NOT_32R6_64R6;
}
def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
}
let isCodeGenOnly=1 in {
let DecoderNamespace = "Mips64" in {
def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
MADDS_FM<5, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}
let AdditionalPredicates = [NoNaNsFPMath],
isCodeGenOnly=1 in {
DecoderNamespace = "Mips64" in {
def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>,
MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}
//===----------------------------------------------------------------------===//

View File

@ -388,6 +388,8 @@ def MSA128W: RegisterClass<"Mips", [v4i32, v4f32], 128,
(sequence "W%u", 0, 31)>;
def MSA128D: RegisterClass<"Mips", [v2i64, v2f64], 128,
(sequence "W%u", 0, 31)>;
def MSA128WEvens: RegisterClass<"Mips", [v4i32, v4f32], 128,
(decimate (sequence "W%u", 0, 31), 2)>;
def MSACtrl: RegisterClass<"Mips", [i32], 32, (add
MSAIR, MSACSR, MSAAccess, MSASave, MSAModify, MSARequest, MSAMap, MSAUnmap)>;

View File

@ -258,8 +258,12 @@ SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag,
CurDAG->getTargetConstant(Mips::sub_32, VT));
}
SDNode *AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT,
SDValue(Carry, 0), RHS);
// Generate a second addition only if we know that RHS is not a
// constant-zero node.
SDNode *AddCarry = Carry;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
if (!C || C->getZExtValue())
AddCarry = CurDAG->getMachineNode(ADDuOp, DL, VT, SDValue(Carry, 0), RHS);
return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS,
SDValue(AddCarry, 0));

View File

@ -2883,10 +2883,21 @@ emitCOPY_FW(MachineInstr *MI, MachineBasicBlock *BB) const{
unsigned Ws = MI->getOperand(1).getReg();
unsigned Lane = MI->getOperand(2).getImm();
if (Lane == 0)
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_lo);
else {
unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
if (Lane == 0) {
unsigned Wt = Ws;
if (!Subtarget.useOddSPReg()) {
// We must copy to an even-numbered MSA register so that the
// single-precision sub-register is also guaranteed to be even-numbered.
Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
}
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
} else {
unsigned Wt = RegInfo.createVirtualRegister(
Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
&Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
@ -2948,7 +2959,9 @@ MipsSETargetLowering::emitINSERT_FW(MachineInstr *MI,
unsigned Wd_in = MI->getOperand(1).getReg();
unsigned Lane = MI->getOperand(2).getImm();
unsigned Fs = MI->getOperand(3).getReg();
unsigned Wt = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
unsigned Wt = RegInfo.createVirtualRegister(
Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass :
&Mips::MSA128WEvensRegClass);
BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
.addImm(0)

View File

@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
if (Amount == 0)
return;
if (isInt<16>(Amount)) // addi sp, sp, amount
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
else { // Expand immediate that doesn't fit in 16-bit.

View File

@ -2688,9 +2688,10 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
if (Ins[ArgNo].isOrigArg()) {
std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[ArgNo].getOrigArgIndex();
}
/* Respect alignment of argument on the stack. */
unsigned Align =
CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
@ -2704,6 +2705,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@ -3064,9 +3067,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
if (Ins[ArgNo].isOrigArg()) {
std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[ArgNo].getOrigArgIndex();
}
unsigned CurArgOffset = ArgOffset;
// Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
@ -3087,6 +3091,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
if (Flags.isByVal()) {
assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
// ObjSize is the true size, ArgSize rounded up to multiple of registers.
ObjSize = Flags.getByValSize();
ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

View File

@ -97,6 +97,11 @@ def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
"true",
"Enable spilling of VGPRs to scratch memory">;
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"SGPRInitBug",
"true",
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",

View File

@ -40,7 +40,8 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
std::vector<Function*> FuncsToClone;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Function &F = *I;
if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty())
if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
!F.hasFnAttribute(Attribute::NoInline))
FuncsToClone.push_back(&F);
}
@ -54,7 +55,7 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Function &F = *I;
if (F.hasLocalLinkage()) {
if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) {
F.addFnAttr(Attribute::AlwaysInline);
}
}

View File

@ -343,6 +343,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
if (STM.hasSGPRInitBug()) {
if (ProgInfo.NumSGPR > AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG)
llvm_unreachable("Too many SGPRs used with the SGPR init bug");
ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
}
ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode

View File

@ -439,6 +439,31 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
case ISD::STORE: {
// Handle i64 stores here for the same reason mentioned above for loads.
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Value = ST->getValue();
if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore())
break;
SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N),
MVT::v2i32, Value);
SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue,
ST->getBasePtr(), ST->getMemOperand());
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore);
if (NewValue.getOpcode() == ISD::BITCAST) {
Select(NewStore.getNode());
return SelectCode(NewValue.getNode());
}
// getNode() may fold the bitcast if its input was another bitcast. If that
// happens we should only select the new store.
N = NewStore.getNode();
break;
}
case AMDGPUISD::REGISTER_LOAD: {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
@ -761,6 +786,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
SDLoc SL(N);
EVT VT = N->getValueType(0);
@ -770,19 +797,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
SDValue Ops[] = {
Zero, // src0_modifiers
N->getOperand(0), // src0
Zero, // src1_modifiers
N->getOperand(1), // src1
Zero, // src2_modifiers
N->getOperand(2), // src2
False, // clamp
Zero // omod
};
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
SDValue Ops[8];
SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
}

View File

@ -141,9 +141,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::i64, Promote);
AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
@ -162,9 +159,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
// Custom lowering of vector stores is required for local address space
// stores.
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
// XXX: Native v2i32 local address space stores are possible, but not
// currently implemented.
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
@ -832,11 +826,9 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
SmallVector<SDValue, 8> Args;
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
DAG.ExtractVectorElements(A, Args);
DAG.ExtractVectorElements(B, Args);
for (const SDUse &U : Op->ops())
DAG.ExtractVectorElements(U.get(), Args);
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
}
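// Editor's note: a minimal model of the generalization above. Iterating
// all of Op->ops() instead of exactly operands 0 and 1 makes the lowering
// correct for CONCAT_VECTORS nodes with any number of input vectors.
#include <vector>
static std::vector<int> concatAll(const std::vector<std::vector<int>> &Parts) {
  std::vector<int> Out;
  for (const auto &P : Parts)           // previously: only Parts[0], Parts[1]
    Out.insert(Out.end(), P.begin(), P.end());
  return Out;
}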
@ -881,9 +873,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return LowerIntrinsicIABS(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
case AMDGPUIntrinsic::AMDGPU_fract:
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_clamp:
case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
@ -913,10 +902,9 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::AMDGPU_div_fmas:
// FIXME: Dropping bool parameter. Work is needed to support the implicit
// read from VCC.
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4));
case Intrinsic::AMDGPU_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,

View File

@ -140,6 +140,12 @@ public:
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
/// \brief Return the descriptor of the target-specific machine instruction
/// that corresponds to the specified pseudo or native opcode.
const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
return get(pseudoToMCOpcode(Opcode));
}
//===---------------------------------------------------------------------===//
// Pure virtual functions to be implemented by sub-classes.
//===---------------------------------------------------------------------===//

View File

@ -35,6 +35,11 @@ def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
// float, float, float, vcc
def AMDGPUFmasOp : SDTypeProfile<1, 4,
[SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
>;
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@ -153,7 +158,7 @@ def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
// Single or double precision division fixup.
// Special case divide fixup and flags(src0 = Quotient, src1 =

View File

@ -164,10 +164,6 @@ class PrivateStore <SDPatternOperator op> : PrivateMemOp <
(ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;
def extloadi8_private : PrivateLoad <extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;
def extloadi16_private : PrivateLoad <extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;
def load_private : PrivateLoad <load>;
def truncstorei8_private : PrivateStore <truncstorei8>;
@ -231,6 +227,9 @@ def sextloadi8_local : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;
@ -267,6 +266,9 @@ def sextloadi16_local : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
def extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
@ -649,17 +651,10 @@ class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
(RcpInst $src)
>;
multiclass RsqPat<Instruction RsqInst, ValueType vt> {
def : Pat <
(fdiv FP_ONE, (fsqrt vt:$src)),
(RsqInst $src)
>;
def : Pat <
(AMDGPUrcp (fsqrt vt:$src)),
(RsqInst $src)
>;
}
class RsqPat<Instruction RsqInst, ValueType vt> : Pat <
(AMDGPUrcp (fsqrt vt:$src)),
(RsqInst $src)
>;
include "R600Instructions.td"
include "R700Instructions.td"

View File

@ -68,6 +68,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
}

View File

@ -80,7 +80,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
FlatAddressSpace(false), EnableIRStructurizer(true),
EnablePromoteAlloca(false), EnableIfCvt(true),
EnableLoadStoreOpt(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false),
EnableVGPRSpilling(false), SGPRInitBug(false),
DL(computeDataLayout(initializeSubtargetDependencies(GPU, FS))),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)

View File

@ -45,6 +45,10 @@ public:
VOLCANIC_ISLANDS,
};
enum {
FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
};
private:
std::string DevName;
bool Is64bit;
@ -66,6 +70,7 @@ private:
bool CFALUBug;
int LocalMemorySize;
bool EnableVGPRSpilling;
bool SGPRInitBug;
const DataLayout DL;
AMDGPUFrameLowering FrameLowering;
@ -203,6 +208,10 @@ public:
return LocalMemorySize;
}
bool hasSGPRInitBug() const {
return SGPRInitBug;
}
unsigned getAmdKernelCodeChipID() const;
bool enableMachineScheduler() const override {

View File

@ -46,7 +46,7 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
defm : RsqPat<RECIPSQRT_IEEE_cm, f32>;
def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;

View File

@ -69,7 +69,7 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
defm : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def SIN_eg : SIN_Common<0x8D>;
def COS_eg : COS_Common<0x8E>;

View File

@ -291,6 +291,8 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printImmediate64(Op.getImm(), O);
else
llvm_unreachable("Invalid register class size");
} else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
printImmediate32(Op.getImm(), O);
} else {
// We hit this for the immediate instruction bits that don't yet have a
// custom printer.

View File

@ -113,8 +113,12 @@ def : ProcessorModel<"mullins", SIQuarterSpeedModel, [FeatureSeaIslands]>;
// Volcanic Islands
//===----------------------------------------------------------------------===//
def : ProcessorModel<"tonga", SIFullSpeedModel, [FeatureVolcanicIslands]>;
def : ProcessorModel<"tonga", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureSGPRInitBug]
>;
def : ProcessorModel<"iceland", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;
def : ProcessorModel<"iceland", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureSGPRInitBug]
>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel, [FeatureVolcanicIslands]>;

View File

@ -838,6 +838,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::AMDGPU_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_fract:
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@ -1694,7 +1698,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
// XXX - I think PartOffset should give you this, but it seems to give the
// size of the register which isn't useful.
unsigned ValBase = ArgLocs[In.OrigArgIndex].getLocMemOffset();
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
unsigned Offset = 36 + VA.getLocMemOffset();

View File

@ -1193,7 +1193,7 @@ let Predicates = [isR600] in {
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
defm : RsqPat<RECIPSQRT_IEEE_r600, f32>;
def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
def : FROUNDPat <CNDGE_r600, CNDGT_r600>;

View File

@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
Value *handleLoopCondition(Value *Cond, PHINode *Broken);
Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L);
void handleLoop(BranchInst *Term);
@ -207,8 +207,17 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) {
if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
llvm::Loop *L) {
// Only search through PHI nodes which are inside the loop. If we try this
// with PHI nodes that are outside of the loop, we end up inserting new PHI
// nodes outside of the loop which depend on values defined inside the loop.
// This will break the module with
// 'Instruction does not dominate all users!' errors.
PHINode *Phi = nullptr;
if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
BasicBlock *Parent = Phi->getParent();
PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
Value *Ret = NewPhi;
@ -223,7 +232,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
}
Phi->setIncomingValue(i, BoolFalse);
Value *PhiArg = handleLoopCondition(Incoming, Broken);
Value *PhiArg = handleLoopCondition(Incoming, Broken, L);
NewPhi->addIncoming(PhiArg, From);
}
@ -253,7 +262,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
} else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
TerminatorInst *Insert = Parent->getTerminator();
Instruction *Insert;
if (L->contains(Inst)) {
Insert = Parent->getTerminator();
} else {
Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
}
Value *Args[] = { Cond, Broken };
return CallInst::Create(IfBreak, Args, "", Insert);
@ -265,14 +279,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
/// \brief Handle a back edge (loop)
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
BasicBlock *BB = Term->getParent();
llvm::Loop *L = LI->getLoopFor(BB);
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
Value *Arg = handleLoopCondition(Cond, Broken);
Value *Arg = handleLoopCondition(Cond, Broken, L);
BasicBlock *BB = Term->getParent();
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
PI != PE; ++PI) {

View File

@ -35,7 +35,8 @@ enum {
SMRD = 1 << 16,
DS = 1 << 17,
MIMG = 1 << 18,
FLAT = 1 << 19
FLAT = 1 << 19,
WQM = 1 << 20
};
}

View File

@ -209,7 +209,12 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
APInt Imm;
if (FoldingImm) {
const TargetRegisterClass *UseRC = MRI.getRegClass(UseOp.getReg());
unsigned UseReg = UseOp.getReg();
const TargetRegisterClass *UseRC
= TargetRegisterInfo::isVirtualRegister(UseReg) ?
MRI.getRegClass(UseReg) :
TRI.getRegClass(UseReg);
Imm = APInt(64, OpToFold.getImm());
// Split 64-bit constants into 32-bits for folding.
@ -228,8 +233,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// In order to fold immediates into copies, we need to change the
// copy to a MOV.
if (UseMI->getOpcode() == AMDGPU::COPY) {
unsigned MovOp = TII->getMovOpcode(
MRI.getRegClass(UseMI->getOperand(0).getReg()));
unsigned DestReg = UseMI->getOperand(0).getReg();
const TargetRegisterClass *DestRC
= TargetRegisterInfo::isVirtualRegister(DestReg) ?
MRI.getRegClass(DestReg) :
TRI.getRegClass(DestReg);
unsigned MovOp = TII->getMovOpcode(DestRC);
if (MovOp == AMDGPU::COPY)
continue;

View File

@ -89,8 +89,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v16i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
@ -158,8 +156,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setTruncStoreAction(MVT::i32, MVT::i8, Custom);
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
@ -214,6 +210,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
}
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
@ -314,9 +311,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
if (!VT.isSimple() || VT == MVT::Other)
return false;
// XXX - CI changes say "Support for unaligned memory accesses" but I don't
// see what for specifically. The wording everywhere else seems to be the
// same.
// TODO - CI+ supports unaligned memory accesses, but this requires driver
// support.
// XXX - The only mention I see of this in the ISA manual is for LDS direct
// reads the "byte address and must be dword aligned". Is it also true for the
@ -328,12 +324,18 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return Align % 4 == 0;
}
// Smaller than dword value must be aligned.
// FIXME: This should be allowed on CI+
if (VT.bitsLT(MVT::i32))
return false;
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.
if (IsFast)
*IsFast = true;
return VT.bitsGT(MVT::i32);
return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}
EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
@ -448,7 +450,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// We REALLY want the ORIGINAL number of vertex elements here, e.g. a
// three or five element vertex only needs three or five registers,
// NOT four or eight.
Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
for (unsigned j = 0; j != NumElements; ++j) {
@ -531,7 +533,7 @@ SDValue SITargetLowering::LowerFormalArguments(
Offset, Ins[i].Flags.isSExt());
const PointerType *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
// On SI local pointers are just offsets into LDS, so they are always
@ -566,7 +568,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (Arg.VT.isVector()) {
// Build a vector from the registers
Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
SmallVector<SDValue, 4> Regs;
@ -919,6 +921,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
case AMDGPUIntrinsic::AMDGPU_fract:
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
@ -1104,7 +1112,70 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
if (DAG.getTarget().Options.UnsafeFPMath)
return LowerFastFDIV(Op, DAG);
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
NegDivScale0, Mul, DivScale1);
SDValue Scale;
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
const SDValue Hi = DAG.getConstant(1, MVT::i32);
// Figure out which scale to use for div_fmas.
SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
SDValue Scale0Hi
= DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
SDValue Scale1Hi
= DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
} else {
Scale = DivScale1.getValue(1);
}
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
Fma4, Fma3, Mul, Scale);
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
}
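// Editor's note: a hedged scalar model of the FMA chain above, ignoring
// the div_scale/div_fixup range reduction and the SI VCC workaround. The
// sequence is two Newton-Raphson refinements of rcp(y), r <- r*(2 - y*r),
// followed by a residual correction of the initial quotient:
static double fdiv64Model(double x, double y) {
  double r = 1.0 / y;              // Rcp: hardware reciprocal seed
  r = r + r * (1.0 - y * r);       // Fma0/Fma1: first refinement
  r = r + r * (1.0 - y * r);       // Fma2/Fma3: second refinement
  double q = x * r;                // Mul: initial quotient
  return q + r * (x - y * q);      // Fma4/Fmas: correct by the residual
}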
SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
@ -1125,11 +1196,6 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Store->getMemoryVT();
// These stores are legal.
if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
VT.isVector() && VT.getVectorNumElements() == 2 &&
VT.getVectorElementType() == MVT::i32)
return SDValue();
if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
if (VT.isVector() && VT.getVectorNumElements() > 4)
return ScalarizeVectorStore(Op, DAG);
@ -1524,6 +1590,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::UMAX:
case AMDGPUISD::UMIN: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMin3Max3Combine(N, DCI);
break;

View File

@ -82,6 +82,8 @@ private:
/// \brief Type of the last opcode.
InstType LastOpcodeType;
bool LastInstWritesM0;
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@ -106,6 +108,9 @@ private:
/// \brief Resolve all operand dependencies to counter requirements
Counters handleOperands(MachineInstr &MI);
/// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
public:
SIInsertWaits(TargetMachine &tm) :
MachineFunctionPass(ID),
@ -269,6 +274,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// Insert a NOP to break the clause.
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
.addImm(0);
LastInstWritesM0 = false;
}
if (TII->isSMRD(I->getOpcode()))
@ -362,6 +368,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
((Counts.Named.LGKM & 0x7) << 8));
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
return true;
}
@ -403,6 +410,30 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
return Result;
}
void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
if (TRI->ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
LastInstWritesM0 = false;
return;
}
// Set whether this instruction sets M0
LastInstWritesM0 = false;
unsigned NumOperands = I->getNumOperands();
for (unsigned i = 0; i < NumOperands; i++) {
const MachineOperand &Op = I->getOperand(i);
if (Op.isReg() && Op.isDef() && Op.getReg() == AMDGPU::M0)
LastInstWritesM0 = true;
}
}
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@ -417,6 +448,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
WaitedOn = ZeroCounts;
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
@ -433,7 +465,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
Changes |= insertWait(MBB, I, LastIssued);
else
Changes |= insertWait(MBB, I, handleOperands(*I));
pushInstruction(MBB, I);
handleSendMsg(MBB, I);
}
// Wait for everything at the end of the MBB

View File

@ -38,6 +38,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> DS = 0;
field bits<1> MIMG = 0;
field bits<1> FLAT = 0;
field bits<1> WQM = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
@ -64,6 +65,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{17} = DS;
let TSFlags{18} = MIMG;
let TSFlags{19} = FLAT;
let TSFlags{20} = WQM;
// Most instructions require adjustments after selection to satisfy
// operand requirements.
@ -295,18 +297,32 @@ class VOP1e <bits<8> op> : Enc32 {
}
class VOP2e <bits<6> op> : Enc32 {
bits<8> vdst;
bits<9> src0;
bits<8> src1;
bits<8> VDST;
bits<9> SRC0;
bits<8> VSRC1;
let Inst{8-0} = SRC0;
let Inst{16-9} = VSRC1;
let Inst{24-17} = VDST;
let Inst{8-0} = src0;
let Inst{16-9} = src1;
let Inst{24-17} = vdst;
let Inst{30-25} = op;
let Inst{31} = 0x0; //encoding
}
class VOP2_MADKe <bits<6> op> : Enc64 {
bits<8> vdst;
bits<9> src0;
bits<8> vsrc1;
bits<32> src2;
let Inst{8-0} = src0;
let Inst{16-9} = vsrc1;
let Inst{24-17} = vdst;
let Inst{30-25} = op;
let Inst{31} = 0x0; // encoding
let Inst{63-32} = src2;
}
class VOP3e <bits<9> op> : Enc64 {
bits<8> dst;
@ -554,9 +570,6 @@ class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP3Common <outs, ins, asm, pattern>, VOP3be<op>;
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
VOPCCommon <ins, asm, pattern>, VOPCe <op>;
@ -585,9 +598,6 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let SchedRW = [WriteLDS];
}
class DS_si <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
DS <outs, ins, asm, pattern>, DSe<op>;
class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI<outs, ins, asm, pattern> {

View File

@ -121,12 +121,20 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
if (Load0->getOperand(0) != Load1->getOperand(0))
return false;
const ConstantSDNode *Load0Offset =
dyn_cast<ConstantSDNode>(Load0->getOperand(1));
const ConstantSDNode *Load1Offset =
dyn_cast<ConstantSDNode>(Load1->getOperand(1));
if (!Load0Offset || !Load1Offset)
return false;
// Check chain.
if (findChainOperand(Load0) != findChainOperand(Load1))
return false;
Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
Offset0 = Load0Offset->getZExtValue();
Offset1 = Load1Offset->getZExtValue();
return true;
}
@ -333,6 +341,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
} else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
if (DestReg == AMDGPU::VCC) {
if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC)
.addImm(0)
.addReg(SrcReg, getKillRegState(KillSrc));
}
return;
}
assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@ -408,11 +431,15 @@ unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const {
int NewOpc;
// Try to map original to commuted opcode
if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1)
NewOpc = AMDGPU::getCommuteRev(Opcode);
// Check if the commuted (REV) opcode exists on the target.
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
return NewOpc;
// Try to map commuted to original opcode
if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1)
NewOpc = AMDGPU::getCommuteOrig(Opcode);
// Check if the original (non-REV) opcode exists on the target.
if (NewOpc != -1 && pseudoToMCOpcode(NewOpc) != -1)
return NewOpc;
return Opcode;
@ -1121,6 +1148,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
return false;
}
int RegClass = Desc.OpInfo[i].RegClass;
switch (Desc.OpInfo[i].OperandType) {
case MCOI::OPERAND_REGISTER:
if (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm()) {
@ -1131,7 +1160,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
case AMDGPU::OPERAND_REG_IMM32:
break;
case AMDGPU::OPERAND_REG_INLINE_C:
if (MI->getOperand(i).isImm() && !isInlineConstant(MI->getOperand(i))) {
if (isLiteralConstant(MI->getOperand(i))) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
@ -1152,7 +1181,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
if (!MI->getOperand(i).isReg())
continue;
int RegClass = Desc.OpInfo[i].RegClass;
if (RegClass != -1) {
unsigned Reg = MI->getOperand(i).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
@ -1197,31 +1225,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
}
}
// Verify SRC1 for VOP2 and VOPC
if (Src1Idx != -1 && (isVOP2(Opcode) || isVOPC(Opcode))) {
const MachineOperand &Src1 = MI->getOperand(Src1Idx);
if (Src1.isImm()) {
ErrInfo = "VOP[2C] src1 cannot be an immediate.";
return false;
}
}
// Verify VOP3
if (isVOP3(Opcode)) {
if (Src0Idx != -1 && isLiteralConstant(MI->getOperand(Src0Idx))) {
ErrInfo = "VOP3 src0 cannot be a literal constant.";
return false;
}
if (Src1Idx != -1 && isLiteralConstant(MI->getOperand(Src1Idx))) {
ErrInfo = "VOP3 src1 cannot be a literal constant.";
return false;
}
if (Src2Idx != -1 && isLiteralConstant(MI->getOperand(Src2Idx))) {
ErrInfo = "VOP3 src2 cannot be a literal constant.";
return false;
}
}
// Verify misc. restrictions on specific instructions.
if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
@ -1292,6 +1295,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
}
}
@ -2043,6 +2047,24 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHL_B64:
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHLREV_B64;
swapOperands(Inst);
}
break;
case AMDGPU::S_ASHR_I64:
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_ASHRREV_I64;
swapOperands(Inst);
}
break;
case AMDGPU::S_LSHR_B64:
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
NewOpcode = AMDGPU::V_LSHRREV_B64;
swapOperands(Inst);
}
break;
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:


@ -204,6 +204,10 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
}
bool isWQM(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::WQM;
}
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO) const;
bool isLiteralConstant(const MachineOperand &MO) const;
@ -243,7 +247,27 @@ public:
/// the register class of its machine operand.
/// to infer the correct register class based on the other operands.
const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
unsigned OpNo) const;\
unsigned OpNo) const;
/// \brief Return the size in bytes of the operand OpNo on the given
/// instruction opcode.
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
const MCOperandInfo &OpInfo = get(Opcode).OpInfo[OpNo];
if (OpInfo.RegClass == -1) {
// If this is an immediate operand, this must be a 32-bit literal.
assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
return 4;
}
return RI.getRegClass(OpInfo.RegClass)->getSize();
}
/// \brief This form should usually be preferred since it handles operands
/// with unknown register classes.
unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
return getOpRegClass(MI, OpNo)->getSize();
}
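
A standalone rendering of the getOpSize(Opcode, OpNo) logic above, with a made-up record standing in for MCOperandInfo and the register-class query:

// Hypothetical stand-in for MCOperandInfo: RegClass == -1 marks an
// immediate operand, otherwise SizeBytes gives the register class size.
struct OpInfoRec {
  int RegClass;
  unsigned SizeBytes;
};

unsigned getOpSize(const OpInfoRec &OpInfo) {
  if (OpInfo.RegClass == -1)
    return 4;              // immediate operands are 32-bit literals
  return OpInfo.SizeBytes; // RI.getRegClass(...)->getSize() in the real code
}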
/// \returns true if it is legal for the operand at index \p OpNo
/// to read a VGPR.

File diff suppressed because it is too large.


@ -152,9 +152,11 @@ defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
[(set i32:$dst, (ctlz_zero_undef i32:$src0))]
>;
//defm S_FLBIT_I32_B64 : SOP1_32 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32", []>;
//defm S_FLBIT_I32_I64 : SOP1_32 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
[(set i32:$dst, (int_AMDGPU_flbit_i32 i32:$src0))]
>;
defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
[(set i32:$dst, (sext_inreg i32:$src0, i8))]
>;
@ -764,88 +766,88 @@ defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
//===----------------------------------------------------------------------===//
def DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
def DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
def DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
def DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
def DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
def DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
def DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
def DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
def DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
def DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
def DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
def DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
def DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
defm DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
defm DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
defm DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
defm DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
defm DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
defm DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
defm DS_MIN_F32 : DS_1A1D_NORET <0x12, "ds_min_f32", VGPR_32>;
defm DS_MAX_F32 : DS_1A1D_NORET <0x13, "ds_max_f32", VGPR_32>;
def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
defm DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
defm DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
defm DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
defm DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
defm DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
defm DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2_b32">;
//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "ds_wrxchg2_rtn_b32", VGPR_32, "ds_wrxchg2st64_b32">;
def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
defm DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
defm DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
let SubtargetPredicate = isCI in {
def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
} // End isCI
def DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
def DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
def DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
def DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
def DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
def DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
def DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
def DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
def DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
def DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
def DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
def DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
defm DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
defm DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
defm DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
defm DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
defm DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
defm DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
defm DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
defm DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
defm DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
defm DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
defm DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
defm DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
defm DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
defm DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
defm DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
defm DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2_b64">;
//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "ds_wrxchg2_rtn_b64", VReg_64, "ds_wrxchg2st64_b64">;
def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_f64", VReg_64, "ds_min_f64">;
def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_f64", VReg_64, "ds_max_f64">;
defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
//let SubtargetPredicate = isCI in {
// DS_CONDXCHG32_RTN_B64
@ -874,123 +876,120 @@ defm DS_WRITE2ST64_B64 : DS_Store2_Helper <0x0000004F, "ds_write2st64_b64", VReg
defm DS_READ2_B32 : DS_Load2_Helper <0x00000037, "ds_read2_b32", VReg_64>;
defm DS_READ2ST64_B32 : DS_Load2_Helper <0x00000038, "ds_read2st64_b32", VReg_64>;
defm DS_READ2_B64 : DS_Load2_Helper <0x00000075, "ds_read2_b64", VReg_128>;
defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000076, "ds_read2st64_b64", VReg_128>;
defm DS_READ2_B64 : DS_Load2_Helper <0x00000077, "ds_read2_b64", VReg_128>;
defm DS_READ2ST64_B64 : DS_Load2_Helper <0x00000078, "ds_read2st64_b64", VReg_128>;
//===----------------------------------------------------------------------===//
// MUBUF Instructions
//===----------------------------------------------------------------------===//
let SubtargetPredicate = isSICI in {
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "buffer_load_format_x", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "buffer_load_format_xy", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <0x00000002, "buffer_load_format_xyz", []>;
defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "buffer_load_format_xyzw", VReg_128>;
//def BUFFER_STORE_FORMAT_X : MUBUF_ <0x00000004, "buffer_store_format_x", []>;
//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "buffer_store_format_xy", []>;
//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "buffer_store_format_xyz", []>;
//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "buffer_store_format_xyzw", []>;
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <mubuf<0x00>, "buffer_load_format_x", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <mubuf<0x01>, "buffer_load_format_xy", []>;
//def BUFFER_LOAD_FORMAT_XYZ : MUBUF_ <mubuf<0x02>, "buffer_load_format_xyz", []>;
defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <mubuf<0x03>, "buffer_load_format_xyzw", VReg_128>;
//def BUFFER_STORE_FORMAT_X : MUBUF_ <mubuf<0x04>, "buffer_store_format_x", []>;
//def BUFFER_STORE_FORMAT_XY : MUBUF_ <mubuf<0x05>, "buffer_store_format_xy", []>;
//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <mubuf<0x06>, "buffer_store_format_xyz", []>;
//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <mubuf<0x07>, "buffer_store_format_xyzw", []>;
defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
0x00000008, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, az_extloadi8_global
>;
defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
0x00000009, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
mubuf<0x09, 0x11>, "buffer_load_sbyte", VGPR_32, i32, sextloadi8_global
>;
defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
0x0000000a, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, az_extloadi16_global
>;
defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
0x0000000b, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global
>;
defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
0x0000000c, "buffer_load_dword", VGPR_32, i32, global_load
mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load
>;
defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
0x0000000d, "buffer_load_dwordx2", VReg_64, v2i32, global_load
mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load
>;
defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
0x0000000e, "buffer_load_dwordx4", VReg_128, v4i32, global_load
mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load
>;
defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
0x00000018, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
>;
defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
0x0000001a, "buffer_store_short", VGPR_32, i32, truncstorei16_global
mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global
>;
defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
0x0000001c, "buffer_store_dword", VGPR_32, i32, global_store
mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store
>;
defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
0x0000001d, "buffer_store_dwordx2", VReg_64, v2i32, global_store
mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store
>;
defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
0x0000001e, "buffer_store_dwordx4", VReg_128, v4i32, global_store
mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "buffer_atomic_swap", []>;
defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
0x00000030, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "buffer_atomic_cmpswap", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", []>;
defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
0x00000032, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
>;
defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
0x00000033, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "buffer_atomic_rsub", []>;
//def BUFFER_ATOMIC_RSUB : MUBUF_ <mubuf<0x34>, "buffer_atomic_rsub", []>; // isn't on CI & VI
defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
0x00000035, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
>;
defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
0x00000036, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
>;
defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
0x00000037, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
>;
defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
0x00000038, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
>;
defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
0x00000039, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
>;
defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
0x0000003a, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
>;
defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
0x0000003b, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
>;
//def BUFFER_ATOMIC_INC : MUBUF_ <0x0000003c, "buffer_atomic_inc", []>;
//def BUFFER_ATOMIC_DEC : MUBUF_ <0x0000003d, "buffer_atomic_dec", []>;
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <0x0000003e, "buffer_atomic_fcmpswap", []>;
//def BUFFER_ATOMIC_FMIN : MUBUF_ <0x0000003f, "buffer_atomic_fmin", []>;
//def BUFFER_ATOMIC_FMAX : MUBUF_ <0x00000040, "buffer_atomic_fmax", []>;
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <0x00000050, "buffer_atomic_swap_x2", []>;
//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <0x00000051, "buffer_atomic_cmpswap_x2", []>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <0x00000052, "buffer_atomic_add_x2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <0x00000053, "buffer_atomic_sub_x2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <0x00000054, "buffer_atomic_rsub_x2", []>;
//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <0x00000055, "buffer_atomic_smin_x2", []>;
//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <0x00000056, "buffer_atomic_umin_x2", []>;
//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <0x00000057, "buffer_atomic_smax_x2", []>;
//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <0x00000058, "buffer_atomic_umax_x2", []>;
//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <0x00000059, "buffer_atomic_and_x2", []>;
//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <0x0000005a, "buffer_atomic_or_x2", []>;
//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <0x0000005b, "buffer_atomic_xor_x2", []>;
//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <0x0000005c, "buffer_atomic_inc_x2", []>;
//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <0x0000005d, "buffer_atomic_dec_x2", []>;
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <0x0000005e, "buffer_atomic_fcmpswap_x2", []>;
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <0x0000005f, "buffer_atomic_fmin_x2", []>;
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "buffer_atomic_fmax_x2", []>;
//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "buffer_wbinvl1_sc", []>;
//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "buffer_wbinvl1", []>;
} // End SubtargetPredicate = isSICI
//def BUFFER_ATOMIC_INC : MUBUF_ <mubuf<0x3c, 0x4b>, "buffer_atomic_inc", []>;
//def BUFFER_ATOMIC_DEC : MUBUF_ <mubuf<0x3d, 0x4c>, "buffer_atomic_dec", []>;
//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_ <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI
//def BUFFER_ATOMIC_FMIN : MUBUF_ <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX : MUBUF_ <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
//def BUFFER_ATOMIC_SWAP_X2 : MUBUF_X2 <mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", []>;
//def BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_X2 <mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", []>;
//def BUFFER_ATOMIC_ADD_X2 : MUBUF_X2 <mubuf<0x52, 0x62>, "buffer_atomic_add_x2", []>;
//def BUFFER_ATOMIC_SUB_X2 : MUBUF_X2 <mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", []>;
//def BUFFER_ATOMIC_RSUB_X2 : MUBUF_X2 <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
//def BUFFER_ATOMIC_SMIN_X2 : MUBUF_X2 <mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", []>;
//def BUFFER_ATOMIC_UMIN_X2 : MUBUF_X2 <mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", []>;
//def BUFFER_ATOMIC_SMAX_X2 : MUBUF_X2 <mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", []>;
//def BUFFER_ATOMIC_UMAX_X2 : MUBUF_X2 <mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", []>;
//def BUFFER_ATOMIC_AND_X2 : MUBUF_X2 <mubuf<0x59, 0x68>, "buffer_atomic_and_x2", []>;
//def BUFFER_ATOMIC_OR_X2 : MUBUF_X2 <mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", []>;
//def BUFFER_ATOMIC_XOR_X2 : MUBUF_X2 <mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", []>;
//def BUFFER_ATOMIC_INC_X2 : MUBUF_X2 <mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", []>;
//def BUFFER_ATOMIC_DEC_X2 : MUBUF_X2 <mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", []>;
//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <mubuf<0x70>, "buffer_wbinvl1_sc", []>; // isn't on CI & VI
//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 <mubuf<0x70, 0x3f>, "buffer_wbinvl1_vol", []>; // isn't on SI
//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <mubuf<0x71, 0x3e>, "buffer_wbinvl1", []>;
//===----------------------------------------------------------------------===//
// MTBUF Instructions
@ -1037,63 +1036,63 @@ defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>;
//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>;
//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>;
defm IMAGE_SAMPLE : MIMG_Sampler <0x00000020, "image_sample">;
defm IMAGE_SAMPLE_CL : MIMG_Sampler <0x00000021, "image_sample_cl">;
defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">;
defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
defm IMAGE_SAMPLE_B : MIMG_Sampler <0x00000025, "image_sample_b">;
defm IMAGE_SAMPLE_B_CL : MIMG_Sampler <0x00000026, "image_sample_b_cl">;
defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
defm IMAGE_SAMPLE_C : MIMG_Sampler <0x00000028, "image_sample_c">;
defm IMAGE_SAMPLE_C_CL : MIMG_Sampler <0x00000029, "image_sample_c_cl">;
defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "image_sample_c_b">;
defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler <0x0000002e, "image_sample_c_b_cl">;
defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
defm IMAGE_SAMPLE_O : MIMG_Sampler <0x00000030, "image_sample_o">;
defm IMAGE_SAMPLE_CL_O : MIMG_Sampler <0x00000031, "image_sample_cl_o">;
defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
defm IMAGE_SAMPLE_B_O : MIMG_Sampler <0x00000035, "image_sample_b_o">;
defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler <0x00000036, "image_sample_b_cl_o">;
defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
defm IMAGE_SAMPLE_C_O : MIMG_Sampler <0x00000038, "image_sample_c_o">;
defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler <0x00000039, "image_sample_c_cl_o">;
defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler <0x0000003d, "image_sample_c_b_o">;
defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler <0x0000003e, "image_sample_c_b_cl_o">;
defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "image_gather4">;
defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "image_gather4_cl">;
defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">;
defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "image_gather4_b">;
defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "image_gather4_b_cl">;
defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "image_gather4_c">;
defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "image_gather4_c_cl">;
defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "image_gather4_c_b">;
defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "image_gather4_c_b_cl">;
defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "image_gather4_o">;
defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "image_gather4_cl_o">;
defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "image_gather4_b_o">;
defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "image_gather4_c_o">;
defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "image_gather4_c_cl_o">;
defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "image_gather4_c_b_o">;
defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "image_gather4_c_b_cl_o">;
defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "image_get_lod">;
defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
@ -1445,53 +1444,37 @@ defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
fminnum>;
defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
fmaxnum>;
defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32,
AMDGPUsmin
>;
defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32,
AMDGPUsmax
>;
defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32,
AMDGPUumin
>;
defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32,
AMDGPUumax
>;
defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
// No non-Rev Op on VI
defm V_LSHRREV_B32 : VOP2Inst <
vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
"v_lshr_b32", "v_lshrrev_b32"
"v_lshr_b32"
>;
// No non-Rev Op on VI
defm V_ASHRREV_I32 : VOP2Inst <
vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
"v_ashr_i32", "v_ashrrev_i32"
"v_ashr_i32"
>;
// No non-Rev Op on VI
defm V_LSHLREV_B32 : VOP2Inst <
vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
"v_lshl_b32", "v_lshlrev_b32"
"v_lshl_b32"
>;
defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32",
VOP_I32_I32_I32, and>;
defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32",
VOP_I32_I32_I32, or
>;
defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32",
VOP_I32_I32_I32, xor
>;
defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
} // End isCommutable = 1
defm V_MADMK_F32 : VOP2Inst <vop2<0x20, 0x17>, "v_madmk_f32", VOP_F32_F32_F32>;
defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">;
let isCommutable = 1 in {
defm V_MADAK_F32 : VOP2Inst <vop2<0x21, 0x18>, "v_madak_f32", VOP_F32_F32_F32>;
defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32">;
} // End isCommutable = 1
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
@ -1503,9 +1486,7 @@ let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
VOP_I32_I32_I32, add
>;
defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32",
VOP_I32_I32_I32, sub
>;
defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP_I32_I32_I32>;
defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
VOP_I32_I32_I32, null_frag, "v_sub_i32"
@ -1513,10 +1494,10 @@ defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
let Uses = [VCC] in { // Carry-in comes from VCC
defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
VOP_I32_I32_I32_VCC, adde
VOP_I32_I32_I32_VCC
>;
defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
VOP_I32_I32_I32_VCC, sube
VOP_I32_I32_I32_VCC
>;
defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32"
@ -1529,47 +1510,41 @@ defm V_READLANE_B32 : VOP2SI_3VI_m <
vop3 <0x001, 0x289>,
"v_readlane_b32",
(outs SReg_32:$vdst),
(ins VGPR_32:$src0, SSrc_32:$vsrc1),
"v_readlane_b32 $vdst, $src0, $vsrc1"
(ins VGPR_32:$src0, SCSrc_32:$src1),
"v_readlane_b32 $vdst, $src0, $src1"
>;
defm V_WRITELANE_B32 : VOP2SI_3VI_m <
vop3 <0x002, 0x28a>,
"v_writelane_b32",
(outs VGPR_32:$vdst),
(ins SReg_32:$src0, SSrc_32:$vsrc1),
"v_writelane_b32 $vdst, $src0, $vsrc1"
(ins SReg_32:$src0, SCSrc_32:$src1),
"v_writelane_b32 $vdst, $src0, $src1"
>;
// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2Inst <vop2<0x6>, "v_mac_legacy_f32",
VOP_F32_F32_F32
>;
} // End isCommutable = 1
defm V_MIN_LEGACY_F32 : VOP2Inst <vop2<0xd>, "v_min_legacy_f32",
defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
VOP_F32_F32_F32, AMDGPUfmin_legacy
>;
defm V_MAX_LEGACY_F32 : VOP2Inst <vop2<0xe>, "v_max_legacy_f32",
defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
VOP_F32_F32_F32, AMDGPUfmax_legacy
>;
let isCommutable = 1 in {
defm V_LSHR_B32 : VOP2Inst <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32, srl>;
defm V_ASHR_I32 : VOP2Inst <vop2<0x17>, "v_ashr_i32",
VOP_I32_I32_I32, sra
>;
let hasPostISelHook = 1 in {
defm V_LSHL_B32 : VOP2Inst <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32, shl>;
}
defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
} // End isCommutable = 1
} // End let SubtargetPredicate = SICI
let isCommutable = 1 in {
defm V_MAC_LEGACY_F32 : VOP2_VI3_Inst <vop23<0x6, 0x28e>, "v_mac_legacy_f32",
VOP_F32_F32_F32
>;
} // End isCommutable = 1
defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32", VOP_I32_I32_I32,
AMDGPUbfm
>;
@ -1586,14 +1561,25 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "v_cvt_pkaccum_u8_f32", []>;
////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "v_cvt_pknorm_i16_f32", []>;
////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "v_cvt_pknorm_u16_f32", []>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
VOP_I32_F32_I32>; // TODO: set "Uses = dst"
defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
VOP_I32_F32_F32
>;
defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
VOP_I32_F32_F32
>;
defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
VOP_I32_F32_F32, int_SI_packf16
>;
defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
VOP_I32_I32_I32
>;
defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
VOP_I32_I32_I32
>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "v_cvt_pk_u16_u32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "v_cvt_pk_i16_i32", []>;
//===----------------------------------------------------------------------===//
// VOP3 Instructions
@ -1659,27 +1645,34 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
VOP_I32_I32_I32_I32
>;
defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
defm V_MIN3_F32 : VOP3Inst <vop3<0x151, 0x1d0>, "v_min3_f32",
VOP_F32_F32_F32_F32, AMDGPUfmin3>;
defm V_MIN3_I32 : VOP3Inst <vop3<0x152>, "v_min3_i32",
defm V_MIN3_I32 : VOP3Inst <vop3<0x152, 0x1d1>, "v_min3_i32",
VOP_I32_I32_I32_I32, AMDGPUsmin3
>;
defm V_MIN3_U32 : VOP3Inst <vop3<0x153>, "v_min3_u32",
defm V_MIN3_U32 : VOP3Inst <vop3<0x153, 0x1d2>, "v_min3_u32",
VOP_I32_I32_I32_I32, AMDGPUumin3
>;
defm V_MAX3_F32 : VOP3Inst <vop3<0x154>, "v_max3_f32",
defm V_MAX3_F32 : VOP3Inst <vop3<0x154, 0x1d3>, "v_max3_f32",
VOP_F32_F32_F32_F32, AMDGPUfmax3
>;
defm V_MAX3_I32 : VOP3Inst <vop3<0x155>, "v_max3_i32",
defm V_MAX3_I32 : VOP3Inst <vop3<0x155, 0x1d4>, "v_max3_i32",
VOP_I32_I32_I32_I32, AMDGPUsmax3
>;
defm V_MAX3_U32 : VOP3Inst <vop3<0x156>, "v_max3_u32",
defm V_MAX3_U32 : VOP3Inst <vop3<0x156, 0x1d5>, "v_max3_u32",
VOP_I32_I32_I32_I32, AMDGPUumax3
>;
//def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
//def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
//def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32",
VOP_F32_F32_F32_F32
>;
defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32",
VOP_I32_I32_I32_I32
>;
defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
VOP_I32_I32_I32_I32
>;
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
@ -1742,21 +1735,36 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteFloatFMA, WriteSALU] in {
defm V_DIV_SCALE_F32 : VOP3b_32 <vop3<0x16d, 0x1e0>, "v_div_scale_f32", []>;
}
let SchedRW = [WriteDouble] in {
let SchedRW = [WriteDouble, WriteSALU] in {
// Double precision division pre-scale.
defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;
} // let SchedRW = [WriteDouble]
let isCommutable = 1 in {
defm V_DIV_FMAS_F32 : VOP3Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
let isCommutable = 1, Uses = [VCC] in {
// v_div_fmas_f32:
// result = src0 * src1 + src2
// if (vcc)
// result *= 2^32
//
defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
let SchedRW = [WriteDouble] in {
defm V_DIV_FMAS_F64 : VOP3Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
// v_div_fmas_f64:
// result = src0 * src1 + src2
// if (vcc)
// result *= 2^64
//
defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
>;
} // End SchedRW = [WriteDouble]
} // End isCommutable = 1
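
The comments above state the v_div_fmas semantics; in scalar form (the real instructions work per lane, with the lane's VCC bit as the condition) this is, assuming IEEE fma:

#include <cmath>

float div_fmas_f32(float src0, float src1, float src2, bool vcc) {
  float result = std::fma(src0, src1, src2); // result = src0 * src1 + src2
  if (vcc)
    result = std::ldexp(result, 32);         // result *= 2^32
  return result;
}

double div_fmas_f64(double src0, double src1, double src2, bool vcc) {
  double result = std::fma(src0, src1, src2);
  if (vcc)
    result = std::ldexp(result, 64);         // result *= 2^64
  return result;
}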
@ -1774,23 +1782,29 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {
defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64",
VOP_I64_I64_I32, shl
>;
defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64",
VOP_I64_I64_I32, srl
>;
defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64",
VOP_I64_I64_I32, sra
>;
defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>;
defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>;
defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>;
defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
VOP_F32_F32_F32_F32>;
} // End SubtargetPredicate = isSICI
let SubtargetPredicate = isVI in {
defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64",
VOP_I64_I32_I64
>;
defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64",
VOP_I64_I32_I64
>;
defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
VOP_I64_I32_I64
>;
} // End SubtargetPredicate = isVI
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
@ -1809,8 +1823,8 @@ def SGPR_USE : InstSI <(outs),(ins), "", []>;
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
Uses = [EXEC], Defs = [EXEC] in {
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
let Uses = [EXEC], Defs = [EXEC] in {
let isBranch = 1, isTerminator = 1 in {
@ -1867,15 +1881,18 @@ def SI_END_CF : InstSI <
[(int_SI_end_cf i64:$saved)]
>;
} // End Uses = [EXEC], Defs = [EXEC]
let Uses = [EXEC], Defs = [EXEC,VCC] in {
def SI_KILL : InstSI <
(outs),
(ins VSrc_32:$src),
"si_kill $src",
[(int_AMDGPU_kill f32:$src)]
>;
} // End Uses = [EXEC], Defs = [EXEC,VCC]
} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
// Uses = [EXEC], Defs = [EXEC]
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
@ -2020,16 +2037,12 @@ def : Pat <
(SI_KILL 0xbf800000)
>;
let Predicates = [isSICI] in {
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
} // End Predicates = [isSICI]
/* int_SI_export */
def : Pat <
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
@ -2156,9 +2169,13 @@ def : Pat <
//===----------------------------------------------------------------------===//
let Predicates = [UnsafeFPMath] in {
def : RcpPat<V_RCP_F64_e32, f64>;
defm : RsqPat<V_RSQ_F64_e32, f64>;
defm : RsqPat<V_RSQ_F32_e32, f32>;
//def : RcpPat<V_RCP_F64_e32, f64>;
//defm : RsqPat<V_RSQ_F64_e32, f64>;
//defm : RsqPat<V_RSQ_F32_e32, f32>;
def : RsqPat<V_RSQ_F32_e32, f32>;
def : RsqPat<V_RSQ_F64_e32, f64>;
}
//===----------------------------------------------------------------------===//
@ -2675,13 +2692,6 @@ def : Pat <
(V_MUL_LEGACY_F32_e32 $src0, (V_RCP_LEGACY_F32_e32 $src1))
>;
def : Pat<
(fdiv f64:$src0, f64:$src1),
(V_MUL_F64 0 /* src0_modifiers */, $src0,
0 /* src1_modifiers */, (V_RCP_F64_e32 $src1),
0 /* clamp */, 0 /* omod */)
>;
def : Pat <
(int_AMDGPU_cube v4f32:$src),
(REG_SEQUENCE VReg_128,
@ -2716,16 +2726,12 @@ class Ext32Pat <SDNode ext> : Pat <
def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;
let Predicates = [isSICI] in {
// Offset in a 32-bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
>;
} // End Predicates = [isSICI]
// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
(AMDGPUurecip i32:$src0),
@ -2907,7 +2913,6 @@ class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
(Instr $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
>;
let Predicates = [isSICI] in {
def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
@ -2915,7 +2920,6 @@ def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
} // End Predicates = [isSICI]
// BUFFER_LOAD_DWORD*, addr64=0
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
@ -2954,14 +2958,12 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
}
let Predicates = [isSICI] in {
defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
} // End Predicates = [isSICI]
class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
@ -2969,13 +2971,11 @@ class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
(Instr $value, $srsrc, $vaddr, $soffset, $offset, 0, 0, 0)
>;
let Predicates = [isSICI] in {
def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
} // End Predicates = [isSICI]
/*
class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
@ -3245,6 +3245,12 @@ def : Pat <
(V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
>;
def : Pat <
(i1 (trunc i64:$a)),
(V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1),
(EXTRACT_SUBREG $a, sub0)), 1)
>;
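
The new pattern lowers i64 -> i1 truncation by testing bit 0 of the low 32-bit half; the scalar equivalent:

#include <cstdint>

bool trunc_i64_to_i1(uint64_t a) {
  uint32_t lo = (uint32_t)a; // EXTRACT_SUBREG $a, sub0
  return (lo & 1u) == 1u;    // V_AND_B32 1, lo; V_CMP_EQ_I32 ..., 1
}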
def : Pat <
(i32 (bswap i32:$a)),
(V_BFI_B32 (S_MOV_B32 0x00ff00ff),
@ -3257,6 +3263,28 @@ def : Pat <
(V_CNDMASK_B32_e64 $src0, $src1, $src2)
>;
//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//
let Predicates = [isCI] in {
// Convert (x - floor(x)) to fract(x)
def : Pat <
(f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
(f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
(V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
// Convert (x + (-floor(x))) to fract(x)
def : Pat <
(f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
(f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
} // End Predicates = [isCI]
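
Both patterns match the same scalar identity, shown here for reference (fract is the fractional part left after subtracting floor(x)):

#include <cmath>

double fract(double x) { return x - std::floor(x); }
// f32 pattern:  x - floor(x)      ==> V_FRACT_F32
// f64 pattern:  x + (-floor(x))   ==> V_FRACT_F64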
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//


@ -88,7 +88,8 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
void computeIndirectRegAndOffset(unsigned VecReg, unsigned &Reg, int &Offset);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
@ -323,7 +324,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
@ -333,8 +334,14 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
unsigned Idx = MI.getOperand(3).getReg();
if (AMDGPU::SReg_32RegClass.contains(Idx)) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(Idx);
if (Offset) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
.addReg(Idx)
.addImm(Offset);
} else {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(Idx);
}
MBB.insert(I, MovRel);
} else {
@ -363,6 +370,11 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
.addReg(AMDGPU::VCC);
if (Offset) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
.addReg(AMDGPU::M0)
.addImm(Offset);
}
// Do the actual move
MBB.insert(I, MovRel);
@ -384,6 +396,33 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MI.eraseFromParent();
}
/// \param @VecReg The register which holds element zero of the vector
/// being addressed into.
/// \param[out] @Reg The base register to use in the indirect addressing instruction.
/// \param[in,out] @Offset As an input, this is the constant offset part of the
/// indirect index, e.g. v0 = v[VecReg + Offset].
/// As an output, this is a constant value that needs
/// to be added to the value stored in M0.
void SILowerControlFlowPass::computeIndirectRegAndOffset(unsigned VecReg,
unsigned &Reg,
int &Offset) {
unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
if (!SubReg)
SubReg = VecReg;
const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
int RegIdx = TRI->getHWRegIndex(SubReg) + Offset;
if (RegIdx < 0) {
Offset = RegIdx;
RegIdx = 0;
} else {
Offset = 0;
}
Reg = RC->getRegister(RegIdx);
}
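
A standalone rendering of the split computed above, with the hardware register index reduced to a plain integer; register numbering here is hypothetical:

void splitIndirectRegAndOffset(int BaseHWIndex, int &RegIdx, int &Offset) {
  RegIdx = BaseHWIndex + Offset; // fold the constant offset into the index
  if (RegIdx < 0) {
    Offset = RegIdx; // a negative remainder must still be added to M0
    RegIdx = 0;      // clamp the base register at element zero
  } else {
    Offset = 0;      // offset fully absorbed by the register index
  }
}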
void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
@ -391,18 +430,18 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
unsigned Dst = MI.getOperand(0).getReg();
unsigned Vec = MI.getOperand(2).getReg();
unsigned Off = MI.getOperand(4).getImm();
unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
if (!SubReg)
SubReg = Vec;
int Off = MI.getOperand(4).getImm();
unsigned Reg;
computeIndirectRegAndOffset(Vec, Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
.addReg(SubReg + Off)
.addReg(Reg)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Vec, RegState::Implicit);
LoadM0(MI, MovRel);
LoadM0(MI, MovRel, Off);
}
void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
@ -411,20 +450,20 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
unsigned Off = MI.getOperand(4).getImm();
int Off = MI.getOperand(4).getImm();
unsigned Val = MI.getOperand(5).getReg();
unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
if (!SubReg)
SubReg = Dst;
unsigned Reg;
computeIndirectRegAndOffset(Dst, Reg, Off);
MachineInstr *MovRel =
BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
.addReg(SubReg + Off, RegState::Define)
.addReg(Reg, RegState::Define)
.addReg(Val)
.addReg(AMDGPU::M0, RegState::Implicit)
.addReg(Dst, RegState::Implicit);
LoadM0(MI, MovRel);
LoadM0(MI, MovRel, Off);
}
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
@ -447,7 +486,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
if (TII->isDS(MI.getOpcode()))
if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode()))
NeedWQM = true;
// Flat uses m0 in case it needs to access LDS.
@ -513,12 +552,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_INDIRECT_DST_V16:
IndirectDst(MI);
break;
case AMDGPU::V_INTERP_P1_F32:
case AMDGPU::V_INTERP_P2_F32:
case AMDGPU::V_INTERP_MOV_F32:
NeedWQM = true;
break;
}
}
}


@ -14,7 +14,6 @@
#include "SIRegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@ -47,13 +46,31 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(AMDGPU::VGPR255);
Reserved.set(AMDGPU::VGPR254);
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
// Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
// Assume XNACK_MASK is unused.
unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
MCRegAliasIterator R = MCRegAliasIterator(Reg, this, true);
for (; R.isValid(); ++R)
Reserved.set(*R);
}
}
return Reserved;
}
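
A sketch of the reservation rule added above, assuming FIXED_SGPR_COUNT_FOR_INIT_BUG is 80 (an assumed value for illustration): everything from the limit upward is marked reserved, with 4 registers held back for FLAT_SCRATCH and VCC.

#include <bitset>

void reserveInitBugSGPRs(std::bitset<128> &Reserved, unsigned NumSGPRs) {
  const unsigned FixedCount = 80;        // assumed FIXED_SGPR_COUNT_FOR_INIT_BUG
  const unsigned Limit = FixedCount - 4; // first reserved SGPR index
  for (unsigned i = Limit; i < NumSGPRs; ++i)
    Reserved.set(i); // the real code also reserves each register's aliases
}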
unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
// FIXME: We should adjust the max number of waves based on LDS size.
unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
unsigned SGPRLimit = getNumSGPRsAllowed(ST.getGeneration(),
ST.getMaxWavesPerCU());
unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
for (regclass_iterator I = regclass_begin(), E = regclass_end();
@ -204,7 +221,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Ctx.emitError("Ran out of VGPRs for spilling SGPR");
}
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
.addReg(SubReg)
.addImm(Spill.Lane);
@ -236,7 +255,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (isM0)
SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane)
.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
@ -245,7 +266,22 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(SubReg);
}
}
TII->insertNOPs(MI, 3);
// TODO: only do this when it is needed
switch (ST.getGeneration()) {
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
// "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
TII->insertNOPs(MI, 3);
break;
case AMDGPUSubtarget::SEA_ISLANDS:
break;
default: // VOLCANIC_ISLANDS and later
// "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
// and later. This also applies to VALUs which write VCC, but we're
// unlikely to see VMEM use VCC.
TII->insertNOPs(MI, 4);
}
MI->eraseFromParent();
break;
}
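
The NOP counts selected by the new switch, as a standalone mapping (enumerator names abbreviated for the sketch):

enum Generation { SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS };

int nopsAfterSGPRSpill(Generation G) {
  switch (G) {
  case SOUTHERN_ISLANDS: return 3; // VALU writes SGPR -> SMRD reads it
  case SEA_ISLANDS:      return 0; // no workaround required
  default:               return 4; // VI+: VALU writes SGPR -> VMEM reads it
  }
}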
@ -490,14 +526,24 @@ unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
}
}
unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
switch(WaveCount) {
case 10: return 48;
case 9: return 56;
case 8: return 64;
case 7: return 72;
case 6: return 80;
case 5: return 96;
default: return 103;
unsigned SIRegisterInfo::getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
unsigned WaveCount) const {
if (gen >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
switch (WaveCount) {
case 10: return 80;
case 9: return 80;
case 8: return 96;
default: return 102;
}
} else {
switch(WaveCount) {
case 10: return 48;
case 9: return 56;
case 8: return 64;
case 7: return 72;
case 6: return 80;
case 5: return 96;
default: return 103;
}
}
}
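Reading the two tables together: at the maximum occupancy of 10 waves per CU, pre-VI generations may use 48 SGPRs per wave while VI may use 80; at 8 waves the limits are 64 versus 96; below the listed wave counts the caps fall through to 103 (pre-VI) and 102 (VI).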


@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/Support/Debug.h"
namespace llvm {
@ -111,7 +112,8 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
/// concurrent waves.
unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
unsigned getNumSGPRsAllowed(AMDGPUSubtarget::Generation gen,
unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;


@ -209,7 +209,9 @@ def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
let Size = 32;
}
class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
let OperandNamespace = "AMDGPU";


@ -136,6 +136,32 @@ class VOP3e_vi <bits<10> op> : Enc64 {
let Inst{63} = src2_modifiers{0};
}
class VOP3be_vi <bits<10> op> : Enc64 {
bits<8> vdst;
bits<2> src0_modifiers;
bits<9> src0;
bits<2> src1_modifiers;
bits<9> src1;
bits<2> src2_modifiers;
bits<9> src2;
bits<7> sdst;
bits<2> omod;
bits<1> clamp;
let Inst{7-0} = vdst;
let Inst{14-8} = sdst;
let Inst{15} = clamp;
let Inst{25-16} = op;
let Inst{31-26} = 0x34; //encoding
let Inst{40-32} = src0;
let Inst{49-41} = src1;
let Inst{58-50} = src2;
let Inst{60-59} = omod;
let Inst{61} = src0_modifiers{0};
let Inst{62} = src1_modifiers{0};
let Inst{63} = src2_modifiers{0};
}
class EXPe_vi : EXPe {
let Inst{31-26} = 0x31; //encoding
}


@ -9,18 +9,6 @@
// Instruction definitions for VI and newer.
//===----------------------------------------------------------------------===//
let SubtargetPredicate = isVI in {
defm BUFFER_LOAD_DWORD_VI : MUBUF_Load_Helper_vi <
0x14, "buffer_load_dword", VGPR_32, i32, global_load
>;
defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
0x03, "buffer_load_format_xyzw", VReg_128
>;
} // End SubtargetPredicate = isVI
//===----------------------------------------------------------------------===//
// SMEM Patterns
@ -28,37 +16,10 @@ defm BUFFER_LOAD_FORMAT_XYZW_VI : MUBUF_Load_Helper_vi <
let Predicates = [isVI] in {
// 1. Offset as 8bit DWORD immediate
// 1. Offset as 20bit DWORD immediate
def : Pat <
(SIload_constant v4i32:$sbase, IMM20bit:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;
//===----------------------------------------------------------------------===//
// MUBUF Patterns
//===----------------------------------------------------------------------===//
// Offset in a 32-bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
>;
// Offset in a 32-bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_VI_OFFEN $sbase, $voff, 0, 0, 0, 0, 0)
>;
/* int_SI_vs_load_input */
def : Pat<
(SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_VI_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_VI_OFFSET,
BUFFER_LOAD_DWORD_VI_OFFEN,
BUFFER_LOAD_DWORD_VI_IDXEN,
BUFFER_LOAD_DWORD_VI_BOTHEN>;
} // End Predicates = [isVI]


@ -392,12 +392,25 @@ static bool usesTheStack(const MachineFunction &MF) {
return false;
}
void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
unsigned &CallOp,
const char *&Symbol) {
CallOp = STI.is64Bit() ? X86::W64ALLOCA : X86::CALLpcrel32;
void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL) {
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
if (STI.is64Bit()) {
unsigned CallOp;
if (Is64Bit)
CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
else
CallOp = X86::CALLpcrel32;
const char *Symbol;
if (Is64Bit) {
if (STI.isTargetCygMing()) {
Symbol = "___chkstk_ms";
} else {
@ -407,6 +420,37 @@ void X86FrameLowering::getStackProbeFunction(const X86Subtarget &STI,
Symbol = "_alloca";
else
Symbol = "_chkstk";
MachineInstrBuilder CI;
// All current stack probes take AX and SP as input, clobber flags, and
// preserve all registers. x86_64 probes leave RSP unmodified.
if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
// For the large code model, we have to call through a register. Use R11,
// as it is scratch in all supported calling conventions.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
.addExternalSymbol(Symbol);
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
} else {
CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
}
unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
CI.addReg(AX, RegState::Implicit)
.addReg(SP, RegState::Implicit)
.addReg(AX, RegState::Define | RegState::Implicit)
.addReg(SP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
if (Is64Bit) {
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
// themselves. They also do not clobber %rax, so we can reuse it when
// adjusting %rsp.
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(X86::RAX);
}
}
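For reference, a sketch of the expected call-site shape (mirroring the emitPrologue rewrite further below; TII, MF, MBB, MBBI, DL, Is64Bit and NumBytes are assumed to be in scope):
// Per the contract above: materialize the probe size in EAX/RAX first,
// then emit the probe call itself.
unsigned MovOp = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
unsigned AXReg = Is64Bit ? X86::RAX : X86::EAX;
BuildMI(MBB, MBBI, DL, TII.get(MovOp), AXReg)
    .addImm(NumBytes); // bytes to probe
X86FrameLowering::emitStackProbeCall(MF, MBB, MBBI, DL);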
/// emitPrologue - Push callee-saved registers onto the stack, which
@ -739,11 +783,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
if (NumBytes >= StackProbeSize && UseStackProbe) {
const char *StackProbeSymbol;
unsigned CallOp;
getStackProbeFunction(STI, CallOp, StackProbeSymbol);
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
@ -772,22 +811,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.setMIFlag(MachineInstr::FrameSetup);
}
BuildMI(MBB, MBBI, DL,
TII.get(CallOp))
.addExternalSymbol(StackProbeSymbol)
.addReg(StackPtr, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
.setMIFlag(MachineInstr::FrameSetup);
// Save a pointer to the MI where we set AX.
MachineBasicBlock::iterator SetRAX = MBBI;
--SetRAX;
// Call __chkstk, __chkstk_ms, or __alloca.
emitStackProbeCall(MF, MBB, MBBI, DL);
// Apply the frame setup flag to all inserted instrs.
for (; SetRAX != MBBI; ++SetRAX)
SetRAX->setFlag(MachineInstr::FrameSetup);
if (Is64Bit) {
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
// themself. It also does not clobber %rax so we can reuse it when
// adjusting %rsp.
BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
.addReg(StackPtr)
.addReg(X86::RAX)
.setMIFlag(MachineInstr::FrameSetup);
}
if (isEAXAlive) {
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),


@ -27,9 +27,11 @@ public:
explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
: TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
static void getStackProbeFunction(const X86Subtarget &STI,
unsigned &CallOp,
const char *&Symbol);
/// Emit a call to the target's stack probe function. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
/// the number of bytes to probe in RAX/EAX.
static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL);
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,


@ -15,6 +15,7 @@
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
@ -10094,12 +10095,12 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
VT.getVectorNumElements() / 2);
// Check for patterns which can be matched with a single insert of a 128-bit
// subvector.
if (isShuffleEquivalent(Mask, 0, 1, 0, 1) ||
isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
bool OnlyUsesV1 = isShuffleEquivalent(Mask, 0, 1, 0, 1);
if (OnlyUsesV1 || isShuffleEquivalent(Mask, 0, 1, 4, 5)) {
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
Mask[2] < 4 ? V1 : V2, DAG.getIntPtrConstant(0));
OnlyUsesV1 ? V1 : V2, DAG.getIntPtrConstant(0));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
if (isShuffleEquivalent(Mask, 0, 1, 6, 7)) {
@ -10112,7 +10113,15 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
// Otherwise form a 128-bit permutation.
// FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4;
int MaskLO = Mask[0];
if (MaskLO == SM_SentinelUndef)
MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
int MaskHI = Mask[2];
if (MaskHI == SM_SentinelUndef)
MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, MVT::i8));
}
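As a worked example of the immediate computed above (a standalone sketch; SM_SentinelUndef is LLVM's undef shuffle index, -1): for Mask = {2,3,4,5}, MaskLO = 2 and MaskHI = 4, so PermMask = 2/2 | (4/2) << 4 = 0x21, i.e. the high half of V1 followed by the low half of V2.
#include <cassert>
constexpr int SM_SentinelUndef = -1; // LLVM's undef shuffle index
// Mirrors the MaskLO/MaskHI selection above for a 4-element 2x128 shuffle.
// Nibble 0 of the immediate picks the result's low 128 bits, nibble 1 the
// high 128 bits; values 0/1 address V1's halves, 2/3 address V2's halves.
unsigned perm2x128Imm(const int Mask[4]) {
  int MaskLO = Mask[0] != SM_SentinelUndef
                   ? Mask[0]
                   : (Mask[1] != SM_SentinelUndef ? Mask[1] : 0);
  int MaskHI = Mask[2] != SM_SentinelUndef
                   ? Mask[2]
                   : (Mask[3] != SM_SentinelUndef ? Mask[3] : 0);
  return MaskLO / 2 | (MaskHI / 2) << 4;
}
int main() {
  const int Swap[4] = {2, 3, 4, 5}; // hi(V1) : lo(V2)
  assert(perm2x128Imm(Swap) == 0x21);
  const int WithUndefs[4] = {SM_SentinelUndef, 1, 6, SM_SentinelUndef};
  assert(perm2x128Imm(WithUndefs) == 0x30); // lo(V1) : hi(V2)
}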
@ -17172,6 +17181,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
// but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));
case Intrinsic::x86_avx512_mask_valign_q_512:
case Intrinsic::x86_avx512_mask_valign_d_512:
// Vector source operands are swapped.
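To see the swap from the user's side (a sketch with the C intrinsics, not part of this change): the intrinsic takes (value, index), while the VPERMV node and the vpermps instruction take the index first.
#include <immintrin.h>
__m256 permute_lanes(__m256 Val, __m256i Idx) {
  // Intrinsic operand order is (value, index); the vpermps instruction
  // this lowers to takes (index, value), hence the swap in the lowering.
  return _mm256_permutevar8x32_ps(Val, Idx);
}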
@ -21076,47 +21092,7 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
assert(!Subtarget->isTargetMachO());
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
if (Subtarget->isTargetWin64()) {
if (Subtarget->isTargetCygMing()) {
// ___chkstk(Mingw64):
// Clobbers R10, R11, RAX and EFLAGS.
// Updates RSP.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("___chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::RSP, RegState::Implicit)
.addReg(X86::RAX, RegState::Define | RegState::Implicit)
.addReg(X86::RSP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// __chkstk(MSVCRT): does not update stack pointer.
// Clobbers R10, R11 and EFLAGS.
BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
.addExternalSymbol("__chkstk")
.addReg(X86::RAX, RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// RAX has the offset to be subtracted from RSP.
BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
.addReg(X86::RSP)
.addReg(X86::RAX);
}
} else {
const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() ||
Subtarget->isTargetWindowsItanium())
? "_chkstk"
: "_alloca";
BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol(StackProbeSymbol)
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
}
X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@ -25558,45 +25534,51 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
LHS.getValueType(), RHS, LHS.getOperand(1));
return DAG.getSetCC(SDLoc(N), N->getValueType(0),
addV, DAG.getConstant(0, addV.getValueType()), CC);
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), LHS.getValueType(), RHS,
LHS.getOperand(1));
return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
DAG.getConstant(0, addV.getValueType()), CC);
}
if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N),
RHS.getValueType(), LHS, RHS.getOperand(1));
return DAG.getSetCC(SDLoc(N), N->getValueType(0),
addV, DAG.getConstant(0, addV.getValueType()), CC);
SDValue addV = DAG.getNode(ISD::ADD, SDLoc(N), RHS.getValueType(), LHS,
RHS.getOperand(1));
return DAG.getSetCC(SDLoc(N), N->getValueType(0), addV,
DAG.getConstant(0, addV.getValueType()), CC);
}
if (VT.getScalarType() == MVT::i1) {
bool IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
bool IsVZero0 = ISD::isBuildVectorAllZeros(LHS.getNode());
if (!IsSEXT0 && !IsVZero0)
return SDValue();
bool IsSEXT1 = (RHS.getOpcode() == ISD::SIGN_EXTEND) &&
(RHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
if (VT.getScalarType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
bool IsSEXT0 =
(LHS.getOpcode() == ISD::SIGN_EXTEND) &&
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
bool IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
if (!IsSEXT1 && !IsVZero1)
return SDValue();
if (!IsSEXT0 || !IsVZero1) {
// Swap the operands and update the condition code.
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
IsSEXT0 = (LHS.getOpcode() == ISD::SIGN_EXTEND) &&
(LHS.getOperand(0).getValueType().getScalarType() == MVT::i1);
IsVZero1 = ISD::isBuildVectorAllZeros(RHS.getNode());
}
if (IsSEXT0 && IsVZero1) {
assert(VT == LHS.getOperand(0).getValueType() && "Unexpected operand type");
if (CC == ISD::SETEQ)
assert(VT == LHS.getOperand(0).getValueType() &&
"Unexpected operand type");
if (CC == ISD::SETGT)
return DAG.getConstant(0, VT);
if (CC == ISD::SETLE)
return DAG.getConstant(1, VT);
if (CC == ISD::SETEQ || CC == ISD::SETGE)
return DAG.getNOT(DL, LHS.getOperand(0), VT);
assert((CC == ISD::SETNE || CC == ISD::SETLT) &&
"Unexpected condition code!");
return LHS.getOperand(0);
}
if (IsSEXT1 && IsVZero0) {
assert(VT == RHS.getOperand(0).getValueType() && "Unexpected operand type");
if (CC == ISD::SETEQ)
return DAG.getNOT(DL, RHS.getOperand(0), VT);
return RHS.getOperand(0);
}
}
return SDValue();
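The i1 folds above all follow from sext(i1) taking only the values 0 and -1; a self-contained check of the identities used (SETGT folds to constant false, SETLE to constant true, SETEQ/SETGE to NOT x, SETNE/SETLT to x), with int8_t standing in for one sign-extended lane:
#include <cassert>
#include <cstdint>
int main() {
  for (int x = 0; x <= 1; ++x) {
    int8_t s = x ? -1 : 0;       // sext i1 -> i8
    assert((s > 0) == false);    // SETGT: constant 0
    assert((s <= 0) == true);    // SETLE: constant 1
    assert((s == 0) == !x);      // SETEQ: NOT x
    assert((s >= 0) == !x);      // SETGE: NOT x
    assert((s != 0) == !!x);     // SETNE: x
    assert((s < 0) == !!x);      // SETLT: x
  }
}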


@ -279,7 +279,8 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
}
let isCall = 1, isCodeGenOnly = 1 in
// __chkstk(MSVC): clobber R10, R11 and EFLAGS.
// __chkstk(MSVC): clobber R10, R11 and EFLAGS
// ___chkstk_ms(Mingw64): clobber R10, R11 and EFLAGS
// ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
let Defs = [RAX, R10, R11, RSP, EFLAGS],
Uses = [RSP] in {


@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),


@ -47,6 +47,8 @@ using namespace llvm;
static cl::opt<std::string>
DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
cl::ValueRequired);
static cl::opt<bool> DefaultExitBlockBeforeBody("gcov-exit-block-before-body",
cl::init(false), cl::Hidden);
GCOVOptions GCOVOptions::getDefault() {
GCOVOptions Options;
@ -312,7 +314,7 @@ namespace {
class GCOVFunction : public GCOVRecord {
public:
GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident,
bool UseCfgChecksum)
bool UseCfgChecksum, bool ExitBlockBeforeBody)
: SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0),
ReturnBlock(1, os) {
this->os = os;
@ -322,11 +324,13 @@ namespace {
uint32_t i = 0;
for (auto &BB : *F) {
// Skip index 1 (0, 2, 3, 4, ...) because that's assigned to the
// ReturnBlock.
bool first = i == 0;
Blocks.insert(std::make_pair(&BB, GCOVBlock(i++ + !first, os)));
// Skip index 1 if it's assigned to the ReturnBlock.
if (i == 1 && ExitBlockBeforeBody)
++i;
Blocks.insert(std::make_pair(&BB, GCOVBlock(i++, os)));
}
if (!ExitBlockBeforeBody)
ReturnBlock.Number = i;
std::string FunctionNameAndLine;
raw_string_ostream FNLOS(FunctionNameAndLine);
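To make the two numbering schemes concrete, a small sketch (a restatement of the loop above, nothing more) printing the indices assigned to three body blocks and the exit block:
// ExitBlockBeforeBody=1: entry gets 0, the exit keeps index 1, and the
// remaining body blocks continue 2, 3, ...
// ExitBlockBeforeBody=0: body blocks are numbered 0, 1, 2, ... and the
// exit (ReturnBlock) takes the next free index.
#include <cstdio>
int main() {
  const unsigned NumBlocks = 3;
  for (int ExitBeforeBody = 0; ExitBeforeBody <= 1; ++ExitBeforeBody) {
    unsigned i = 0;
    std::printf("exit-before-body=%d:", ExitBeforeBody);
    for (unsigned bb = 0; bb < NumBlocks; ++bb) {
      if (i == 1 && ExitBeforeBody)
        ++i; // index 1 is held for the exit block
      std::printf(" bb%u->%u", bb, i++);
    }
    std::printf(" exit->%u\n", ExitBeforeBody ? 1u : i);
  }
}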
@ -469,7 +473,7 @@ static bool functionHasLines(Function *F) {
if (Loc.isUnknown()) continue;
// Artificial lines such as calls to the global constructors.
if (Loc.getLine() == 0) continue;
if (Loc.getLine() == 0) continue;
return true;
}
@ -513,7 +517,8 @@ void GCOVProfiler::emitProfileNotes() {
EntryBlock.splitBasicBlock(It);
Funcs.push_back(make_unique<GCOVFunction>(SP, &out, FunctionIdent++,
Options.UseCfgChecksum));
Options.UseCfgChecksum,
DefaultExitBlockBeforeBody));
GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {


@ -2183,12 +2183,16 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS,
// Handle the floating point versions of equality comparisons too.
if ((isKnownTrue && Cmp->getPredicate() == CmpInst::FCMP_OEQ) ||
(isKnownFalse && Cmp->getPredicate() == CmpInst::FCMP_UNE)) {
// Floating point -0.0 and 0.0 compare equal, so we can't
// propagate a constant based on that comparison.
// Floating point -0.0 and 0.0 compare equal, so we can only
// propagate values if we know that we have a constant and that
// its value is non-zero.
// FIXME: We should do this optimization if 'no signed zeros' is
// applicable via an instruction-level fast-math-flag or some other
// indicator that relaxed FP semantics are being used.
if (!isa<ConstantFP>(Op1) || !cast<ConstantFP>(Op1)->isZero())
if (isa<ConstantFP>(Op1) && !cast<ConstantFP>(Op1)->isZero())
Worklist.push_back(std::make_pair(Op0, Op1));
}
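The guard tightened from "not a zero constant" to "a non-zero constant" because FP equality cannot distinguish the two zeros; a minimal demonstration of why propagating a value through a compare-with-zero would miscompile:
// x == 0.0 also holds for x == -0.0, yet the two zeros are observably
// different, so GVN must not substitute 0.0 for x inside the guarded block.
#include <cassert>
int main() {
  double x = -0.0;
  assert(x == 0.0);              // -0.0 and 0.0 compare equal...
  assert(1.0 / x != 1.0 / 0.0);  // ...but give -inf vs +inf as divisors
}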


@ -288,14 +288,11 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
IntegerType *IT = cast<IntegerType>(IVOperand->getType());
Value *OtherOperand = nullptr;
int OtherOperandIdx = -1;
if (BO->getOperand(0) == IVOperand) {
OtherOperand = BO->getOperand(1);
OtherOperandIdx = 1;
} else {
assert(BO->getOperand(1) == IVOperand && "only other use!");
OtherOperand = BO->getOperand(0);
OtherOperandIdx = 0;
}
bool Changed = false;


@ -1,11 +1,11 @@
This is a set of individual patches, which contain all the customizations to
llvm/clang currently in the FreeBSD base system. These can be applied in
alphabetical order to a pristine llvm/clang 3.6.0 source tree, for example by
alphabetical order to a pristine llvm/clang 3.6.1 source tree, for example by
doing:
svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_360/final llvm-3.6.0
svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_360/final llvm-3.6.0/tools/clang
cd llvm-3.6.0
svn co https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_361/final llvm-3.6.1
svn co https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_361/final llvm-3.6.1/tools/clang
cd llvm-3.6.1
for p in /usr/src/contrib/llvm/patches/patch-*.diff; do
patch -p0 -f -F0 -E -i $p -s || break
done


@ -44,7 +44,7 @@ Index: tools/clang/include/clang/Sema/Sema.h
===================================================================
--- tools/clang/include/clang/Sema/Sema.h
+++ tools/clang/include/clang/Sema/Sema.h
@@ -8566,6 +8566,7 @@ class Sema {
@@ -8567,6 +8567,7 @@ class Sema {
FST_Strftime,
FST_Strfmon,
FST_Kprintf,
@ -230,7 +230,7 @@ Index: tools/clang/lib/Sema/SemaChecking.cpp
===================================================================
--- tools/clang/lib/Sema/SemaChecking.cpp
+++ tools/clang/lib/Sema/SemaChecking.cpp
@@ -2584,6 +2584,7 @@ Sema::FormatStringType Sema::GetFormatStringType(c
@@ -2603,6 +2603,7 @@ Sema::FormatStringType Sema::GetFormatStringType(c
.Case("strftime", FST_Strftime)
.Case("strfmon", FST_Strfmon)
.Cases("kprintf", "cmn_err", "vcmn_err", "zcmn_err", FST_Kprintf)
@ -238,7 +238,7 @@ Index: tools/clang/lib/Sema/SemaChecking.cpp
.Default(FST_Unknown);
}
@@ -3365,6 +3366,43 @@ CheckPrintfHandler::HandlePrintfSpecifier(const an
@@ -3384,6 +3385,43 @@ CheckPrintfHandler::HandlePrintfSpecifier(const an
CoveredArgs.set(argIndex);
}
@ -282,7 +282,7 @@ Index: tools/clang/lib/Sema/SemaChecking.cpp
// Check for using an Objective-C specific conversion specifier
// in a non-ObjC literal.
if (!ObjCContext && CS.isObjCArg()) {
@@ -3988,7 +4026,8 @@ void Sema::CheckFormatString(const StringLiteral *
@@ -4007,7 +4045,8 @@ void Sema::CheckFormatString(const StringLiteral *
return;
}
@ -292,7 +292,7 @@ Index: tools/clang/lib/Sema/SemaChecking.cpp
CheckPrintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
numDataArgs, (Type == FST_NSString),
Str, HasVAListArg, Args, format_idx,
@@ -3996,7 +4035,8 @@ void Sema::CheckFormatString(const StringLiteral *
@@ -4015,7 +4054,8 @@ void Sema::CheckFormatString(const StringLiteral *
if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen,
getLangOpts(),


@ -596,7 +596,7 @@ Index: lib/Target/X86/X86FrameLowering.cpp
static unsigned getLEArOpcode(unsigned IsLP64) {
return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
@@ -1848,100 +1865,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi
@@ -1882,100 +1899,6 @@ void X86FrameLowering::adjustForHiPEPrologue(Machi
#endif
}
@ -697,7 +697,7 @@ Index: lib/Target/X86/X86FrameLowering.cpp
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -1956,7 +1879,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -1990,7 +1913,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
@ -706,7 +706,7 @@ Index: lib/Target/X86/X86FrameLowering.cpp
I = MBB.erase(I);
if (!reserveCallFrame) {
@@ -1976,24 +1899,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -2010,24 +1933,18 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
MachineInstr *New = nullptr;
@ -740,7 +740,7 @@ Index: lib/Target/X86/X86FrameLowering.cpp
unsigned Opc = getADDriOpcode(IsLP64, Amount);
New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr).addImm(Amount);
@@ -2011,13 +1928,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -2045,13 +1962,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF,
return;
}
@ -761,7 +761,7 @@ Index: lib/Target/X86/X86FrameLowering.h
===================================================================
--- lib/Target/X86/X86FrameLowering.h
+++ lib/Target/X86/X86FrameLowering.h
@@ -64,6 +64,8 @@ class X86FrameLowering : public TargetFrameLowerin
@@ -66,6 +66,8 @@ class X86FrameLowering : public TargetFrameLowerin
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;

File diff suppressed because it is too large


@ -67,7 +67,7 @@ Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
}
}
@@ -7696,11 +7696,8 @@ void SelectionDAGISel::LowerArguments(const Functi
@@ -7697,11 +7697,8 @@ void SelectionDAGISel::LowerArguments(const Functi
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
@ -80,7 +80,7 @@ Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7715,6 +7712,8 @@ void SelectionDAGISel::LowerArguments(const Functi
@@ -7716,6 +7713,8 @@ void SelectionDAGISel::LowerArguments(const Functi
MyFlags.Flags.setOrigAlign(1);
Ins.push_back(MyFlags);
}
@ -287,7 +287,7 @@ Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -11280,7 +11280,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas
@@ -11285,7 +11285,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas
return (Members > 0 && Members <= 4);
}
@ -298,7 +298,7 @@ Index: lib/Target/ARM/ARMISelLowering.cpp
bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
if (getEffectiveCallingConv(CallConv, isVarArg) !=
@@ -11289,7 +11291,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse
@@ -11294,7 +11296,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse
HABaseType Base = HA_UNKNOWN;
uint64_t Members = 0;


@ -1,55 +0,0 @@
Pull in r230058 from upstream llvm trunk (by Benjamin Kramer):
LoopRotate: When reconstructing loop simplify form don't split edges
from indirectbrs.
Yet another chapter in the endless story. While this looks like we
leave the loop in a non-canonical state, this replicates the logic in
LoopSimplify so it doesn't diverge from the canonical form in any way.
PR21968
This fixes a "Cannot split critical edge from IndirectBrInst" assertion
failure when building the devel/radare2 port.
Introduced here: https://svnweb.freebsd.org/changeset/base/279161
Index: lib/Transforms/Scalar/LoopRotation.cpp
===================================================================
--- lib/Transforms/Scalar/LoopRotation.cpp
+++ lib/Transforms/Scalar/LoopRotation.cpp
@@ -535,6 +535,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool Simplifi
Loop *PredLoop = LI->getLoopFor(*PI);
if (!PredLoop || PredLoop->contains(Exit))
continue;
+ if (isa<IndirectBrInst>((*PI)->getTerminator()))
+ continue;
SplitLatchEdge |= L->getLoopLatch() == *PI;
BasicBlock *ExitSplit = SplitCriticalEdge(*PI, Exit, this);
ExitSplit->moveBefore(Exit);
Index: test/Transforms/LoopRotate/crash.ll
===================================================================
--- test/Transforms/LoopRotate/crash.ll
+++ test/Transforms/LoopRotate/crash.ll
@@ -153,3 +153,21 @@ entry:
"5": ; preds = %"3", %entry
ret void
}
+
+; PR21968
+define void @test8(i1 %C, i8* %P) #0 {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ br i1 %C, label %l_bad, label %for.body
+
+for.body: ; preds = %for.cond
+ indirectbr i8* %P, [label %for.inc, label %l_bad]
+
+for.inc: ; preds = %for.body
+ br label %for.cond
+
+l_bad: ; preds = %for.body, %for.cond
+ ret void
+}


@ -1,811 +0,0 @@
Pull in r231227 from upstream llvm trunk (by Kristof Beyls):
Fix PR22408 - LLVM producing AArch64 TLS relocations that GNU linkers
cannot handle yet.
As is described at http://llvm.org/bugs/show_bug.cgi?id=22408, the
GNU linkers ld.bfd and ld.gold currently only support a subset of the
whole range of AArch64 ELF TLS relocations. Furthermore, they assume
that some of the code sequences to access thread-local variables are
produced in a very specific sequence. When the sequence is not as the
linker expects, it can silently mis-relax/mis-optimize the
instructions.
Even if that wouldn't be the case, it's good to produce the exact
sequence, as that ensures that linkers can perform optimizing
relaxations.
This patch:
* implements support for 16MiB TLS area size instead of 4GiB TLS area
size. Ideally clang would grow an -mtls-size option to allow
support for both, but that's not part of this patch.
* by default doesn't produce local dynamic access patterns, as even
modern ld.bfd and ld.gold linkers do not support the associated
relocations. An option (-aarch64-elf-ldtls-generation) is added to
enable generation of local dynamic code sequence, but is off by
default.
* makes sure that the exact expected code sequence for local dynamic
and general dynamic accesses is produced, by making use of a new
pseudo instruction. The patch also removes two
(AArch64ISD::TLSDESC_BLR, AArch64ISD::TLSDESC_CALL) pre-existing
AArch64-specific pseudo SDNode instructions that are superseded by
the new one (TLSDESC_CALLSEQ).
Introduced here: https://svnweb.freebsd.org/changeset/base/280865
Index: lib/Target/AArch64/AArch64AsmPrinter.cpp
===================================================================
--- lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -12,6 +12,8 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "AArch64.h"
#include "AArch64MCInstLower.h"
#include "AArch64MachineFunctionInfo.h"
@@ -494,12 +496,47 @@ void AArch64AsmPrinter::EmitInstruction(const Mach
EmitToStreamer(OutStreamer, TmpInst);
return;
}
- case AArch64::TLSDESC_BLR: {
- MCOperand Callee, Sym;
- MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
- MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
+ case AArch64::TLSDESC_CALLSEQ: {
+ /// lower this to:
+ /// adrp x0, :tlsdesc:var
+ /// ldr x1, [x0, #:tlsdesc_lo12:var]
+ /// add x0, x0, #:tlsdesc_lo12:var
+ /// .tlsdesccall var
+ /// blr x1
+ /// (TPIDR_EL0 offset now in x0)
+ const MachineOperand &MO_Sym = MI->getOperand(0);
+ MachineOperand MO_TLSDESC_LO12(MO_Sym), MO_TLSDESC(MO_Sym);
+ MCOperand Sym, SymTLSDescLo12, SymTLSDesc;
+ MO_TLSDESC_LO12.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ MO_TLSDESC.setTargetFlags(AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ MCInstLowering.lowerOperand(MO_Sym, Sym);
+ MCInstLowering.lowerOperand(MO_TLSDESC_LO12, SymTLSDescLo12);
+ MCInstLowering.lowerOperand(MO_TLSDESC, SymTLSDesc);
- // First emit a relocation-annotation. This expands to no code, but requests
+ MCInst Adrp;
+ Adrp.setOpcode(AArch64::ADRP);
+ Adrp.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Adrp.addOperand(SymTLSDesc);
+ EmitToStreamer(OutStreamer, Adrp);
+
+ MCInst Ldr;
+ Ldr.setOpcode(AArch64::LDRXui);
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ Ldr.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Ldr.addOperand(SymTLSDescLo12);
+ Ldr.addOperand(MCOperand::CreateImm(0));
+ EmitToStreamer(OutStreamer, Ldr);
+
+ MCInst Add;
+ Add.setOpcode(AArch64::ADDXri);
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(MCOperand::CreateReg(AArch64::X0));
+ Add.addOperand(SymTLSDescLo12);
+ Add.addOperand(MCOperand::CreateImm(AArch64_AM::getShiftValue(0)));
+ EmitToStreamer(OutStreamer, Add);
+
+ // Emit a relocation-annotation. This expands to no code, but requests
// the following instruction gets an R_AARCH64_TLSDESC_CALL.
MCInst TLSDescCall;
TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
@@ -506,12 +543,10 @@ void AArch64AsmPrinter::EmitInstruction(const Mach
TLSDescCall.addOperand(Sym);
EmitToStreamer(OutStreamer, TLSDescCall);
- // Other than that it's just a normal indirect call to the function loaded
- // from the descriptor.
- MCInst BLR;
- BLR.setOpcode(AArch64::BLR);
- BLR.addOperand(Callee);
- EmitToStreamer(OutStreamer, BLR);
+ MCInst Blr;
+ Blr.setOpcode(AArch64::BLR);
+ Blr.addOperand(MCOperand::CreateReg(AArch64::X1));
+ EmitToStreamer(OutStreamer, Blr);
return;
}
Index: lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
===================================================================
--- lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -62,10 +62,10 @@ struct LDTLSCleanup : public MachineFunctionPass {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
- case AArch64::TLSDESC_BLR:
+ case AArch64::TLSDESC_CALLSEQ:
// Make sure it's a local dynamic access.
- if (!I->getOperand(1).isSymbol() ||
- strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+ if (!I->getOperand(0).isSymbol() ||
+ strcmp(I->getOperand(0).getSymbolName(), "_TLS_MODULE_BASE_"))
break;
if (TLSBaseAddrReg)
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -64,10 +64,18 @@ EnableAArch64ExtrGeneration("aarch64-extr-generati
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow AArch64 SLI/SRI formation"),
- cl::init(false));
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+ "aarch64-elf-ldtls-generation", cl::Hidden,
+ cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+ cl::init(false));
+
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
: TargetLowering(TM) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
@@ -760,7 +768,7 @@ const char *AArch64TargetLowering::getTargetNodeNa
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
@@ -3049,61 +3057,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
+/// is a function pointer to carry out the resolution.
///
-/// Thus, the ideal call sequence on AArch64 is:
+/// The sequence is:
+/// adrp x0, :tlsdesc:var
+/// ldr x1, [x0, #:tlsdesc_lo12:var]
+/// add x0, x0, #:tlsdesc_lo12:var
+/// .tlsdesccall var
+/// blr x1
+/// (TPIDR_EL0 offset now in x0)
///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
-///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
-///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
+/// The above sequence must be produced unscheduled, to enable the linker to
+/// optimize/relax this sequence.
+/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+/// above sequence, and expanded really late in the compilation flow, to ensure
+/// the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
+ SDValue Chain = DAG.getEntryNode();
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
- // silly).
- const TargetRegisterInfo *TRI =
- getTargetMachine().getSubtargetImpl()->getRegisterInfo();
- const AArch64RegisterInfo *ARI =
- static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
-
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector<SDValue, 6> Ops;
+ SmallVector<SDValue, 2> Ops;
Ops.push_back(Chain);
- Ops.push_back(Func);
Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
- Glue = Chain.getValue(1);
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+ SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
@@ -3114,9 +3095,18 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
"ELF TLS only supported in small memory model");
+ // Different choices can be made for the maximum size of the TLS area for a
+ // module. For the small address model, the default TLS size is 16MiB and the
+ // maximum TLS size is 4GiB.
+ // FIXME: add -mtls-size command line option and make it control the 16MiB
+ // vs. 4GiB code sequence generation.
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+ if (Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+ }
SDValue TPOff;
EVT PtrVT = getPointerTy();
@@ -3127,17 +3117,20 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD
if (Model == TLSModel::LocalExec) {
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
+ SDValue TPWithOff_lo =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+ HiVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ SDValue TPWithOff =
+ SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+ LoVar, DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ return TPWithOff;
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3152,19 +3145,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3173,40 +3153,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
- AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- SDValue DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff =
- SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
} else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
- AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
@@ -3214,7 +3177,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SD
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -29,9 +29,9 @@ enum {
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
- // Almost the same as a normal call node, except that a TLSDesc relocation is
- // needed so the linker can relax it correctly if possible.
- TLSDESC_CALL,
+ // Produces the full sequence of instructions for getting the thread pointer
+ // offset of a variable into X0, using the TLSDesc model.
+ TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
@@ -399,8 +399,8 @@ class AArch64TargetLowering : public TargetLowerin
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
+ SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+ SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -96,6 +96,19 @@ def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCis
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
+
+// Generates the general dynamic sequences, i.e.
+// adrp x0, :tlsdesc:var
+// ldr x1, [x0, #:tlsdesc_lo12:var]
+// add x0, x0, #:tlsdesc_lo12:var
+// .tlsdesccall var
+// blr x1
+
+// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
+// number of operands (the variable)
+def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
+ [SDTCisPtrTy<0>]>;
+
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
@@ -229,11 +242,12 @@ def AArch64Prefetch : SDNode<"AArch64ISD::P
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
-def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
- SDT_AArch64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
+def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
+ SDT_AArch64TLSDescCallSeq,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
@@ -1049,15 +1063,16 @@ def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym)
let AsmString = ".tlsdesccall $sym";
}
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+// FIXME: maybe the scratch register used shouldn't be fixed to X1?
+// FIXME: can "hasSideEffects" be dropped?
+let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1,
+ isCodeGenOnly = 1 in
+def TLSDESC_CALLSEQ
+ : Pseudo<(outs), (ins i64imm:$sym),
+ [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
+ (TLSDESC_CALLSEQ texternalsym:$sym)>;
-def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
Index: lib/Target/AArch64/AArch64MCInstLower.cpp
===================================================================
--- lib/Target/AArch64/AArch64MCInstLower.cpp
+++ lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -22,9 +22,12 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
+
AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
: Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
@@ -84,10 +87,16 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandEL
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
Model = Printer.TM.getTLSModel(GV);
+ if (!EnableAArch64ELFLocalDynamicTLSGeneration &&
+ Model == TLSModel::LocalDynamic)
+ Model = TLSModel::GeneralDynamic;
+
} else {
assert(MO.isSymbol() &&
StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
"unexpected external TLS symbol");
+ // The general dynamic access sequence is used to get the
+ // address of _TLS_MODULE_BASE_.
Model = TLSModel::GeneralDynamic;
}
switch (Model) {
@@ -123,6 +132,8 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandEL
RefFlags |= AArch64MCExpr::VK_G1;
else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
RefFlags |= AArch64MCExpr::VK_G0;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_HI12)
+ RefFlags |= AArch64MCExpr::VK_HI12;
if (MO.getTargetFlags() & AArch64II::MO_NC)
RefFlags |= AArch64MCExpr::VK_NC;
Index: lib/Target/AArch64/Utils/AArch64BaseInfo.h
===================================================================
--- lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1229,7 +1229,7 @@ namespace AArch64II {
MO_NO_FLAG,
- MO_FRAGMENT = 0x7,
+ MO_FRAGMENT = 0xf,
/// MO_PAGE - A symbol operand with this flag represents the pc-relative
/// offset of the 4K page containing the symbol. This is used with the
@@ -1257,26 +1257,31 @@ namespace AArch64II {
/// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
MO_G0 = 6,
+ /// MO_HI12 - This flag indicates that a symbol operand represents the bits
+ /// 13-24 of a 64-bit address, used in a arithmetic immediate-shifted-left-
+ /// by-12-bits instruction.
+ MO_HI12 = 7,
+
/// MO_GOT - This flag indicates that a symbol operand represents the
/// address of the GOT entry for the symbol, rather than the address of
/// the symbol itself.
- MO_GOT = 8,
+ MO_GOT = 0x10,
/// MO_NC - Indicates whether the linker is expected to check the symbol
/// reference for overflow. For example in an ADRP/ADD pair of relocations
/// the ADRP usually does check, but not the ADD.
- MO_NC = 0x10,
+ MO_NC = 0x20,
/// MO_TLS - Indicates that the operand being accessed is some kind of
/// thread-local symbol. On Darwin, only one type of thread-local access
/// exists (pre linker-relaxation), but on ELF the TLSModel used for the
/// referee will affect interpretation.
- MO_TLS = 0x20,
+ MO_TLS = 0x40,
/// MO_CONSTPOOL - This flag indicates that a symbol operand represents
/// the address of a constant pool entry for the symbol, rather than the
/// address of the symbol itself.
- MO_CONSTPOOL = 0x40
+ MO_CONSTPOOL = 0x80
};
} // end namespace AArch64II
Index: test/CodeGen/AArch64/arm64-tls-dynamics.ll
===================================================================
--- test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -aarch64-elf-ldtls-generation=1 -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-RELOC %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOLD %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -filetype=obj < %s | llvm-objdump -r - | FileCheck --check-prefix=CHECK-NOLD-RELOC %s
@general_dynamic_var = external thread_local global i32
@@ -9,22 +11,34 @@ define i32 @test_generaldynamic() {
%val = load i32* @general_dynamic_var
ret i32 %val
- ; FIXME: the adrp instructions are redundant (if harmless).
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NEXT: .tlsdesccall general_dynamic_var
; CHECK-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall general_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+
+
; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
; CHECK: ldr w0, [x[[TP]], x0]
+; CHECK-NOLD: mrs x[[TP:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: ldr w0, [x[[TP]], x0]
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}
define i32* @test_generaldynamic_addr() {
@@ -32,12 +46,10 @@ define i32* @test_generaldynamic_addr() {
ret i32* @general_dynamic_var
- ; FIXME: the adrp instructions are redundant (if harmless).
-; CHECK: adrp [[TLSDESC_HI:x[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: add x0, [[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:general_dynamic_var
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
-; CHECK: .tlsdesccall general_dynamic_var
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:general_dynamic_var
+; CHECK-NEXT: .tlsdesccall general_dynamic_var
; CHECK-NEXT: blr [[CALLEE]]
; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
@@ -44,9 +56,15 @@ define i32* @test_generaldynamic_addr() {
; CHECK: add x0, [[TP]], x0
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}
@local_dynamic_var = external thread_local(localdynamic) global i32
@@ -58,54 +76,71 @@ define i32 @test_localdynamic() {
ret i32 %val
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
+; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var
+; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var
+; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK: ldr w0, [x[[TPIDR]], x[[TPOFF]]]
-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: ldr w0, [x[[TPIDR]], x0]
-; CHECK: add x[[TPREL:[0-9]+]], x0, [[DTP_OFFSET]]
-; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
-
-; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]]
-
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
+
}
define i32* @test_localdynamic_addr() {
; CHECK-LABEL: test_localdynamic_addr:
- ret i32* @local_dynamic_var
-
; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
+; CHECK-NEXT: add x[[TPOFF:[0-9]+]], x0, :dtprel_hi12:local_dynamic_var
+; CHECK-NEXT: add x[[TPOFF]], x[[TPOFF]], :dtprel_lo12_nc:local_dynamic_var
+; CHECK: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK: add x0, x[[TPIDR]], x[[TPOFF]]
-; CHECK: movz [[DTP_OFFSET:x[0-9]+]], #:dtprel_g1:local_dynamic_var
-; CHECK: movk [[DTP_OFFSET]], #:dtprel_g0_nc:local_dynamic_var
+; CHECK-NOLD: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:local_dynamic_var
+; CHECK-NOLD-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var]
+; CHECK-NOLD-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:local_dynamic_var
+; CHECK-NOLD-NEXT: .tlsdesccall local_dynamic_var
+; CHECK-NOLD-NEXT: blr [[CALLEE]]
+; CHECK-NOLD: mrs x[[TPIDR:[0-9]+]], TPIDR_EL0
+; CHECK-NOLD: add x0, x[[TPIDR]], x0
+ ret i32* @local_dynamic_var
-; CHECK: add [[TPREL:x[0-9]+]], x0, [[DTP_OFFSET]]
-
-; CHECK: mrs [[TPIDR:x[0-9]+]], TPIDR_EL0
-
-; CHECK: add x0, [[TPIDR]], [[TPREL]]
-
; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
-; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
+; CHECK-NOLD-RELOC: R_AARCH64_TLSDESC_CALL
}
; The entire point of the local-dynamic access model is to have a single call to
@@ -122,11 +157,10 @@ define i32 @test_localdynamic_deduplicate() {
%sum = add i32 %val, %val2
ret i32 %sum
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
-; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
-; CHECK: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
-; CHECK: .tlsdesccall _TLS_MODULE_BASE_
+; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
+; CHECK-NEXT: ldr [[CALLEE:x[0-9]+]], [x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_]
+; CHECK-NEXT: add x0, x[[TLSDESC_HI]], :tlsdesc_lo12:_TLS_MODULE_BASE_
+; CHECK-NEXT: .tlsdesccall _TLS_MODULE_BASE_
; CHECK-NEXT: blr [[CALLEE]]
; CHECK-NOT: _TLS_MODULE_BASE_
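
For reference, the descriptor sequence these updated checks pin down is the
standard AArch64 TLSDESC call; a minimal sketch with illustrative register
choices and a placeholder variable name (the tests only constrain the pattern
and its ordering):

	adrp	x0, :tlsdesc:var                // page of the TLS descriptor
	ldr	x1, [x0, :tlsdesc_lo12:var]     // load the resolver entry point
	add	x0, x0, :tlsdesc_lo12:var       // x0 = address of the descriptor
	.tlsdesccall var                        // marks the call site for the linker
	blr	x1                              // resolver returns the tp-offset in x0
	mrs	x8, TPIDR_EL0                   // read the thread pointer
	ldr	w0, [x8, x0]                    // load the TLS variable

The ldr now precedes the add, which is why the R_AARCH64_TLSDESC_LD64_LO12_NC
check moved ahead of R_AARCH64_TLSDESC_ADD_LO12_NC throughout.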
Index: test/CodeGen/AArch64/arm64-tls-execs.ll
===================================================================
--- test/CodeGen/AArch64/arm64-tls-execs.ll
+++ test/CodeGen/AArch64/arm64-tls-execs.ll
@@ -38,14 +38,13 @@ define i32 @test_local_exec() {
; CHECK-LABEL: test_local_exec:
%val = load i32* @local_exec_var
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92]
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
-; CHECK: ldr w0, [x[[TP]], [[TP_OFFSET]]]
+; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK: add x[[R3:[0-9]+]], x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK: ldr w0, [x[[R3]]]
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
-
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
ret i32 %val
}
@@ -53,11 +52,11 @@ define i32* @test_local_exec_addr() {
; CHECK-LABEL: test_local_exec_addr:
ret i32* @local_exec_var
-; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var
-; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
-; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
-; CHECK: add x0, [[TP]], [[TP_OFFSET]]
+; CHECK: mrs x[[R1:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[R2:[0-9]+]], x[[R1]], :tprel_hi12:local_exec_var
+; CHECK: add x0, x[[R2]], :tprel_lo12_nc:local_exec_var
+; CHECK: ret
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G1
-; CHECK-RELOC: R_AARCH64_TLSLE_MOVW_TPREL_G0_NC
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_HI12
+; CHECK-RELOC: R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
}
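
The local-exec change has the same shape: rather than materializing the offset
with a movz/movk pair, the tprel halves are added directly onto the thread
pointer. A rough sketch, again with illustrative registers:

	mrs	x8, TPIDR_EL0                           // thread pointer
	add	x9, x8, :tprel_hi12:local_exec_var      // R_AARCH64_TLSLE_ADD_TPREL_HI12
	add	x10, x9, :tprel_lo12_nc:local_exec_var  // R_AARCH64_TLSLE_ADD_TPREL_LO12_NC
	ldr	w0, [x10]                               // the addr variant stops after the adds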
View File
@ -1,77 +0,0 @@
Pull in r229911 from upstream llvm trunk (by Benjamin Kramer):
MC: Allow multiple comma-separated expressions on the .uleb128 directive.
For compatibility with GNU as. Binutils documents this as
'.uleb128 expressions'. Subtle, isn't it?
Introduced here: http://svnweb.freebsd.org/changeset/base/281775
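
In practice the directive now accepts a comma-separated list, each expression
encoded independently; a small standalone illustration (byte values match the
updated test below):

	.uleb128 23, 42         // emits 0x17, 0x2a, same as two separate directives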
Index: lib/MC/MCParser/AsmParser.cpp
===================================================================
--- lib/MC/MCParser/AsmParser.cpp
+++ lib/MC/MCParser/AsmParser.cpp
@@ -3636,22 +3636,28 @@ bool AsmParser::parseDirectiveSpace(StringRef IDVa
}
/// parseDirectiveLEB128
-/// ::= (.sleb128 | .uleb128) expression
+/// ::= (.sleb128 | .uleb128) [ expression (, expression)* ]
bool AsmParser::parseDirectiveLEB128(bool Signed) {
checkForValidSection();
const MCExpr *Value;
- if (parseExpression(Value))
- return true;
+ for (;;) {
+ if (parseExpression(Value))
+ return true;
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in directive");
+ if (Signed)
+ getStreamer().EmitSLEB128Value(Value);
+ else
+ getStreamer().EmitULEB128Value(Value);
- if (Signed)
- getStreamer().EmitSLEB128Value(Value);
- else
- getStreamer().EmitULEB128Value(Value);
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+ }
+
return false;
}
Index: test/MC/ELF/uleb.s
===================================================================
--- test/MC/ELF/uleb.s
+++ test/MC/ELF/uleb.s
@@ -11,16 +11,17 @@ foo:
.uleb128 128
.uleb128 16383
.uleb128 16384
+ .uleb128 23, 42
// ELF_32: Name: .text
// ELF_32: SectionData (
-// ELF_32: 0000: 00017F80 01FF7F80 8001
+// ELF_32: 0000: 00017F80 01FF7F80 8001172A
// ELF_32: )
// ELF_64: Name: .text
// ELF_64: SectionData (
-// ELF_64: 0000: 00017F80 01FF7F80 8001
+// ELF_64: 0000: 00017F80 01FF7F80 8001172A
// ELF_64: )
// MACHO_32: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// MACHO_32: ('_section_data', '00017f80 01ff7f80 8001')
+// MACHO_32: ('_section_data', '00017f80 01ff7f80 8001172a')
// MACHO_64: ('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// MACHO_64: ('_section_data', '00017f80 01ff7f80 8001')
+// MACHO_64: ('_section_data', '00017f80 01ff7f80 8001172a')
View File
@ -6972,6 +6972,11 @@ def note_neon_vector_initializer_non_portable_q : Note<
"vcombine_%0%1(vcreate_%0%1(), vcreate_%0%1()) to initialize from integer "
"constants">;
def err_builtin_longjmp_unsupported : Error<
"__builtin_longjmp is not supported for the current target">;
def err_builtin_setjmp_unsupported : Error<
"__builtin_setjmp is not supported for the current target">;
def err_builtin_longjmp_invalid_val : Error<
"argument to __builtin_longjmp must be a constant 1">;
def err_builtin_requires_language : Error<"'%0' is only available in %1">;
Some files were not shown because too many files have changed in this diff