Vendor import of llvm RELEASE_34/dot1-final tag r208032 (effectively, 3.4.1 release):
https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_34/dot1-final@208032
This commit is contained in:
parent
512b84fc6c
commit
68bcb7db19
@ -12,9 +12,10 @@ set(CMAKE_MODULE_PATH
|
||||
|
||||
set(LLVM_VERSION_MAJOR 3)
|
||||
set(LLVM_VERSION_MINOR 4)
|
||||
set(LLVM_VERSION_PATCH 1)
|
||||
|
||||
if (NOT PACKAGE_VERSION)
|
||||
set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}svn")
|
||||
set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}")
|
||||
endif()
|
||||
|
||||
option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF)
|
||||
@ -42,6 +43,7 @@ set(CPACK_PACKAGE_INSTALL_DIRECTORY "LLVM")
|
||||
set(CPACK_PACKAGE_VENDOR "LLVM")
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR ${LLVM_VERSION_MAJOR})
|
||||
set(CPACK_PACKAGE_VERSION_MINOR ${LLVM_VERSION_MINOR})
|
||||
set(CPACK_PACKAGE_VERSION_PATCH ${LLVM_VERSION_PATCH})
|
||||
set(CPACK_PACKAGE_VERSION ${PACKAGE_VERSION})
|
||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.TXT")
|
||||
if(WIN32 AND NOT UNIX)
|
||||
|
@ -15,6 +15,10 @@
|
||||
# Define LLVM specific info and directories based on the autoconf variables
|
||||
LLVMPackageName := @PACKAGE_TARNAME@
|
||||
LLVMVersion := @PACKAGE_VERSION@
|
||||
LLVM_VERSION_MAJOR := @LLVM_VERSION_MAJOR@
|
||||
LLVM_VERSION_MINOR := @LLVM_VERSION_MINOR@
|
||||
LLVM_VERSION_PATCH := @LLVM_VERSION_PATCH@
|
||||
LLVM_VERSION_SUFFIX := @LLVM_VERSION_SUFFIX@
|
||||
LLVM_CONFIGTIME := @LLVM_CONFIGTIME@
|
||||
|
||||
###########################################################################
|
||||
|
@ -757,7 +757,7 @@ else
|
||||
Ranlib = ranlib
|
||||
endif
|
||||
|
||||
AliasTool = ln -s
|
||||
AliasTool = ln -sf
|
||||
|
||||
#----------------------------------------------------------
|
||||
# Get the list of source files and compute object file
|
||||
@ -1121,15 +1121,19 @@ ifdef LIBRARYNAME
|
||||
|
||||
# Make sure there isn't any extraneous whitespace on the LIBRARYNAME option
|
||||
LIBRARYNAME := $(strip $(LIBRARYNAME))
|
||||
LIBRARYALIASNAME := $(strip $(LIBRARYALIASNAME))
|
||||
ifdef LOADABLE_MODULE
|
||||
BaseLibName.A := $(LIBRARYNAME).a
|
||||
BaseLibName.SO := $(LIBRARYNAME)$(SHLIBEXT)
|
||||
BaseAliasName.SO := $(LIBRARYALIASNAME)$(SHLIBEXT)
|
||||
else
|
||||
BaseLibName.A := lib$(LIBRARYNAME).a
|
||||
BaseLibName.SO := $(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT)
|
||||
BaseAliasName.SO := $(SharedPrefix)$(LIBRARYALIASNAME)$(SHLIBEXT)
|
||||
endif
|
||||
LibName.A := $(LibDir)/$(BaseLibName.A)
|
||||
LibName.SO := $(SharedLibDir)/$(BaseLibName.SO)
|
||||
AliasName.SO := $(SharedLibDir)/$(BaseAliasName.SO)
|
||||
LibName.O := $(LibDir)/$(LIBRARYNAME).o
|
||||
|
||||
#---------------------------------------------------------
|
||||
@ -1183,12 +1187,17 @@ else
|
||||
DestSharedLibDir := $(DESTDIR)$(PROJ_libdir)
|
||||
endif
|
||||
DestSharedLib := $(DestSharedLibDir)/$(BaseLibName.SO)
|
||||
DestSharedAlias := $(DestSharedLibDir)/$(BaseAliasName.SO)
|
||||
|
||||
install-local:: $(DestSharedLib)
|
||||
|
||||
$(DestSharedLib): $(LibName.SO) $(DestSharedLibDir)
|
||||
$(Echo) Installing $(BuildMode) Shared Library $(DestSharedLib)
|
||||
$(Verb) $(INSTALL) $(LibName.SO) $(DestSharedLib)
|
||||
ifdef SHARED_ALIAS
|
||||
$(Echo) Creating alias from $(DestSharedLib) to $(DestSharedAlias)
|
||||
$(Verb) $(AliasTool) $(DestSharedLib) $(DestSharedAlias)
|
||||
endif
|
||||
|
||||
uninstall-local::
|
||||
$(Echo) Uninstalling $(BuildMode) Shared Library $(DestSharedLib)
|
||||
|
@ -31,9 +31,22 @@ dnl===
|
||||
dnl===-----------------------------------------------------------------------===
|
||||
dnl Initialize autoconf and define the package name, version number and
|
||||
dnl address for reporting bugs.
|
||||
AC_INIT([LLVM],[3.4],[http://llvm.org/bugs/])
|
||||
AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API])
|
||||
AC_DEFINE([LLVM_VERSION_MINOR], [4], [Minor version of the LLVM API])
|
||||
|
||||
AC_INIT([LLVM],[3.4.1],[http://llvm.org/bugs/])
|
||||
|
||||
LLVM_VERSION_MAJOR=3
|
||||
LLVM_VERSION_MINOR=4
|
||||
LLVM_VERSION_PATCH=1
|
||||
LLVM_VERSION_SUFFIX=
|
||||
|
||||
AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
|
||||
AC_DEFINE_UNQUOTED([LLVM_VERSION_MINOR], $LLVM_VERSION_MINOR, [Minor version of the LLVM API])
|
||||
AC_DEFINE_UNQUOTED([LLVM_VERSION_PATCH], $LLVM_VERSION_PATCH, [Patch version of the LLVM API])
|
||||
|
||||
AC_SUBST([LLVM_VERSION_MAJOR])
|
||||
AC_SUBST([LLVM_VERSION_MINOR])
|
||||
AC_SUBST([LLVM_VERSION_PATCH])
|
||||
AC_SUBST([LLVM_VERSION_SUFFIX])
|
||||
|
||||
dnl Provide a copyright substitution and ensure the copyright notice is included
|
||||
dnl in the output of --version option of the generated configure script.
|
||||
|
78
configure
vendored
78
configure
vendored
@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.60 for LLVM 3.4.
|
||||
# Generated by GNU Autoconf 2.60 for LLVM 3.4.1.
|
||||
#
|
||||
# Report bugs to <http://llvm.org/bugs/>.
|
||||
#
|
||||
@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='LLVM'
|
||||
PACKAGE_TARNAME='llvm'
|
||||
PACKAGE_VERSION='3.4'
|
||||
PACKAGE_STRING='LLVM 3.4'
|
||||
PACKAGE_VERSION='3.4.1'
|
||||
PACKAGE_STRING='LLVM 3.4.1'
|
||||
PACKAGE_BUGREPORT='http://llvm.org/bugs/'
|
||||
|
||||
ac_unique_file="lib/IR/Module.cpp"
|
||||
@ -639,6 +639,10 @@ LIBS
|
||||
build_alias
|
||||
host_alias
|
||||
target_alias
|
||||
LLVM_VERSION_MAJOR
|
||||
LLVM_VERSION_MINOR
|
||||
LLVM_VERSION_PATCH
|
||||
LLVM_VERSION_SUFFIX
|
||||
LLVM_COPYRIGHT
|
||||
CC
|
||||
CFLAGS
|
||||
@ -1330,7 +1334,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures LLVM 3.4 to adapt to many kinds of systems.
|
||||
\`configure' configures LLVM 3.4.1 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@ -1396,7 +1400,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of LLVM 3.4:";;
|
||||
short | recursive ) echo "Configuration of LLVM 3.4.1:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@ -1564,7 +1568,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
LLVM configure 3.4
|
||||
LLVM configure 3.4.1
|
||||
generated by GNU Autoconf 2.60
|
||||
|
||||
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
|
||||
@ -1580,7 +1584,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by LLVM $as_me 3.4, which was
|
||||
It was created by LLVM $as_me 3.4.1, which was
|
||||
generated by GNU Autoconf 2.60. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@ -1934,16 +1938,32 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
|
||||
|
||||
|
||||
|
||||
cat >>confdefs.h <<\_ACEOF
|
||||
#define LLVM_VERSION_MAJOR 3
|
||||
LLVM_VERSION_MAJOR=3
|
||||
LLVM_VERSION_MINOR=4
|
||||
LLVM_VERSION_PATCH=1
|
||||
LLVM_VERSION_SUFFIX=
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define LLVM_VERSION_MAJOR $LLVM_VERSION_MAJOR
|
||||
_ACEOF
|
||||
|
||||
|
||||
cat >>confdefs.h <<\_ACEOF
|
||||
#define LLVM_VERSION_MINOR 4
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define LLVM_VERSION_MINOR $LLVM_VERSION_MINOR
|
||||
_ACEOF
|
||||
|
||||
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define LLVM_VERSION_PATCH $LLVM_VERSION_PATCH
|
||||
_ACEOF
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LLVM_COPYRIGHT="Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign."
|
||||
|
||||
|
||||
@ -8825,7 +8845,9 @@ if test "${enable_ltdl_install+set}" = set; then
|
||||
fi
|
||||
|
||||
|
||||
if test x"${enable_ltdl_install-no}" != xno; then
|
||||
|
||||
|
||||
if test x"${enable_ltdl_install-no}" != xno; then
|
||||
INSTALL_LTDL_TRUE=
|
||||
INSTALL_LTDL_FALSE='#'
|
||||
else
|
||||
@ -8833,7 +8855,9 @@ else
|
||||
INSTALL_LTDL_FALSE=
|
||||
fi
|
||||
|
||||
if test x"${enable_ltdl_convenience-no}" != xno; then
|
||||
|
||||
|
||||
if test x"${enable_ltdl_convenience-no}" != xno; then
|
||||
CONVENIENCE_LTDL_TRUE=
|
||||
CONVENIENCE_LTDL_FALSE='#'
|
||||
else
|
||||
@ -10582,7 +10606,7 @@ else
|
||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||
lt_status=$lt_dlunknown
|
||||
cat > conftest.$ac_ext <<EOF
|
||||
#line 10585 "configure"
|
||||
#line 10609 "configure"
|
||||
#include "confdefs.h"
|
||||
|
||||
#if HAVE_DLFCN_H
|
||||
@ -22745,7 +22769,7 @@ exec 6>&1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by LLVM $as_me 3.4, which was
|
||||
This file was extended by LLVM $as_me 3.4.1, which was
|
||||
generated by GNU Autoconf 2.60. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@ -22798,7 +22822,7 @@ Report bugs to <bug-autoconf@gnu.org>."
|
||||
_ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF
|
||||
ac_cs_version="\\
|
||||
LLVM config.status 3.4
|
||||
LLVM config.status 3.4.1
|
||||
configured by $0, generated by GNU Autoconf 2.60,
|
||||
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
|
||||
|
||||
@ -23036,6 +23060,10 @@ LIBS!$LIBS$ac_delim
|
||||
build_alias!$build_alias$ac_delim
|
||||
host_alias!$host_alias$ac_delim
|
||||
target_alias!$target_alias$ac_delim
|
||||
LLVM_VERSION_MAJOR!$LLVM_VERSION_MAJOR$ac_delim
|
||||
LLVM_VERSION_MINOR!$LLVM_VERSION_MINOR$ac_delim
|
||||
LLVM_VERSION_PATCH!$LLVM_VERSION_PATCH$ac_delim
|
||||
LLVM_VERSION_SUFFIX!$LLVM_VERSION_SUFFIX$ac_delim
|
||||
LLVM_COPYRIGHT!$LLVM_COPYRIGHT$ac_delim
|
||||
CC!$CC$ac_delim
|
||||
CFLAGS!$CFLAGS$ac_delim
|
||||
@ -23092,10 +23120,6 @@ DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
|
||||
ENABLE_WERROR!$ENABLE_WERROR$ac_delim
|
||||
ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
|
||||
EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
|
||||
DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
|
||||
DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
|
||||
KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim
|
||||
JIT!$JIT$ac_delim
|
||||
_ACEOF
|
||||
|
||||
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
|
||||
@ -23137,6 +23161,10 @@ _ACEOF
|
||||
ac_delim='%!_!# '
|
||||
for ac_last_try in false false false false false :; do
|
||||
cat >conf$$subs.sed <<_ACEOF
|
||||
DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
|
||||
DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
|
||||
KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim
|
||||
JIT!$JIT$ac_delim
|
||||
TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
|
||||
ENABLE_DOCS!$ENABLE_DOCS$ac_delim
|
||||
ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
|
||||
@ -23230,10 +23258,6 @@ LLVM_ETCDIR!$LLVM_ETCDIR$ac_delim
|
||||
LLVM_INCLUDEDIR!$LLVM_INCLUDEDIR$ac_delim
|
||||
LLVM_INFODIR!$LLVM_INFODIR$ac_delim
|
||||
LLVM_MANDIR!$LLVM_MANDIR$ac_delim
|
||||
LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim
|
||||
BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim
|
||||
ALL_BINDINGS!$ALL_BINDINGS$ac_delim
|
||||
OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
|
||||
_ACEOF
|
||||
|
||||
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
|
||||
@ -23275,6 +23299,10 @@ _ACEOF
|
||||
ac_delim='%!_!# '
|
||||
for ac_last_try in false false false false false :; do
|
||||
cat >conf$$subs.sed <<_ACEOF
|
||||
LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim
|
||||
BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim
|
||||
ALL_BINDINGS!$ALL_BINDINGS$ac_delim
|
||||
OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
|
||||
ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim
|
||||
RPATH!$RPATH$ac_delim
|
||||
RDYNAMIC!$RDYNAMIC$ac_delim
|
||||
@ -23283,7 +23311,7 @@ LIBOBJS!$LIBOBJS$ac_delim
|
||||
LTLIBOBJS!$LTLIBOBJS$ac_delim
|
||||
_ACEOF
|
||||
|
||||
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 6; then
|
||||
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 10; then
|
||||
break
|
||||
elif $ac_last_try; then
|
||||
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
|
||||
|
@ -3,9 +3,6 @@
|
||||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
/* Define if building universal (internal helper macro) */
|
||||
#undef AC_APPLE_UNIVERSAL_BUILD
|
||||
|
||||
/* Bug report URL. */
|
||||
#undef BUG_REPORT_URL
|
||||
|
||||
@ -641,6 +638,9 @@
|
||||
/* Minor version of the LLVM API */
|
||||
#undef LLVM_VERSION_MINOR
|
||||
|
||||
/* Patch version of the LLVM API */
|
||||
#undef LLVM_VERSION_PATCH
|
||||
|
||||
/* Define if the OS needs help to load dependent libraries for dlopen(). */
|
||||
#undef LTDL_DLOPEN_DEPLIBS
|
||||
|
||||
@ -673,9 +673,6 @@
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
@ -700,18 +697,6 @@
|
||||
/* Type of 1st arg on ELM Callback */
|
||||
#undef WIN32_ELMCB_PCSTR
|
||||
|
||||
/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
|
||||
significant byte first (like Motorola and SPARC, unlike Intel). */
|
||||
#if defined AC_APPLE_UNIVERSAL_BUILD
|
||||
# if defined __BIG_ENDIAN__
|
||||
# define WORDS_BIGENDIAN 1
|
||||
# endif
|
||||
#else
|
||||
# ifndef WORDS_BIGENDIAN
|
||||
# undef WORDS_BIGENDIAN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
#undef const
|
||||
|
||||
|
@ -1758,68 +1758,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_gather_d_pd : GCCBuiltin<"__builtin_ia32_gatherd_pd">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
|
||||
Intrinsic<[llvm_v2f64_ty],
|
||||
[llvm_v2f64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_q_pd_256 : GCCBuiltin<"__builtin_ia32_gatherq_pd256">,
|
||||
Intrinsic<[llvm_v4f64_ty],
|
||||
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_d_ps : GCCBuiltin<"__builtin_ia32_gatherd_ps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_d_ps_256 : GCCBuiltin<"__builtin_ia32_gatherd_ps256">,
|
||||
Intrinsic<[llvm_v8f32_ty],
|
||||
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_q_ps : GCCBuiltin<"__builtin_ia32_gatherq_ps">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
|
||||
Intrinsic<[llvm_v4f32_ty],
|
||||
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
|
||||
def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
|
||||
Intrinsic<[llvm_v2i64_ty],
|
||||
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
|
||||
Intrinsic<[llvm_v4i64_ty],
|
||||
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
|
||||
Intrinsic<[llvm_v2i64_ty],
|
||||
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
|
||||
Intrinsic<[llvm_v4i64_ty],
|
||||
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
|
||||
Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
|
||||
Intrinsic<[llvm_v8i32_ty],
|
||||
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
|
||||
Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
|
||||
Intrinsic<[llvm_v4i32_ty],
|
||||
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
}
|
||||
|
||||
// Misc.
|
||||
@ -2909,28 +2909,28 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_gather_dpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx512_gather_dps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i16_ty,
|
||||
llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx512_gather_qpd_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i8_ty,
|
||||
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx512_gather_qps_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqps512">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i8_ty,
|
||||
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
|
||||
def int_x86_avx512_gather_dpd_512 : GCCBuiltin<"__builtin_ia32_gatherdpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
|
||||
llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx512_gather_dps_512 : GCCBuiltin<"__builtin_ia32_gatherdps512">,
|
||||
Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_ptr_ty,
|
||||
llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx512_gather_qpd_512 : GCCBuiltin<"__builtin_ia32_gatherqpd512">,
|
||||
Intrinsic<[llvm_v8f64_ty], [llvm_v8i64_ty, llvm_ptr_ty,
|
||||
llvm_i32_ty],
|
||||
@ -2938,12 +2938,12 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_gather_qps_512 : GCCBuiltin<"__builtin_ia32_gatherqps512">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8i64_ty, llvm_ptr_ty,
|
||||
llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
|
||||
def int_x86_avx512_gather_dpq_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpq512">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i8_ty,
|
||||
llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
def int_x86_avx512_gather_dpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherdpi512">,
|
||||
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_i16_ty,
|
||||
llvm_v16i32_ty, llvm_ptr_ty, llvm_i32_ty],
|
||||
@ -2955,7 +2955,7 @@ let TargetPrefix = "x86" in {
|
||||
def int_x86_avx512_gather_qpi_mask_512 : GCCBuiltin<"__builtin_ia32_mask_gatherqpi512">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i8_ty,
|
||||
llvm_v8i64_ty, llvm_ptr_ty, llvm_i32_ty],
|
||||
[IntrReadMem]>;
|
||||
[IntrReadArgMem]>;
|
||||
|
||||
def int_x86_avx512_gather_dpq_512 : GCCBuiltin<"__builtin_ia32_gatherdpq512">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty, llvm_ptr_ty,
|
||||
|
@ -266,13 +266,16 @@ namespace llvm {
|
||||
/// global as being a weak undefined symbol.
|
||||
const char *WeakRefDirective; // Defaults to NULL.
|
||||
|
||||
/// WeakDefDirective - This directive, if non-null, is used to declare a
|
||||
/// global as being a weak defined symbol.
|
||||
const char *WeakDefDirective; // Defaults to NULL.
|
||||
/// True if we have a directive to declare a global as being a weak
|
||||
/// defined symbol.
|
||||
bool HasWeakDefDirective; // Defaults to false.
|
||||
|
||||
/// LinkOnceDirective - This directive, if non-null is used to declare a
|
||||
/// global as being a weak defined symbol. This is used on cygwin/mingw.
|
||||
const char *LinkOnceDirective; // Defaults to NULL.
|
||||
/// True if we have a directive to declare a global as being a weak
|
||||
/// defined symbol that can be hidden (unexported).
|
||||
bool HasWeakDefCanBeHiddenDirective; // Defaults to false.
|
||||
|
||||
/// True if we have a .linkonce directive. This is used on cygwin/mingw.
|
||||
bool HasLinkOnceDirective; // Defaults to false.
|
||||
|
||||
/// HiddenVisibilityAttr - This attribute, if not MCSA_Invalid, is used to
|
||||
/// declare a symbol as having hidden visibility.
|
||||
@ -303,6 +306,10 @@ namespace llvm {
|
||||
/// uses relocations for references to other .debug_* sections.
|
||||
bool DwarfUsesRelocationsAcrossSections;
|
||||
|
||||
/// DwarfFDESymbolsUseAbsDiff - true if DWARF FDE symbol reference
|
||||
/// relocations should be replaced by an absolute difference.
|
||||
bool DwarfFDESymbolsUseAbsDiff;
|
||||
|
||||
/// DwarfRegNumForCFI - True if dwarf register numbers are printed
|
||||
/// instead of symbolic register names in .cfi_* directives.
|
||||
bool DwarfRegNumForCFI; // Defaults to false;
|
||||
@ -497,8 +504,11 @@ namespace llvm {
|
||||
bool hasIdentDirective() const { return HasIdentDirective; }
|
||||
bool hasNoDeadStrip() const { return HasNoDeadStrip; }
|
||||
const char *getWeakRefDirective() const { return WeakRefDirective; }
|
||||
const char *getWeakDefDirective() const { return WeakDefDirective; }
|
||||
const char *getLinkOnceDirective() const { return LinkOnceDirective; }
|
||||
bool hasWeakDefDirective() const { return HasWeakDefDirective; }
|
||||
bool hasWeakDefCanBeHiddenDirective() const {
|
||||
return HasWeakDefCanBeHiddenDirective;
|
||||
}
|
||||
bool hasLinkOnceDirective() const { return HasLinkOnceDirective; }
|
||||
|
||||
MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr;}
|
||||
MCSymbolAttr getHiddenDeclarationVisibilityAttr() const {
|
||||
@ -528,6 +538,9 @@ namespace llvm {
|
||||
bool doesDwarfUseRelocationsAcrossSections() const {
|
||||
return DwarfUsesRelocationsAcrossSections;
|
||||
}
|
||||
bool doDwarfFDESymbolsUseAbsDiff() const {
|
||||
return DwarfFDESymbolsUseAbsDiff;
|
||||
}
|
||||
bool useDwarfRegNumForCFI() const {
|
||||
return DwarfRegNumForCFI;
|
||||
}
|
||||
|
@ -18,7 +18,10 @@
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/CaptureTracking.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/LoopInfo.h"
|
||||
#include "llvm/Analysis/MemoryBuiltins.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
@ -38,6 +41,12 @@
|
||||
#include <algorithm>
|
||||
using namespace llvm;
|
||||
|
||||
/// Cutoff after which to stop analysing a set of phi nodes potentially involved
|
||||
/// in a cycle. Because we are analysing 'through' phi nodes we need to be
|
||||
/// careful with value equivalence. We use reachability to make sure a value
|
||||
/// cannot be involved in a cycle.
|
||||
const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Useful predicates
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -403,42 +412,6 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
|
||||
return V;
|
||||
}
|
||||
|
||||
/// GetIndexDifference - Dest and Src are the variable indices from two
|
||||
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
|
||||
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
|
||||
/// difference between the two pointers.
|
||||
static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
|
||||
const SmallVectorImpl<VariableGEPIndex> &Src) {
|
||||
if (Src.empty()) return;
|
||||
|
||||
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
|
||||
const Value *V = Src[i].V;
|
||||
ExtensionKind Extension = Src[i].Extension;
|
||||
int64_t Scale = Src[i].Scale;
|
||||
|
||||
// Find V in Dest. This is N^2, but pointer indices almost never have more
|
||||
// than a few variable indexes.
|
||||
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
|
||||
if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
|
||||
|
||||
// If we found it, subtract off Scale V's from the entry in Dest. If it
|
||||
// goes to zero, remove the entry.
|
||||
if (Dest[j].Scale != Scale)
|
||||
Dest[j].Scale -= Scale;
|
||||
else
|
||||
Dest.erase(Dest.begin()+j);
|
||||
Scale = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// If we didn't consume this entry, add it to the end of the Dest list.
|
||||
if (Scale) {
|
||||
VariableGEPIndex Entry = { V, Extension, -Scale };
|
||||
Dest.push_back(Entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// BasicAliasAnalysis Pass
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -492,6 +465,7 @@ namespace {
|
||||
// SmallDenseMap if it ever grows larger.
|
||||
// FIXME: This should really be shrink_to_inline_capacity_and_clear().
|
||||
AliasCache.shrink_and_clear();
|
||||
VisitedPhiBBs.clear();
|
||||
return Alias;
|
||||
}
|
||||
|
||||
@ -532,9 +506,39 @@ namespace {
|
||||
typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
|
||||
AliasCacheTy AliasCache;
|
||||
|
||||
/// \brief Track phi nodes we have visited. When interpreting "Value" pointer
|
||||
/// equality as value equality we need to make sure that the "Value" is not
|
||||
/// part of a cycle. Otherwise, two uses could come from different
|
||||
/// "iterations" of a cycle and see different values for the same "Value"
|
||||
/// pointer.
|
||||
/// The following example shows the problem:
|
||||
/// %p = phi(%alloca1, %addr2)
|
||||
/// %l = load %ptr
|
||||
/// %addr1 = gep %alloca2, 0, %l
|
||||
/// %addr2 = gep %alloca2, 0, (%l + 1)
|
||||
/// alias(%p, %addr1) -> MayAlias !
|
||||
/// store %l, ...
|
||||
SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
|
||||
|
||||
// Visited - Track instructions visited by pointsToConstantMemory.
|
||||
SmallPtrSet<const Value*, 16> Visited;
|
||||
|
||||
/// \brief Check whether two Values can be considered equivalent.
|
||||
///
|
||||
/// In addition to pointer equivalence of \p V1 and \p V2 this checks
|
||||
/// whether they can not be part of a cycle in the value graph by looking at
|
||||
/// all visited phi nodes and making sure that the phis cannot reach the
|
||||
/// value. We have to do this because we are looking through phi nodes (That
|
||||
/// is, we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).)
|
||||
bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
|
||||
|
||||
/// \brief Dest and Src are the variable indices from two decomposed
|
||||
/// GetElementPtr instructions GEP1 and GEP2 which have common base
|
||||
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
|
||||
/// difference between the two pointers.
|
||||
void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
|
||||
const SmallVectorImpl<VariableGEPIndex> &Src);
|
||||
|
||||
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
|
||||
// instruction against another.
|
||||
AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
|
||||
@ -1005,7 +1009,15 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
|
||||
return NoAlias;
|
||||
}
|
||||
} else {
|
||||
if (V1Size != UnknownSize) {
|
||||
// We have the situation where:
|
||||
// + +
|
||||
// | BaseOffset |
|
||||
// ---------------->|
|
||||
// |-->V1Size |-------> V2Size
|
||||
// GEP1 V2
|
||||
// We need to know that V2Size is not unknown, otherwise we might have
|
||||
// stripped a gep with negative index ('gep <ptr>, -1, ...').
|
||||
if (V1Size != UnknownSize && V2Size != UnknownSize) {
|
||||
if (-(uint64_t)GEP1BaseOffset < V1Size)
|
||||
return PartialAlias;
|
||||
return NoAlias;
|
||||
@ -1094,6 +1106,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
|
||||
const MDNode *PNTBAAInfo,
|
||||
const Value *V2, uint64_t V2Size,
|
||||
const MDNode *V2TBAAInfo) {
|
||||
// Track phi nodes we have visited. We use this information when we determine
|
||||
// value equivalence.
|
||||
VisitedPhiBBs.insert(PN->getParent());
|
||||
|
||||
// If the values are PHIs in the same block, we can do a more precise
|
||||
// as well as efficient check: just check for aliases between the values
|
||||
// on corresponding edges.
|
||||
@ -1187,7 +1203,13 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
|
||||
V2 = V2->stripPointerCasts();
|
||||
|
||||
// Are we checking for alias of the same value?
|
||||
if (V1 == V2) return MustAlias;
|
||||
// Because we look 'through' phi nodes we could look at "Value" pointers from
|
||||
// different iterations. We must therefore make sure that this is not the
|
||||
// case. The function isValueEqualInPotentialCycles ensures that this cannot
|
||||
// happen by looking at the visited phi nodes and making sure they cannot
|
||||
// reach the value.
|
||||
if (isValueEqualInPotentialCycles(V1, V2))
|
||||
return MustAlias;
|
||||
|
||||
if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
|
||||
return NoAlias; // Scalars cannot alias each other
|
||||
@ -1307,3 +1329,71 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
|
||||
Location(V2, V2Size, V2TBAAInfo));
|
||||
return AliasCache[Locs] = Result;
|
||||
}
|
||||
|
||||
/// Decide whether \p V and \p V2 may be treated as the same value while
/// phi nodes are being looked through.
///
/// Returns false when the pointers differ. Returns true when they are equal
/// and \p V is not an Instruction. For an Instruction, returns true only if
/// no block recorded in VisitedPhiBBs can potentially reach it; returns false
/// without running the reachability queries once more than
/// MaxNumPhiBBsValueReachabilityCheck blocks have been recorded.
bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
|
||||
const Value *V2) {
|
||||
// Distinct Value pointers are never considered equal by this check.
if (V != V2)
|
||||
return false;
|
||||
|
||||
// Anything that is not an Instruction is accepted as equal right away; only
// Instructions go through the reachability test below.
const Instruction *Inst = dyn_cast<Instruction>(V);
|
||||
if (!Inst)
|
||||
return true;
|
||||
|
||||
// Past the cutoff, give the conservative answer ("not equal") instead of
// issuing one reachability query per visited phi block.
if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
|
||||
return false;
|
||||
|
||||
// Use dominance or loop info if available.
|
||||
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
|
||||
LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
|
||||
|
||||
// Make sure that the visited phis cannot reach the Value. This ensures that
|
||||
// the Values cannot come from different iterations of a potential cycle the
|
||||
// phi nodes could be involved in.
|
||||
// Each query starts at the first instruction of a visited phi's block.
for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
|
||||
PE = VisitedPhiBBs.end();
|
||||
PI != PE; ++PI)
|
||||
if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
|
||||
return false;
|
||||
|
||||
// No visited phi block can reach Inst, so pointer equality is accepted.
return true;
|
||||
}
|
||||
|
||||
/// GetIndexDifference - Dest and Src are the variable indices from two
|
||||
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
|
||||
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
|
||||
/// difference between the two pointers.
|
||||
void BasicAliasAnalysis::GetIndexDifference(
|
||||
SmallVectorImpl<VariableGEPIndex> &Dest,
|
||||
const SmallVectorImpl<VariableGEPIndex> &Src) {
|
||||
if (Src.empty())
|
||||
return;
|
||||
|
||||
for (unsigned i = 0, e = Src.size(); i != e; ++i) {
|
||||
const Value *V = Src[i].V;
|
||||
ExtensionKind Extension = Src[i].Extension;
|
||||
int64_t Scale = Src[i].Scale;
|
||||
|
||||
// Find V in Dest. This is N^2, but pointer indices almost never have more
|
||||
// than a few variable indexes.
|
||||
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
|
||||
if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
|
||||
Dest[j].Extension != Extension)
|
||||
continue;
|
||||
|
||||
// If we found it, subtract off Scale V's from the entry in Dest. If it
|
||||
// goes to zero, remove the entry.
|
||||
if (Dest[j].Scale != Scale)
|
||||
Dest[j].Scale -= Scale;
|
||||
else
|
||||
Dest.erase(Dest.begin() + j);
|
||||
Scale = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// If we didn't consume this entry, add it to the end of the Dest list.
|
||||
if (Scale) {
|
||||
VariableGEPIndex Entry = { V, Extension, -Scale };
|
||||
Dest.push_back(Entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -187,15 +187,34 @@ bool IVUsers::AddUsersImpl(Instruction *I,
|
||||
|
||||
if (AddUserToIVUsers) {
|
||||
// Okay, we found a user that we cannot reduce.
|
||||
IVUses.push_back(new IVStrideUse(this, User, I));
|
||||
IVStrideUse &NewUse = IVUses.back();
|
||||
IVStrideUse &NewUse = AddUser(User, I);
|
||||
// Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
|
||||
// The regular return value here is discarded; instead of recording
|
||||
// it, we just recompute it when we need it.
|
||||
const SCEV *OriginalISE = ISE;
|
||||
ISE = TransformForPostIncUse(NormalizeAutodetect,
|
||||
ISE, User, I,
|
||||
NewUse.PostIncLoops,
|
||||
*SE, *DT);
|
||||
|
||||
// PostIncNormalization effectively simplifies the expression under
|
||||
// pre-increment assumptions. Those assumptions (no wrapping) might not
|
||||
// hold for the post-inc value. Catch such cases by making sure the
|
||||
// transformation is invertible.
|
||||
if (OriginalISE != ISE) {
|
||||
const SCEV *DenormalizedISE =
|
||||
TransformForPostIncUse(Denormalize, ISE, User, I,
|
||||
NewUse.PostIncLoops, *SE, *DT);
|
||||
|
||||
// If we normalized the expression, but denormalization doesn't give the
|
||||
// original one, discard this user.
|
||||
if (OriginalISE != DenormalizedISE) {
|
||||
DEBUG(dbgs() << " DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
|
||||
<< *ISE << '\n');
|
||||
IVUses.pop_back();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
DEBUG(if (SE->getSCEV(I) != ISE)
|
||||
dbgs() << " NORMALIZED TO: " << *ISE << '\n');
|
||||
}
|
||||
|
@ -6218,7 +6218,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
|
||||
// LHS' type is checked for above.
|
||||
if (getTypeSizeInBits(LHS->getType()) >
|
||||
getTypeSizeInBits(FoundLHS->getType())) {
|
||||
if (CmpInst::isSigned(Pred)) {
|
||||
if (CmpInst::isSigned(FoundPred)) {
|
||||
FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
|
||||
FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
|
||||
} else {
|
||||
|
@ -222,13 +222,14 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
|
||||
case GlobalValue::WeakAnyLinkage:
|
||||
case GlobalValue::WeakODRLinkage:
|
||||
case GlobalValue::LinkerPrivateWeakLinkage:
|
||||
if (MAI->getWeakDefDirective() != 0) {
|
||||
if (MAI->hasWeakDefDirective()) {
|
||||
// .globl _foo
|
||||
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
|
||||
|
||||
bool CanBeHidden = false;
|
||||
|
||||
if (Linkage == GlobalValue::LinkOnceODRLinkage) {
|
||||
if (Linkage == GlobalValue::LinkOnceODRLinkage &&
|
||||
MAI->hasWeakDefCanBeHiddenDirective()) {
|
||||
if (GV->hasUnnamedAddr()) {
|
||||
CanBeHidden = true;
|
||||
} else {
|
||||
@ -243,7 +244,7 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
|
||||
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
|
||||
else
|
||||
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
|
||||
} else if (MAI->getLinkOnceDirective() != 0) {
|
||||
} else if (MAI->hasLinkOnceDirective()) {
|
||||
// .globl _foo
|
||||
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
|
||||
//NOTE: linkonce is handled by the section the symbol was assigned to.
|
||||
|
@ -8547,7 +8547,10 @@ struct MemOpLink {
|
||||
// base ptr.
|
||||
struct ConsecutiveMemoryChainSorter {
|
||||
bool operator()(MemOpLink LHS, MemOpLink RHS) {
|
||||
return LHS.OffsetFromBase < RHS.OffsetFromBase;
|
||||
return
|
||||
LHS.OffsetFromBase < RHS.OffsetFromBase ||
|
||||
(LHS.OffsetFromBase == RHS.OffsetFromBase &&
|
||||
LHS.SequenceNum > RHS.SequenceNum);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -210,6 +210,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
|
||||
case ISD::SRL:
|
||||
case ISD::ROTL:
|
||||
case ISD::ROTR:
|
||||
case ISD::BSWAP:
|
||||
case ISD::CTLZ:
|
||||
case ISD::CTTZ:
|
||||
case ISD::CTLZ_ZERO_UNDEF:
|
||||
|
@ -219,8 +219,11 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
|
||||
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
|
||||
bool Cluster = false;
|
||||
SDNode *Base = Node;
|
||||
// This algorithm requires a reasonably low use count before finding a match
|
||||
// to avoid uselessly blowing up compile time in large blocks.
|
||||
unsigned UseCount = 0;
|
||||
for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
|
||||
I != E; ++I) {
|
||||
I != E && UseCount < 100; ++I, ++UseCount) {
|
||||
SDNode *User = *I;
|
||||
if (User == Node || !Visited.insert(User))
|
||||
continue;
|
||||
@ -237,6 +240,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
|
||||
if (Offset2 < Offset1)
|
||||
Base = User;
|
||||
Cluster = true;
|
||||
// Reset UseCount to allow more matches.
|
||||
UseCount = 0;
|
||||
}
|
||||
|
||||
if (!Cluster)
|
||||
|
@ -76,8 +76,9 @@ MCAsmInfo::MCAsmInfo() {
|
||||
HasIdentDirective = false;
|
||||
HasNoDeadStrip = false;
|
||||
WeakRefDirective = 0;
|
||||
WeakDefDirective = 0;
|
||||
LinkOnceDirective = 0;
|
||||
HasWeakDefDirective = false;
|
||||
HasWeakDefCanBeHiddenDirective = false;
|
||||
HasLinkOnceDirective = false;
|
||||
HiddenVisibilityAttr = MCSA_Hidden;
|
||||
HiddenDeclarationVisibilityAttr = MCSA_Hidden;
|
||||
ProtectedVisibilityAttr = MCSA_Protected;
|
||||
@ -85,6 +86,7 @@ MCAsmInfo::MCAsmInfo() {
|
||||
SupportsDebugInformation = false;
|
||||
ExceptionsType = ExceptionHandling::None;
|
||||
DwarfUsesRelocationsAcrossSections = true;
|
||||
DwarfFDESymbolsUseAbsDiff = false;
|
||||
DwarfRegNumForCFI = false;
|
||||
HasMicrosoftFastStdCallMangling = false;
|
||||
NeedsDwarfSectionOffsetDirective = false;
|
||||
|
@ -27,7 +27,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
|
||||
HasSingleParameterDotFile = false;
|
||||
PrivateGlobalPrefix = "L"; // Prefix for private global symbols
|
||||
WeakRefDirective = "\t.weak\t";
|
||||
LinkOnceDirective = "\t.linkonce discard\n";
|
||||
HasLinkOnceDirective = true;
|
||||
|
||||
// Doesn't support visibility:
|
||||
HiddenVisibilityAttr = HiddenDeclarationVisibilityAttr = MCSA_Invalid;
|
||||
|
@ -36,7 +36,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
|
||||
InlineAsmEnd = " InlineAsm End";
|
||||
|
||||
// Directives:
|
||||
WeakDefDirective = "\t.weak_definition ";
|
||||
HasWeakDefDirective = true;
|
||||
HasWeakDefCanBeHiddenDirective = true;
|
||||
WeakRefDirective = "\t.weak_reference ";
|
||||
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
|
||||
HasMachoZeroFillDirective = true; // Uses .zerofill
|
||||
|
@ -836,8 +836,9 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
|
||||
}
|
||||
}
|
||||
|
||||
static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
|
||||
unsigned symbolEncoding, const char *comment = 0) {
|
||||
static void EmitFDESymbol(MCStreamer &streamer, const MCSymbol &symbol,
|
||||
unsigned symbolEncoding, bool isEH,
|
||||
const char *comment = 0) {
|
||||
MCContext &context = streamer.getContext();
|
||||
const MCAsmInfo *asmInfo = context.getAsmInfo();
|
||||
const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol,
|
||||
@ -845,7 +846,10 @@ static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
|
||||
streamer);
|
||||
unsigned size = getSizeForEncoding(streamer, symbolEncoding);
|
||||
if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
|
||||
streamer.EmitAbsValue(v, size);
|
||||
if (asmInfo->doDwarfFDESymbolsUseAbsDiff() && isEH)
|
||||
streamer.EmitAbsValue(v, size);
|
||||
else
|
||||
streamer.EmitValue(v, size);
|
||||
}
|
||||
|
||||
static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
|
||||
@ -1344,7 +1348,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
|
||||
unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI)
|
||||
: (unsigned)dwarf::DW_EH_PE_absptr;
|
||||
unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
|
||||
EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location");
|
||||
EmitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH, "FDE initial location");
|
||||
|
||||
// PC Range
|
||||
const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
|
||||
@ -1364,8 +1368,8 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
|
||||
|
||||
// Augmentation Data
|
||||
if (frame.Lsda)
|
||||
EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
|
||||
"Language Specific Data Area");
|
||||
EmitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true,
|
||||
"Language Specific Data Area");
|
||||
}
|
||||
|
||||
// Call Frame Instructions
|
||||
|
@ -4292,6 +4292,10 @@ bool AsmParser::parseMSInlineAsm(
|
||||
break;
|
||||
}
|
||||
case AOK_DotOperator:
|
||||
// Insert the dot if the user omitted it.
|
||||
OS.flush();
|
||||
if (AsmStringIR.at(AsmStringIR.size() - 1) != '.')
|
||||
OS << '.';
|
||||
OS << (*I).Val;
|
||||
break;
|
||||
}
|
||||
|
@ -31,12 +31,8 @@ using namespace llvm;
|
||||
|
||||
static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
|
||||
const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
|
||||
|
||||
if (Subtarget->isTargetLinux())
|
||||
return new AArch64LinuxTargetObjectFile();
|
||||
if (Subtarget->isTargetELF())
|
||||
return new TargetLoweringObjectFileELF();
|
||||
llvm_unreachable("unknown subtarget type");
|
||||
assert (Subtarget->isTargetELF() && "unknown subtarget type");
|
||||
return new AArch64ElfTargetObjectFile();
|
||||
}
|
||||
|
||||
AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
@ -2782,7 +2778,7 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue
|
||||
AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
|
||||
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
|
||||
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
|
||||
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
|
||||
|
||||
// We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
|
||||
// rather than just 8.
|
||||
|
@ -2587,6 +2587,7 @@ class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
|
||||
pat, itin> {
|
||||
let mayStore = 1;
|
||||
let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
|
||||
let Constraints = "@earlyclobber $Rs";
|
||||
}
|
||||
|
||||
multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
|
||||
|
@ -22,3 +22,10 @@ AArch64LinuxTargetObjectFile::Initialize(MCContext &Ctx,
|
||||
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
|
||||
InitializeELF(TM.Options.UseInitArray);
|
||||
}
|
||||
|
||||
void
|
||||
AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx,
|
||||
const TargetMachine &TM) {
|
||||
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
|
||||
InitializeELF(TM.Options.UseInitArray);
|
||||
}
|
||||
|
@ -20,8 +20,12 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// AArch64LinuxTargetObjectFile - This implementation is used for linux
|
||||
/// AArch64.
|
||||
/// AArch64ElfTargetObjectFile - This implementation is used for ELF
|
||||
/// AArch64 targets.
|
||||
class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF {
|
||||
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
|
||||
};
|
||||
|
||||
class AArch64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
|
||||
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
|
||||
};
|
||||
|
@ -418,7 +418,8 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
|
||||
if (!MO.isReg() || !MO.isUse())
|
||||
continue;
|
||||
if (!usesRegClass(MO, &ARM::DPRRegClass) &&
|
||||
!usesRegClass(MO, &ARM::QPRRegClass))
|
||||
!usesRegClass(MO, &ARM::QPRRegClass) &&
|
||||
!usesRegClass(MO, &ARM::DPairRegClass)) // Treat DPair as QPR
|
||||
continue;
|
||||
|
||||
Defs.push_back(MO.getReg());
|
||||
@ -538,7 +539,10 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
|
||||
InsertPt++;
|
||||
unsigned Out;
|
||||
|
||||
if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
|
||||
// DPair has the same length as QPR and also has two DPRs as subreg.
|
||||
// Treat DPair as QPR.
|
||||
if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass) ||
|
||||
MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPairRegClass)) {
|
||||
unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
|
||||
ARM::dsub_0, &ARM::DPRRegClass);
|
||||
unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
|
||||
@ -571,7 +575,9 @@ A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
|
||||
default: llvm_unreachable("Unknown preferred lane!");
|
||||
}
|
||||
|
||||
bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
|
||||
// Treat DPair as QPR
|
||||
bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass) ||
|
||||
usesRegClass(MI->getOperand(0), &ARM::DPairRegClass);
|
||||
|
||||
Out = createImplicitDef(MBB, InsertPt, DL);
|
||||
Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
|
||||
|
@ -3684,6 +3684,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||
case ARM::VLD3d16Pseudo:
|
||||
case ARM::VLD3d32Pseudo:
|
||||
case ARM::VLD1d64TPseudo:
|
||||
case ARM::VLD1d64TPseudoWB_fixed:
|
||||
case ARM::VLD3d8Pseudo_UPD:
|
||||
case ARM::VLD3d16Pseudo_UPD:
|
||||
case ARM::VLD3d32Pseudo_UPD:
|
||||
@ -3700,6 +3701,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||
case ARM::VLD4d16Pseudo:
|
||||
case ARM::VLD4d32Pseudo:
|
||||
case ARM::VLD1d64QPseudo:
|
||||
case ARM::VLD1d64QPseudoWB_fixed:
|
||||
case ARM::VLD4d8Pseudo_UPD:
|
||||
case ARM::VLD4d16Pseudo_UPD:
|
||||
case ARM::VLD4d32Pseudo_UPD:
|
||||
|
@ -136,7 +136,9 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
|
||||
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
|
||||
|
||||
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
|
||||
{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
|
||||
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
|
||||
{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
|
||||
|
||||
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
|
||||
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
|
||||
@ -1071,6 +1073,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
||||
case ARM::VLD3d16Pseudo:
|
||||
case ARM::VLD3d32Pseudo:
|
||||
case ARM::VLD1d64TPseudo:
|
||||
case ARM::VLD1d64TPseudoWB_fixed:
|
||||
case ARM::VLD3d8Pseudo_UPD:
|
||||
case ARM::VLD3d16Pseudo_UPD:
|
||||
case ARM::VLD3d32Pseudo_UPD:
|
||||
@ -1087,6 +1090,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
|
||||
case ARM::VLD4d16Pseudo:
|
||||
case ARM::VLD4d32Pseudo:
|
||||
case ARM::VLD1d64QPseudo:
|
||||
case ARM::VLD1d64QPseudoWB_fixed:
|
||||
case ARM::VLD4d8Pseudo_UPD:
|
||||
case ARM::VLD4d16Pseudo_UPD:
|
||||
case ARM::VLD4d32Pseudo_UPD:
|
||||
|
@ -1673,9 +1673,61 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
|
||||
return CurDAG->getTargetConstant(Alignment, MVT::i32);
|
||||
}
|
||||
|
||||
static bool isVLDfixed(unsigned Opc)
|
||||
{
|
||||
switch (Opc) {
|
||||
default: return false;
|
||||
case ARM::VLD1d8wb_fixed : return true;
|
||||
case ARM::VLD1d16wb_fixed : return true;
|
||||
case ARM::VLD1d64Qwb_fixed : return true;
|
||||
case ARM::VLD1d32wb_fixed : return true;
|
||||
case ARM::VLD1d64wb_fixed : return true;
|
||||
case ARM::VLD1d64TPseudoWB_fixed : return true;
|
||||
case ARM::VLD1d64QPseudoWB_fixed : return true;
|
||||
case ARM::VLD1q8wb_fixed : return true;
|
||||
case ARM::VLD1q16wb_fixed : return true;
|
||||
case ARM::VLD1q32wb_fixed : return true;
|
||||
case ARM::VLD1q64wb_fixed : return true;
|
||||
case ARM::VLD2d8wb_fixed : return true;
|
||||
case ARM::VLD2d16wb_fixed : return true;
|
||||
case ARM::VLD2d32wb_fixed : return true;
|
||||
case ARM::VLD2q8PseudoWB_fixed : return true;
|
||||
case ARM::VLD2q16PseudoWB_fixed : return true;
|
||||
case ARM::VLD2q32PseudoWB_fixed : return true;
|
||||
case ARM::VLD2DUPd8wb_fixed : return true;
|
||||
case ARM::VLD2DUPd16wb_fixed : return true;
|
||||
case ARM::VLD2DUPd32wb_fixed : return true;
|
||||
}
|
||||
}
|
||||
|
||||
static bool isVSTfixed(unsigned Opc)
|
||||
{
|
||||
switch (Opc) {
|
||||
default: return false;
|
||||
case ARM::VST1d8wb_fixed : return true;
|
||||
case ARM::VST1d16wb_fixed : return true;
|
||||
case ARM::VST1d32wb_fixed : return true;
|
||||
case ARM::VST1d64wb_fixed : return true;
|
||||
case ARM::VST1q8wb_fixed : return true;
|
||||
case ARM::VST1q16wb_fixed : return true;
|
||||
case ARM::VST1q32wb_fixed : return true;
|
||||
case ARM::VST1q64wb_fixed : return true;
|
||||
case ARM::VST1d64TPseudoWB_fixed : return true;
|
||||
case ARM::VST1d64QPseudoWB_fixed : return true;
|
||||
case ARM::VST2d8wb_fixed : return true;
|
||||
case ARM::VST2d16wb_fixed : return true;
|
||||
case ARM::VST2d32wb_fixed : return true;
|
||||
case ARM::VST2q8PseudoWB_fixed : return true;
|
||||
case ARM::VST2q16PseudoWB_fixed : return true;
|
||||
case ARM::VST2q32PseudoWB_fixed : return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the register stride update opcode of a VLD/VST instruction that
|
||||
// is otherwise equivalent to the given fixed stride updating instruction.
|
||||
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
|
||||
assert((isVLDfixed(Opc) || isVSTfixed(Opc))
|
||||
&& "Incorrect fixed stride updating instruction.");
|
||||
switch (Opc) {
|
||||
default: break;
|
||||
case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
|
||||
@ -1686,6 +1738,10 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
|
||||
case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
|
||||
case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
|
||||
case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
|
||||
case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
|
||||
case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
|
||||
case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
|
||||
case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
|
||||
|
||||
case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
|
||||
case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
|
||||
@ -1785,11 +1841,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
|
||||
SDValue Inc = N->getOperand(AddrOpIdx + 1);
|
||||
// FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
|
||||
// case entirely when the rest are updated to that form, too.
|
||||
if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
|
||||
if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
|
||||
Opc = getVLDSTRegisterUpdateOpcode(Opc);
|
||||
// We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
|
||||
// FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
|
||||
// check for that explicitly too. Horribly hacky, but temporary.
|
||||
if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
|
||||
if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
|
||||
!isa<ConstantSDNode>(Inc.getNode()))
|
||||
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
|
||||
}
|
||||
@ -1937,11 +1993,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
|
||||
// case entirely when the rest are updated to that form, too.
|
||||
if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
|
||||
Opc = getVLDSTRegisterUpdateOpcode(Opc);
|
||||
// We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
|
||||
// FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
|
||||
// check for that explicitly too. Horribly hacky, but temporary.
|
||||
if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
|
||||
!isa<ConstantSDNode>(Inc.getNode()))
|
||||
Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
|
||||
if (!isa<ConstantSDNode>(Inc.getNode()))
|
||||
Ops.push_back(Inc);
|
||||
else if (NumVecs > 2 && !isVSTfixed(Opc))
|
||||
Ops.push_back(Reg0);
|
||||
}
|
||||
Ops.push_back(SrcReg);
|
||||
Ops.push_back(Pred);
|
||||
@ -2834,7 +2891,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
|
||||
ARM::VLD3d16Pseudo_UPD,
|
||||
ARM::VLD3d32Pseudo_UPD,
|
||||
ARM::VLD1q64wb_fixed};
|
||||
ARM::VLD1d64TPseudoWB_fixed};
|
||||
static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
|
||||
ARM::VLD3q16Pseudo_UPD,
|
||||
ARM::VLD3q32Pseudo_UPD };
|
||||
@ -2848,7 +2905,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
|
||||
ARM::VLD4d16Pseudo_UPD,
|
||||
ARM::VLD4d32Pseudo_UPD,
|
||||
ARM::VLD1q64wb_fixed};
|
||||
ARM::VLD1d64QPseudoWB_fixed};
|
||||
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
|
||||
ARM::VLD4q16Pseudo_UPD,
|
||||
ARM::VLD4q32Pseudo_UPD };
|
||||
|
@ -730,6 +730,8 @@ defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
|
||||
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
|
||||
|
||||
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
|
||||
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
|
||||
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
|
||||
|
||||
// ...with 4 registers
|
||||
class VLD1D4<bits<4> op7_4, string Dt>
|
||||
@ -769,6 +771,8 @@ defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
|
||||
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
|
||||
|
||||
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
|
||||
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
|
||||
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
|
||||
|
||||
// VLD2 : Vector Load (multiple 2-element structures)
|
||||
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
|
||||
@ -1671,7 +1675,7 @@ defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
|
||||
defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
|
||||
|
||||
def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
|
||||
def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
|
||||
def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
|
||||
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
|
||||
|
||||
// ...with 4 registers
|
||||
@ -1714,7 +1718,7 @@ defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
|
||||
defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
|
||||
|
||||
def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
|
||||
def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
|
||||
def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
|
||||
def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
|
||||
|
||||
// VST2 : Vector Store (multiple 2-element structures)
|
||||
|
@ -12,10 +12,14 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PPCMCAsmInfo.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
void PPCMCAsmInfoDarwin::anchor() { }
|
||||
|
||||
/// This version of the constructor is here to maintain ABI compatibility with
|
||||
/// LLVM 3.4.0
|
||||
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
|
||||
if (is64Bit) {
|
||||
PointerSize = CalleeSaveStackSlotSize = 8;
|
||||
@ -32,6 +36,28 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
|
||||
SupportsDebugInformation= true; // Debug information.
|
||||
}
|
||||
|
||||
/// Triple-aware constructor: configures the Darwin PowerPC assembler settings
/// for 32- or 64-bit mode and, based on the OS version in the triple, turns
/// off the weak-def-can-be-hidden directive.
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
  // Settings that depend on the pointer width.
  if (is64Bit)
    PointerSize = CalleeSaveStackSlotSize = 8;
  else
    Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.

  // Settings common to both widths.
  IsLittleEndian = false;
  CommentString = ";";
  ExceptionsType = ExceptionHandling::DwarfCFI;
  AssemblerDialect = 1;            // New-Style mnemonics.
  SupportsDebugInformation = true; // Debug information.

  // old assembler lacks some directives
  // FIXME: this should really be a check on the assembler characteristics
  // rather than OS version
  const bool TargetsOldMacOSX = T.isMacOSX() && T.isMacOSXVersionLT(10, 6);
  if (TargetsOldMacOSX)
    HasWeakDefCanBeHiddenDirective = false;
}
|
||||
|
||||
void PPCLinuxMCAsmInfo::anchor() { }
|
||||
|
||||
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
|
||||
|
@ -18,11 +18,15 @@
|
||||
#include "llvm/MC/MCAsmInfoELF.h"
|
||||
|
||||
namespace llvm {
|
||||
class Triple;
|
||||
|
||||
class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
|
||||
virtual void anchor();
|
||||
public:
|
||||
/// This version of the constructor is here to maintain ABI compatibility
|
||||
/// with LLVM 3.4.0.
|
||||
explicit PPCMCAsmInfoDarwin(bool is64Bit);
|
||||
explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
|
||||
};
|
||||
|
||||
class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
|
||||
|
@ -72,7 +72,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
|
||||
|
||||
MCAsmInfo *MAI;
|
||||
if (TheTriple.isOSDarwin())
|
||||
MAI = new PPCMCAsmInfoDarwin(isPPC64);
|
||||
MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
|
||||
else
|
||||
MAI = new PPCLinuxMCAsmInfo(isPPC64);
|
||||
|
||||
|
@ -701,13 +701,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case PPC::SYNC:
|
||||
// In Book E sync is called msync, handle this special case here...
|
||||
if (Subtarget.isBookE()) {
|
||||
OutStreamer.EmitRawText(StringRef("\tmsync"));
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case PPC::LD:
|
||||
case PPC::STD:
|
||||
case PPC::LWA_32:
|
||||
|
@ -186,6 +186,13 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
|
||||
if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
|
||||
return ITy->getBitWidth() > (Is32Bit ? 32 : 64);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
|
||||
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
|
||||
J != JE; ++J) {
|
||||
@ -352,13 +359,11 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
|
||||
CastInst *CI = cast<CastInst>(J);
|
||||
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
|
||||
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
|
||||
(TT.isArch32Bit() &&
|
||||
(CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
|
||||
CI->getDestTy()->getScalarType()->isIntegerTy(64))
|
||||
))
|
||||
isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
|
||||
isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
|
||||
return true;
|
||||
} else if (TT.isArch32Bit() &&
|
||||
J->getType()->getScalarType()->isIntegerTy(64) &&
|
||||
} else if (isLargeIntegerTy(TT.isArch32Bit(),
|
||||
J->getType()->getScalarType()) &&
|
||||
(J->getOpcode() == Instruction::UDiv ||
|
||||
J->getOpcode() == Instruction::SDiv ||
|
||||
J->getOpcode() == Instruction::URem ||
|
||||
|
@ -892,11 +892,13 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
|
||||
unsigned LoadOpc = PPC::LFD;
|
||||
|
||||
if (SrcVT == MVT::i32) {
|
||||
Addr.Offset = 4;
|
||||
if (!IsSigned)
|
||||
if (!IsSigned) {
|
||||
LoadOpc = PPC::LFIWZX;
|
||||
else if (PPCSubTarget.hasLFIWAX())
|
||||
Addr.Offset = 4;
|
||||
} else if (PPCSubTarget.hasLFIWAX()) {
|
||||
LoadOpc = PPC::LFIWAX;
|
||||
Addr.Offset = 4;
|
||||
}
|
||||
}
|
||||
|
||||
const TargetRegisterClass *RC = &PPC::F8RCRegClass;
|
||||
|
@ -261,11 +261,11 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
|
||||
DebugLoc dl;
|
||||
|
||||
if (PPCLowering.getPointerTy() == MVT::i32) {
|
||||
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
|
||||
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
|
||||
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
|
||||
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
|
||||
} else {
|
||||
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RCRegClass);
|
||||
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
|
||||
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
|
||||
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
|
||||
}
|
||||
|
@ -2333,7 +2333,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
|
||||
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
|
||||
(ObjSize == 2 ? MVT::i16 : MVT::i32));
|
||||
Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
|
||||
MachinePointerInfo(FuncArg, CurArgOffset),
|
||||
MachinePointerInfo(FuncArg),
|
||||
ObjType, false, false, 0);
|
||||
} else {
|
||||
// For sizes that don't fit a truncating store (3, 5, 6, 7),
|
||||
@ -2345,7 +2345,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
|
||||
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
|
||||
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
||||
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
|
||||
MachinePointerInfo(FuncArg, ArgOffset),
|
||||
MachinePointerInfo(FuncArg),
|
||||
false, false, 0);
|
||||
}
|
||||
|
||||
@ -2369,7 +2369,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
|
||||
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
||||
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
|
||||
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
|
||||
MachinePointerInfo(FuncArg, ArgOffset),
|
||||
MachinePointerInfo(FuncArg, j),
|
||||
false, false, 0);
|
||||
MemOps.push_back(Store);
|
||||
++GPR_idx;
|
||||
@ -2665,8 +2665,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
|
||||
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
|
||||
EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
|
||||
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
|
||||
MachinePointerInfo(FuncArg,
|
||||
CurArgOffset),
|
||||
MachinePointerInfo(FuncArg),
|
||||
ObjType, false, false, 0);
|
||||
MemOps.push_back(Store);
|
||||
++GPR_idx;
|
||||
@ -2690,7 +2689,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
|
||||
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
|
||||
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
|
||||
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
|
||||
MachinePointerInfo(FuncArg, ArgOffset),
|
||||
MachinePointerInfo(FuncArg, j),
|
||||
false, false, 0);
|
||||
MemOps.push_back(Store);
|
||||
++GPR_idx;
|
||||
|
@ -570,12 +570,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
|
||||
// update isStoreToStackSlot.
|
||||
|
||||
DebugLoc DL;
|
||||
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
|
||||
if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
|
||||
PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
|
||||
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
|
||||
.addReg(SrcReg,
|
||||
getKillRegState(isKill)),
|
||||
FrameIdx));
|
||||
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
|
||||
} else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
|
||||
PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
|
||||
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
|
||||
.addReg(SrcReg,
|
||||
getKillRegState(isKill)),
|
||||
@ -695,10 +697,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
|
||||
// Note: If additional load instructions are added here,
|
||||
// update isLoadFromStackSlot.
|
||||
|
||||
if (PPC::GPRCRegClass.hasSubClassEq(RC)) {
|
||||
if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
|
||||
PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
|
||||
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
|
||||
DestReg), FrameIdx));
|
||||
} else if (PPC::G8RCRegClass.hasSubClassEq(RC)) {
|
||||
} else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
|
||||
PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
|
||||
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
|
||||
FrameIdx));
|
||||
} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
|
||||
|
@ -580,6 +580,7 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
|
||||
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
|
||||
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
|
||||
def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
|
||||
def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC Multiclass Definitions.
|
||||
@ -1541,8 +1542,17 @@ def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
|
||||
"stmw $rS, $dst", LdStLMW, []>;
|
||||
|
||||
def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
|
||||
"sync $L", LdStSync, []>;
|
||||
def : Pat<(int_ppc_sync), (SYNC 0)>;
|
||||
"sync $L", LdStSync, []>, Requires<[IsNotBookE]>;
|
||||
|
||||
let isCodeGenOnly = 1 in {
|
||||
def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
|
||||
"msync", LdStSync, []>, Requires<[IsBookE]> {
|
||||
let L = 0;
|
||||
}
|
||||
}
|
||||
|
||||
def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
|
||||
def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PPC32 Arithmetic Instructions.
|
||||
@ -2284,7 +2294,8 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
|
||||
def : Pat<(f64 (fextend f32:$src)),
|
||||
(COPY_TO_REGCLASS $src, F8RC)>;
|
||||
|
||||
def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>;
|
||||
def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
|
||||
def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
|
||||
|
||||
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
|
||||
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
|
||||
@ -2373,10 +2384,10 @@ class PPCAsmPseudo<string asm, dag iops>
|
||||
|
||||
def : InstAlias<"sc", (SC 0)>;
|
||||
|
||||
def : InstAlias<"sync", (SYNC 0)>;
|
||||
def : InstAlias<"msync", (SYNC 0)>;
|
||||
def : InstAlias<"lwsync", (SYNC 1)>;
|
||||
def : InstAlias<"ptesync", (SYNC 2)>;
|
||||
def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
|
||||
def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
|
||||
def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
|
||||
def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
|
||||
|
||||
def : InstAlias<"wait", (WAIT 0)>;
|
||||
def : InstAlias<"waitrsv", (WAIT 1)>;
|
||||
|
@ -144,6 +144,13 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
|
||||
def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
|
||||
}
|
||||
|
||||
// The full condition-code register. This is not modeled fully, but defined
|
||||
// here primarily, for compatibility with gcc, to allow the inline asm "cc"
|
||||
// clobber specification to work.
|
||||
def CC : PPCReg<"cc">, DwarfRegAlias<CR0> {
|
||||
let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7];
|
||||
}
|
||||
|
||||
// Link register
|
||||
def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
|
||||
//let Aliases = [LR] in
|
||||
@ -234,3 +241,8 @@ def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
|
||||
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
|
||||
let CopyCost = -1;
|
||||
}
|
||||
|
||||
def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> {
|
||||
let isAllocatable = 0;
|
||||
}
|
||||
|
||||
|
@ -126,22 +126,6 @@ class PPCSubtarget : public PPCGenSubtargetInfo {
|
||||
/// selection.
|
||||
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
|
||||
|
||||
/// getDataLayoutString - Return the pointer size and type alignment
|
||||
/// properties of this subtarget.
|
||||
const char *getDataLayoutString() const {
|
||||
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
|
||||
// documentation are wrong; these are correct (i.e. "what gcc does").
|
||||
if (isPPC64() && isSVR4ABI()) {
|
||||
if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
|
||||
return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64";
|
||||
else
|
||||
return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
|
||||
}
|
||||
|
||||
return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
|
||||
: "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
|
||||
}
|
||||
|
||||
/// \brief Reset the features for the PowerPC target.
|
||||
virtual void resetSubtargetFeatures(const MachineFunction *MF);
|
||||
private:
|
||||
|
@ -33,6 +33,43 @@ extern "C" void LLVMInitializePowerPCTarget() {
|
||||
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
|
||||
}
|
||||
|
||||
/// Build the data layout string for a PowerPC subtarget.
///
/// The result always starts with "E" and is then extended, in order, with a
/// pointer component, an f64/i64 component, an optional f128 component, an
/// optional v128 component and a native-integer-width component. Each piece
/// is chosen from ST.isPPC64() and ST.isSVR4ABI() only.
///
/// \param ST subtarget whose 64-bit and SVR4-ABI predicates are queried.
/// \returns the assembled layout string, by value.
|
||||
static std::string getDataLayoutString(const PPCSubtarget &ST) {
|
||||
// NOTE(review): T is not referenced again in the body shown here.
const Triple &T = ST.getTargetTriple();
|
||||
|
||||
// PPC is big endian, hence the leading "E".
|
||||
std::string Ret = "E";
|
||||
|
||||
// PPC64 has 64 bit pointers, PPC32 has 32 bit pointers.
|
||||
if (ST.isPPC64())
|
||||
Ret += "-p:64:64";
|
||||
else
|
||||
Ret += "-p:32:32";
|
||||
|
||||
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
|
||||
// documentation are wrong; these are correct (i.e. "what gcc does").
// Only the 32-bit non-SVR4 case takes the else branch ("-f64:32:64").
|
||||
if (ST.isPPC64() || ST.isSVR4ABI())
|
||||
Ret += "-f64:64:64-i64:64:64";
|
||||
else
|
||||
Ret += "-f64:32:64";
|
||||
|
||||
// An f128 component is appended for 32-bit SVR4 only; every other
// configuration adds no f128 entry to the string.
|
||||
if (!ST.isPPC64() && ST.isSVR4ABI())
|
||||
Ret += "-f128:64:128";
|
||||
|
||||
// A v128 (128 bit vector) component is appended for 64-bit SVR4 only.
|
||||
if (ST.isPPC64() && ST.isSVR4ABI())
|
||||
Ret += "-v128:128:128";
|
||||
|
||||
// PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
|
||||
if (ST.isPPC64())
|
||||
Ret += "-n32:64";
|
||||
else
|
||||
Ret += "-n32";
|
||||
|
||||
return Ret;
|
||||
}
|
||||
|
||||
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
@ -41,7 +78,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
|
||||
bool is64Bit)
|
||||
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
Subtarget(TT, CPU, FS, is64Bit),
|
||||
DL(Subtarget.getDataLayoutString()), InstrInfo(*this),
|
||||
DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
|
||||
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
|
||||
TLInfo(*this), TSInfo(*this),
|
||||
InstrItins(Subtarget.getInstrItineraryData()) {
|
||||
|
@ -133,6 +133,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
|
||||
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
|
||||
|
||||
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
|
||||
|
||||
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
|
||||
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
|
||||
|
||||
|
@ -388,6 +388,11 @@ class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
|
||||
|
||||
// Bitfield extract patterns
|
||||
|
||||
/*
|
||||
|
||||
XXX: The BFE pattern is not working correctly because the XForm is not being
|
||||
applied.
|
||||
|
||||
def legalshift32 : ImmLeaf <i32, [{return Imm >=0 && Imm < 32;}]>;
|
||||
def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}],
|
||||
SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>;
|
||||
@ -397,6 +402,8 @@ class BFEPattern <Instruction BFE> : Pat <
|
||||
(BFE $x, $y, $z)
|
||||
>;
|
||||
|
||||
*/
|
||||
|
||||
// rotr pattern
|
||||
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
|
||||
(rotr i32:$src0, i32:$src1),
|
||||
|
@ -13,7 +13,6 @@
|
||||
using namespace llvm;
|
||||
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
|
||||
HasSingleParameterDotFile = false;
|
||||
WeakDefDirective = 0;
|
||||
//===------------------------------------------------------------------===//
|
||||
HasSubsectionsViaSymbols = true;
|
||||
HasMachoZeroFillDirective = false;
|
||||
@ -58,7 +57,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
|
||||
HasDotTypeDotSizeDirective = false;
|
||||
HasNoDeadStrip = true;
|
||||
WeakRefDirective = ".weakref\t";
|
||||
LinkOnceDirective = 0;
|
||||
//===--- Dwarf Emission Directives -----------------------------------===//
|
||||
HasLEB128 = true;
|
||||
SupportsDebugInformation = true;
|
||||
|
@ -356,6 +356,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass {
|
||||
DEBUG(dbgs() << CfCount << ":"; I->dump(););
|
||||
FetchClauses.push_back(MakeFetchClause(MBB, I));
|
||||
CfCount++;
|
||||
LastAlu.back() = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -716,7 +716,13 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the last instruction in the block.
|
||||
// Remove successive JUMP
|
||||
while (I != MBB.begin() && llvm::prior(I)->getOpcode() == AMDGPU::JUMP) {
|
||||
MachineBasicBlock::iterator PriorI = llvm::prior(I);
|
||||
if (AllowModify)
|
||||
I->removeFromParent();
|
||||
I = PriorI;
|
||||
}
|
||||
MachineInstr *LastInst = I;
|
||||
|
||||
// If there is only one terminator instruction, process it.
|
||||
|
@ -1516,7 +1516,9 @@ let Predicates = [isEGorCayman] in {
|
||||
i32:$src2))],
|
||||
VecALU
|
||||
>;
|
||||
def : BFEPattern <BFE_UINT_eg>;
|
||||
// XXX: This pattern is broken, disabling for now. See comment in
|
||||
// AMDGPUInstructions.td for more info.
|
||||
// def : BFEPattern <BFE_UINT_eg>;
|
||||
|
||||
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
|
||||
defm : BFIPatterns <BFI_INT_eg>;
|
||||
@ -1636,7 +1638,6 @@ class R600_LDS_1A <bits<6> lds_op, string name, list<dag> pattern> : R600_LDS <
|
||||
let src2 = 0;
|
||||
let src2_rel = 0;
|
||||
|
||||
let Defs = [OQAP];
|
||||
let usesCustomInserter = 1;
|
||||
let LDS_1A = 1;
|
||||
let DisableEncoding = "$dst";
|
||||
@ -1672,7 +1673,6 @@ class R600_LDS_1A1D_RET <bits<6> lds_op, string name, list<dag> pattern> :
|
||||
let BaseOp = name;
|
||||
let usesCustomInserter = 1;
|
||||
let DisableEncoding = "$dst";
|
||||
let Defs = [OQAP];
|
||||
}
|
||||
|
||||
class R600_LDS_1A2D <bits<6> lds_op, string name, list<dag> pattern> :
|
||||
|
@ -187,7 +187,7 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
|
||||
DstRC == &AMDGPU::M0RegRegClass)
|
||||
return false;
|
||||
|
||||
SrcRC = inferRegClassFromDef(TRI, MRI, SrcReg, SrcSubReg);
|
||||
SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
|
||||
return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
|
||||
}
|
||||
|
||||
|
@ -314,6 +314,12 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
|
||||
|
||||
Counters Result = ZeroCounts;
|
||||
|
||||
// S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
|
||||
// but we also want to wait for any other outstanding transfers before
|
||||
// signalling other hardware blocks
|
||||
if (MI.getOpcode() == AMDGPU::S_SENDMSG)
|
||||
return LastIssued;
|
||||
|
||||
// For each register affected by this
|
||||
// instruction increase the result sequence
|
||||
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
|
||||
|
@ -290,10 +290,10 @@ multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern,
|
||||
: VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>;
|
||||
|
||||
multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
|
||||
string revOp = opName> {
|
||||
RegisterClass src0_rc, string revOp = opName> {
|
||||
|
||||
def _e32 : VOP2 <
|
||||
op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1),
|
||||
op, (outs VReg_32:$dst), (ins src0_rc:$src0, VReg_32:$src1),
|
||||
opName#"_e32 $dst, $src0, $src1", pattern
|
||||
>, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
|
||||
|
||||
@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
|
||||
|
||||
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
|
||||
|
||||
let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
|
||||
mayLoad = 1 in {
|
||||
let lds = 0, mayLoad = 1 in {
|
||||
|
||||
let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
|
||||
def _OFFEN : MUBUF <op, (outs regClass:$vdata),
|
||||
(ins SReg_128:$srsrc, VReg_32:$vaddr),
|
||||
asm#" $vdata, $srsrc + $vaddr", []>;
|
||||
}
|
||||
let addr64 = 0 in {
|
||||
|
||||
let offen = 0, idxen = 1, addr64 = 0 in {
|
||||
def _IDXEN : MUBUF <op, (outs regClass:$vdata),
|
||||
(ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset),
|
||||
asm#" $vdata, $srsrc[$vaddr] + $offset", []>;
|
||||
}
|
||||
let offen = 0, idxen = 0 in {
|
||||
def _OFFSET : MUBUF <op, (outs regClass:$vdata),
|
||||
(ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||
i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
|
||||
i1imm:$slc, i1imm:$tfe),
|
||||
asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||
}
|
||||
|
||||
let offen = 0, idxen = 0, addr64 = 1 in {
|
||||
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
|
||||
(ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
|
||||
asm#" $vdata, $srsrc + $vaddr + $offset", []>;
|
||||
}
|
||||
let offen = 1, idxen = 0, offset = 0 in {
|
||||
def _OFFEN : MUBUF <op, (outs regClass:$vdata),
|
||||
(ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||
SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
|
||||
i1imm:$tfe),
|
||||
asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||
}
|
||||
|
||||
let offen = 0, idxen = 1 in {
|
||||
def _IDXEN : MUBUF <op, (outs regClass:$vdata),
|
||||
(ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||
i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
|
||||
i1imm:$slc, i1imm:$tfe),
|
||||
asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||
}
|
||||
|
||||
let offen = 1, idxen = 1 in {
|
||||
def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
|
||||
(ins SReg_128:$srsrc, VReg_64:$vaddr,
|
||||
SSrc_32:$soffset, i1imm:$glc,
|
||||
i1imm:$slc, i1imm:$tfe),
|
||||
asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||
}
|
||||
}
|
||||
|
||||
let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
|
||||
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
|
||||
(ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
|
||||
asm#" $vdata, $srsrc + $vaddr + $offset", []>;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,8 @@ def InterpSlot : Operand<i32> {
|
||||
let PrintMethod = "printInterpSlot";
|
||||
}
|
||||
|
||||
def SendMsgImm : Operand<i32>;
|
||||
|
||||
def isSI : Predicate<"Subtarget.getGeneration() "
|
||||
">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
|
||||
|
||||
@ -826,17 +828,25 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
|
||||
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
|
||||
[]
|
||||
>;
|
||||
} // End hasSideEffects
|
||||
//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
|
||||
//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
|
||||
//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
|
||||
//def S_SENDMSG : SOPP_ <0x00000010, "S_SENDMSG", []>;
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
|
||||
[(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
|
||||
> {
|
||||
let DisableEncoding = "$m0";
|
||||
}
|
||||
} // End Uses = [EXEC]
|
||||
|
||||
//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
|
||||
//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
|
||||
//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
|
||||
//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
|
||||
//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
|
||||
//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
|
||||
} // End hasSideEffects
|
||||
|
||||
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
|
||||
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
|
||||
@ -979,14 +989,16 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
|
||||
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
|
||||
// No patterns so that the scalar instructions are always selected.
|
||||
// The scalar versions will be replaced with vector when needed later.
|
||||
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", []>;
|
||||
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", []>;
|
||||
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">;
|
||||
defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
|
||||
defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
|
||||
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
|
||||
"V_SUB_I32">;
|
||||
|
||||
let Uses = [VCC] in { // Carry-in comes from VCC
|
||||
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>;
|
||||
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>;
|
||||
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">;
|
||||
defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
|
||||
defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
|
||||
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
|
||||
"V_SUBB_U32">;
|
||||
} // End Uses = [VCC]
|
||||
} // End isCommutable = 1, Defs = [VCC]
|
||||
|
||||
@ -1403,7 +1415,7 @@ def : Pat <
|
||||
/* int_SI_vs_load_input */
|
||||
def : Pat<
|
||||
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
|
||||
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
|
||||
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
/* int_SI_export */
|
||||
@ -1658,16 +1670,30 @@ def : Pat <
|
||||
0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
|
||||
>;
|
||||
|
||||
/********** ================================ **********/
|
||||
/********** Floating point absolute/negative **********/
|
||||
/********** ================================ **********/
|
||||
|
||||
// Manipulate the sign bit directly, as e.g. using the source negation modifier
|
||||
// in V_ADD_F32_e64 $src, 0, [...] does not result in -0.0 for $src == +0.0,
|
||||
// breaking the piglit *s-floatBitsToInt-neg* tests
|
||||
|
||||
// TODO: Look into not implementing isFNegFree/isFAbsFree for SI, and possibly
|
||||
// removing these patterns
|
||||
|
||||
def : Pat <
|
||||
(fneg (fabs f32:$src)),
|
||||
(V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(fabs f32:$src),
|
||||
(V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
|
||||
1 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
|
||||
(V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(fneg f32:$src),
|
||||
(V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
|
||||
0 /* ABS */, 0 /* CLAMP */, 0 /* OMOD */, 1 /* NEG */)
|
||||
(V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
|
||||
>;
|
||||
|
||||
/********** ================== **********/
|
||||
@ -1794,6 +1820,11 @@ def : Pat <
|
||||
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i32 (zext i1:$src0)),
|
||||
(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
|
||||
>;
|
||||
|
||||
// 1. Offset as 8bit DWORD immediate
|
||||
def : Pat <
|
||||
(SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
|
||||
@ -1809,7 +1840,7 @@ def : Pat <
|
||||
// 3. Offset in an 32Bit VGPR
|
||||
def : Pat <
|
||||
(SIload_constant i128:$sbase, i32:$voff),
|
||||
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
|
||||
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
// The multiplication scales from [0,1] to the unsigned integer range
|
||||
@ -1970,6 +2001,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
|
||||
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
|
||||
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
|
||||
|
||||
// BUFFER_LOAD_DWORD*, addr64=0
//
// Selection patterns mapping int_SI_buffer_load_dword onto four MUBUF
// instructions passed in as parameters. The two literal operands that follow
// the immediate offset in each source pattern (offen and idxen, per the
// operand comments on the intrinsic definition) decide which instruction
// parameter is emitted:
//   0, 0 -> offset     0, 1 -> idxen
//   1, 0 -> offen      1, 1 -> bothen
// vt is the result type the pattern is instantiated for.
|
||||
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
|
||||
MUBUF bothen> {
|
||||
|
||||
// offen = 0, idxen = 0: emit `offset`, forwarding the immediate offset.
def : Pat <
|
||||
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
imm:$offset, 0, 0, imm:$glc, imm:$slc,
|
||||
imm:$tfe)),
|
||||
(offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
|
||||
(as_i1imm $slc), (as_i1imm $tfe))
|
||||
>;
|
||||
|
||||
// offen = 1, idxen = 0: emit `offen`. The immediate offset is matched as an
// unnamed `imm` and is not passed to the output instruction.
def : Pat <
|
||||
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
imm, 1, 0, imm:$glc, imm:$slc,
|
||||
imm:$tfe)),
|
||||
(offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
|
||||
(as_i1imm $tfe))
|
||||
>;
|
||||
|
||||
// offen = 0, idxen = 1: emit `idxen`, forwarding the immediate offset.
def : Pat <
|
||||
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||
imm:$offset, 0, 1, imm:$glc, imm:$slc,
|
||||
imm:$tfe)),
|
||||
(idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
|
||||
(as_i1imm $slc), (as_i1imm $tfe))
|
||||
>;
|
||||
|
||||
// offen = 1, idxen = 1: emit `bothen`. Here $vaddr is matched as v2i32
// rather than i32, and the immediate offset is again matched but dropped.
def : Pat <
|
||||
(vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
|
||||
imm, 1, 1, imm:$glc, imm:$slc,
|
||||
imm:$tfe)),
|
||||
(bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
|
||||
(as_i1imm $tfe))
|
||||
>;
|
||||
}
|
||||
|
||||
defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
|
||||
BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
|
||||
defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
|
||||
BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
|
||||
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
|
||||
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MTBUF Patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2057,6 +2132,11 @@ def : Pat <
|
||||
(EXTRACT_SUBREG $a, sub0)
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 (trunc i32:$a)),
|
||||
(V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
|
||||
>;
|
||||
|
||||
// V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector
|
||||
// case, the sgpr-copies pass will fix this to use the vector version.
|
||||
def : Pat <
|
||||
|
@ -38,6 +38,22 @@ let TargetPrefix = "SI", isTarget = 1 in {
|
||||
llvm_i32_ty], // tfe(imm)
|
||||
[]>;
|
||||
|
||||
// Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
|
||||
def int_SI_buffer_load_dword : Intrinsic <
|
||||
[llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
|
||||
[llvm_anyint_ty, // rsrc(SGPR)
|
||||
llvm_anyint_ty, // vaddr(VGPR)
|
||||
llvm_i32_ty, // soffset(SGPR)
|
||||
llvm_i32_ty, // inst_offset(imm)
|
||||
llvm_i32_ty, // offen(imm)
|
||||
llvm_i32_ty, // idxen(imm)
|
||||
llvm_i32_ty, // glc(imm)
|
||||
llvm_i32_ty, // slc(imm)
|
||||
llvm_i32_ty], // tfe(imm)
|
||||
[IntrReadArgMem]>;
|
||||
|
||||
def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_SI_sample : Sample;
|
||||
|
@ -109,6 +109,23 @@ FunctionPass *llvm::createSILowerControlFlowPass(TargetMachine &tm) {
|
||||
return new SILowerControlFlowPass(tm);
|
||||
}
|
||||
|
||||
/// Return true if \p Opcode is one of the AMDGPU::DS_* opcodes listed in the
/// switch below, and false for every other opcode.
///
/// The list is hard-coded: a DS opcode that is not named here is reported as
/// "not DS" by this helper.
static bool isDS(unsigned Opcode) {
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
// All of the following cases fall through to the single `return true`.
case AMDGPU::DS_ADD_U32_RTN:
|
||||
case AMDGPU::DS_SUB_U32_RTN:
|
||||
case AMDGPU::DS_WRITE_B32:
|
||||
case AMDGPU::DS_WRITE_B8:
|
||||
case AMDGPU::DS_WRITE_B16:
|
||||
case AMDGPU::DS_READ_B32:
|
||||
case AMDGPU::DS_READ_I8:
|
||||
case AMDGPU::DS_READ_U8:
|
||||
case AMDGPU::DS_READ_I16:
|
||||
case AMDGPU::DS_READ_U16:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
|
||||
MachineBasicBlock *To) {
|
||||
|
||||
@ -145,7 +162,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
if (!shouldSkip(&MBB, &MBB.getParent()->back()))
|
||||
if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
|
||||
ShaderType::PIXEL ||
|
||||
!shouldSkip(&MBB, &MBB.getParent()->back()))
|
||||
return;
|
||||
|
||||
MachineBasicBlock::iterator Insert = &MI;
|
||||
@ -296,9 +315,11 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
// Kill is only allowed in pixel shaders
|
||||
// Kill is only allowed in pixel / geometry shaders
|
||||
assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
|
||||
ShaderType::PIXEL);
|
||||
ShaderType::PIXEL ||
|
||||
MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
|
||||
ShaderType::GEOMETRY);
|
||||
|
||||
// Clear this pixel from the exec mask if the operand is negative
|
||||
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
|
||||
@ -431,6 +452,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
Next = llvm::next(I);
|
||||
MachineInstr &MI = *I;
|
||||
if (isDS(MI.getOpcode())) {
|
||||
NeedM0 = true;
|
||||
NeedWQM = true;
|
||||
}
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
default: break;
|
||||
case AMDGPU::SI_IF:
|
||||
@ -491,14 +517,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
IndirectDst(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::DS_READ_B32:
|
||||
NeedWQM = true;
|
||||
// Fall through
|
||||
case AMDGPU::DS_WRITE_B32:
|
||||
case AMDGPU::DS_ADD_U32_RTN:
|
||||
NeedM0 = true;
|
||||
break;
|
||||
|
||||
case AMDGPU::V_INTERP_P1_F32:
|
||||
case AMDGPU::V_INTERP_P2_F32:
|
||||
case AMDGPU::V_INTERP_MOV_F32:
|
||||
@ -517,7 +535,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
AMDGPU::M0).addImm(0xffffffff);
|
||||
}
|
||||
|
||||
if (NeedWQM && MFI->ShaderType != ShaderType::COMPUTE) {
|
||||
if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
|
||||
MachineBasicBlock &MBB = MF.front();
|
||||
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
|
||||
AMDGPU::EXEC).addReg(AMDGPU::EXEC);
|
||||
|
@ -1181,16 +1181,23 @@ X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
|
||||
unsigned Scale, SMLoc Start, SMLoc End,
|
||||
unsigned Size, StringRef Identifier,
|
||||
InlineAsmIdentifierInfo &Info){
|
||||
if (isa<MCSymbolRefExpr>(Disp)) {
|
||||
// If this is not a VarDecl then assume it is a FuncDecl or some other label
|
||||
// reference. We need an 'r' constraint here, so we need to create register
|
||||
// operand to ensure proper matching. Just pick a GPR based on the size of
|
||||
// a pointer.
|
||||
if (!Info.IsVarDecl) {
|
||||
unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
|
||||
return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
|
||||
SMLoc(), Identifier, Info.OpDecl);
|
||||
}
|
||||
// If this is not a VarDecl then assume it is a FuncDecl or some other label
|
||||
// reference. We need an 'r' constraint here, so we need to create register
|
||||
// operand to ensure proper matching. Just pick a GPR based on the size of
|
||||
// a pointer.
|
||||
if (isa<MCSymbolRefExpr>(Disp) && !Info.IsVarDecl) {
|
||||
unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX;
|
||||
return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true,
|
||||
SMLoc(), Identifier, Info.OpDecl);
|
||||
}
|
||||
|
||||
// We either have a direct symbol reference, or an offset from a symbol. The
|
||||
// parser always puts the symbol on the LHS, so look there for size
|
||||
// calculation purposes.
|
||||
const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
|
||||
bool IsSymRef =
|
||||
isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
|
||||
if (IsSymRef) {
|
||||
if (!Size) {
|
||||
Size = Info.Type * 8; // Size is in terms of bits in this context.
|
||||
if (Size)
|
||||
@ -1312,10 +1319,15 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
|
||||
if (getParser().parsePrimaryExpr(Val, End))
|
||||
return Error(Tok.getLoc(), "Unexpected identifier!");
|
||||
} else {
|
||||
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
|
||||
if (ParseIntelIdentifier(Val, Identifier, Info,
|
||||
/*Unevaluated=*/false, End))
|
||||
return true;
|
||||
// This is a dot operator, not an adjacent identifier.
|
||||
if (Identifier.find('.') != StringRef::npos) {
|
||||
return false;
|
||||
} else {
|
||||
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
|
||||
if (ParseIntelIdentifier(Val, Identifier, Info,
|
||||
/*Unevaluated=*/false, End))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
SM.onIdentifierExpr(Val, Identifier);
|
||||
UpdateLocLex = false;
|
||||
@ -1366,7 +1378,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
|
||||
if (ParseIntelExpression(SM, End))
|
||||
return 0;
|
||||
|
||||
const MCExpr *Disp;
|
||||
const MCExpr *Disp = 0;
|
||||
if (const MCExpr *Sym = SM.getSym()) {
|
||||
// A symbolic displacement.
|
||||
Disp = Sym;
|
||||
@ -1374,13 +1386,20 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
|
||||
RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
|
||||
ImmDisp, SM.getImm(), BracLoc, StartInBrac,
|
||||
End);
|
||||
} else {
|
||||
// An immediate displacement only.
|
||||
Disp = MCConstantExpr::Create(SM.getImm(), getContext());
|
||||
}
|
||||
|
||||
// Parse the dot operator (e.g., [ebx].foo.bar).
|
||||
if (Tok.getString().startswith(".")) {
|
||||
if (SM.getImm() || !Disp) {
|
||||
const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
|
||||
if (Disp)
|
||||
Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
|
||||
else
|
||||
Disp = Imm; // An immediate displacement only.
|
||||
}
|
||||
|
||||
// Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
|
||||
// will in fact do a global lookup of the field name inside all global typedefs,
|
||||
// but we don't emulate that.
|
||||
if (Tok.getString().find('.') != StringRef::npos) {
|
||||
const MCExpr *NewDisp;
|
||||
if (ParseIntelDotOperator(Disp, NewDisp))
|
||||
return 0;
|
||||
@ -1532,8 +1551,10 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
|
||||
else
|
||||
return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
|
||||
|
||||
// Drop the '.'.
|
||||
StringRef DotDispStr = Tok.getString().drop_front(1);
|
||||
// Drop the optional '.'.
|
||||
StringRef DotDispStr = Tok.getString();
|
||||
if (DotDispStr.startswith("."))
|
||||
DotDispStr = DotDispStr.drop_front(1);
|
||||
|
||||
// .Imm gets lexed as a real.
|
||||
if (Tok.is(AsmToken::Real)) {
|
||||
|
@ -1065,6 +1065,7 @@ static int readSIB(struct InternalInstruction* insn) {
|
||||
|
||||
switch (base) {
|
||||
case 0x5:
|
||||
case 0xd:
|
||||
switch (modFromModRM(insn->modRM)) {
|
||||
case 0x0:
|
||||
insn->eaDisplacement = EA_DISP_32;
|
||||
@ -1072,13 +1073,11 @@ static int readSIB(struct InternalInstruction* insn) {
|
||||
break;
|
||||
case 0x1:
|
||||
insn->eaDisplacement = EA_DISP_8;
|
||||
insn->sibBase = (insn->addressSize == 4 ?
|
||||
SIB_BASE_EBP : SIB_BASE_RBP);
|
||||
insn->sibBase = (SIBBase)(sibBaseBase + base);
|
||||
break;
|
||||
case 0x2:
|
||||
insn->eaDisplacement = EA_DISP_32;
|
||||
insn->sibBase = (insn->addressSize == 4 ?
|
||||
SIB_BASE_EBP : SIB_BASE_RBP);
|
||||
insn->sibBase = (SIBBase)(sibBaseBase + base);
|
||||
break;
|
||||
case 0x3:
|
||||
debug("Cannot have Mod = 0b11 and a SIB byte");
|
||||
|
@ -65,6 +65,17 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
|
||||
|
||||
// Exceptions handling
|
||||
ExceptionsType = ExceptionHandling::DwarfCFI;
|
||||
|
||||
// FIXME: this should not depend on the target OS version, but on the ld64
|
||||
// version in use. From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified
|
||||
// FDE relocs may be used.
|
||||
DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6);
|
||||
|
||||
// old assembler lacks some directives
|
||||
// FIXME: this should really be a check on the assembler characteristics
|
||||
// rather than OS version
|
||||
if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
|
||||
HasWeakDefCanBeHiddenDirective = false;
|
||||
}
|
||||
|
||||
X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
|
||||
|
@ -393,9 +393,11 @@ bool X86AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
|
||||
case 'k': // Print SImode register
|
||||
Reg = getX86SubSuperRegister(Reg, MVT::i32);
|
||||
break;
|
||||
case 'q': // Print DImode register
|
||||
// FIXME: gcc will actually print e instead of r for 32-bit.
|
||||
Reg = getX86SubSuperRegister(Reg, MVT::i64);
|
||||
case 'q':
|
||||
// Print 64-bit register names if 64-bit integer registers are available.
|
||||
// Otherwise, print 32-bit register names.
|
||||
MVT::SimpleValueType Ty = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
|
||||
Reg = getX86SubSuperRegister(Reg, Ty);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -15226,9 +15226,15 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
|
||||
MBB->addSuccessor(EndMBB);
|
||||
}
|
||||
|
||||
// Make sure the last operand is EFLAGS, which gets clobbered by the branch
|
||||
// that was just emitted, but clearly shouldn't be "saved".
|
||||
assert((MI->getNumOperands() <= 3 ||
|
||||
!MI->getOperand(MI->getNumOperands() - 1).isReg() ||
|
||||
MI->getOperand(MI->getNumOperands() - 1).getReg() == X86::EFLAGS)
|
||||
&& "Expected last argument to be EFLAGS");
|
||||
unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
|
||||
// In the XMM save block, save all the XMM argument registers.
|
||||
for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
|
||||
for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
|
||||
int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
|
||||
MachineMemOperand *MMO =
|
||||
F->getMachineMemOperand(
|
||||
@ -17577,12 +17583,30 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
|
||||
// FIXME: need symbolic constants for these magic numbers.
|
||||
// See X86ATTInstPrinter.cpp:printSSECC().
|
||||
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
|
||||
SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, MVT::f32, CMP00, CMP01,
|
||||
SDValue OnesOrZeroesF = DAG.getNode(NTOperator, DL, CMP00.getValueType(),
|
||||
CMP00, CMP01,
|
||||
DAG.getConstant(x86cc, MVT::i8));
|
||||
SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, MVT::i32,
|
||||
OnesOrZeroesF);
|
||||
SDValue ANDed = DAG.getNode(ISD::AND, DL, MVT::i32, OnesOrZeroesI,
|
||||
DAG.getConstant(1, MVT::i32));
|
||||
|
||||
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
|
||||
|
||||
if (is64BitFP && !Subtarget->is64Bit()) {
|
||||
// On a 32-bit target, we cannot bitcast the 64-bit float to a
|
||||
// 64-bit integer, since that's not a legal type. Since
|
||||
// OnesOrZeroesF is all ones or all zeroes, we don't need all the
|
||||
// bits, but can do this little dance to extract the lowest 32 bits
|
||||
// and work with those going forward.
|
||||
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
|
||||
OnesOrZeroesF);
|
||||
SDValue Vector32 = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32,
|
||||
Vector64);
|
||||
OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
|
||||
Vector32, DAG.getIntPtrConstant(0));
|
||||
IntVT = MVT::i32;
|
||||
}
|
||||
|
||||
SDValue OnesOrZeroesI = DAG.getNode(ISD::BITCAST, DL, IntVT, OnesOrZeroesF);
|
||||
SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
|
||||
DAG.getConstant(1, IntVT));
|
||||
SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ANDed);
|
||||
return OneBitOfTruth;
|
||||
}
|
||||
|
@ -72,7 +72,7 @@ def ADJCALLSTACKUP64 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
|
||||
|
||||
|
||||
// x86-64 va_start lowering magic.
|
||||
let usesCustomInserter = 1 in {
|
||||
let usesCustomInserter = 1, Defs = [EFLAGS] in {
|
||||
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
|
||||
(outs),
|
||||
(ins GR8:$al,
|
||||
@ -81,7 +81,8 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
|
||||
"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
|
||||
[(X86vastart_save_xmm_regs GR8:$al,
|
||||
imm:$regsavefi,
|
||||
imm:$offset)]>;
|
||||
imm:$offset),
|
||||
(implicit EFLAGS)]>;
|
||||
|
||||
// The VAARG_64 pseudo-instruction takes the address of the va_list,
|
||||
// and places the address of the next argument into a register.
|
||||
|
@ -25,11 +25,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
|
||||
if (isConstant) return true;
|
||||
|
||||
// If all elts are the same, we can extract it and use any of the values.
|
||||
Constant *Op0 = C->getAggregateElement(0U);
|
||||
for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
|
||||
if (C->getAggregateElement(i) != Op0)
|
||||
return false;
|
||||
return true;
|
||||
if (Constant *Op0 = C->getAggregateElement(0U)) {
|
||||
for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e;
|
||||
++i)
|
||||
if (C->getAggregateElement(i) != Op0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
Instruction *I = dyn_cast<Instruction>(V);
|
||||
if (!I) return false;
|
||||
|
@ -1088,9 +1088,8 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
|
||||
L, SCEV::FlagAnyWrap));
|
||||
{ // Limit the lifetime of SCEVExpander.
|
||||
SCEVExpander Expander(*SE, "reroll");
|
||||
PHINode *NewIV =
|
||||
cast<PHINode>(Expander.expandCodeFor(H, IV->getType(),
|
||||
Header->begin()));
|
||||
Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());
|
||||
|
||||
for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
|
||||
JE = BaseUseSet.end(); J != JE; ++J)
|
||||
(*J)->replaceUsesOfWith(IV, NewIV);
|
||||
@ -1101,20 +1100,23 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
|
||||
if (Inc == 1)
|
||||
ICSCEV =
|
||||
SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
|
||||
Value *IC;
|
||||
if (isa<SCEVConstant>(ICSCEV)) {
|
||||
IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(), BI);
|
||||
// Iteration count SCEV minus 1
|
||||
const SCEV *ICMinus1SCEV =
|
||||
SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));
|
||||
|
||||
Value *ICMinus1; // Iteration count minus 1
|
||||
if (isa<SCEVConstant>(ICMinus1SCEV)) {
|
||||
ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
|
||||
} else {
|
||||
BasicBlock *Preheader = L->getLoopPreheader();
|
||||
if (!Preheader)
|
||||
Preheader = InsertPreheaderForLoop(L, this);
|
||||
|
||||
IC = Expander.expandCodeFor(ICSCEV, NewIV->getType(),
|
||||
Preheader->getTerminator());
|
||||
ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
|
||||
Preheader->getTerminator());
|
||||
}
|
||||
|
||||
Value *NewIVNext = NewIV->getIncomingValueForBlock(Header);
|
||||
Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIVNext, IC,
|
||||
Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
|
||||
"exitcond");
|
||||
BI->setCondition(Cond);
|
||||
|
||||
|
@ -3390,6 +3390,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
|
||||
int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
|
||||
if (NewBaseOffset / Factor != Base.BaseOffset)
|
||||
continue;
|
||||
// If the offset will be truncated at this use, check that it is in bounds.
|
||||
if (!IntTy->isPointerTy() &&
|
||||
!ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
|
||||
continue;
|
||||
|
||||
// Check that multiplying with the use offset doesn't overflow.
|
||||
int64_t Offset = LU.MinOffset;
|
||||
@ -3398,6 +3402,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
|
||||
Offset = (uint64_t)Offset * Factor;
|
||||
if (Offset / Factor != LU.MinOffset)
|
||||
continue;
|
||||
// If the offset will be truncated at this use, check that it is in bounds.
|
||||
if (!IntTy->isPointerTy() &&
|
||||
!ConstantInt::isValueValidForType(IntTy, Offset))
|
||||
continue;
|
||||
|
||||
Formula F = Base;
|
||||
F.BaseOffset = NewBaseOffset;
|
||||
@ -3432,6 +3440,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
|
||||
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
|
||||
if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
|
||||
continue;
|
||||
// If the offset will be truncated, check that it is in bounds.
|
||||
if (!IntTy->isPointerTy() &&
|
||||
!ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we make it here and it's legal, add it.
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/Dominators.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/LoopPass.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
@ -70,6 +71,7 @@ namespace {
|
||||
AU.addRequired<DominatorTree>();
|
||||
AU.addRequired<LoopInfo>();
|
||||
AU.addPreservedID(LoopSimplifyID);
|
||||
AU.addPreserved<AliasAnalysis>();
|
||||
AU.addPreserved<ScalarEvolution>();
|
||||
}
|
||||
private:
|
||||
|
@ -4191,13 +4191,22 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
||||
continue;
|
||||
}
|
||||
|
||||
// Process instructions only once (termination).
|
||||
// Process instructions only once (termination). Each reduction cycle
|
||||
// value must only be used once, except by phi nodes and min/max
|
||||
// reductions which are represented as a cmp followed by a select.
|
||||
ReductionInstDesc IgnoredVal(false, 0);
|
||||
if (VisitedInsts.insert(Usr)) {
|
||||
if (isa<PHINode>(Usr))
|
||||
PHIs.push_back(Usr);
|
||||
else
|
||||
NonPHIs.push_back(Usr);
|
||||
}
|
||||
} else if (!isa<PHINode>(Usr) &&
|
||||
((!isa<FCmpInst>(Usr) &&
|
||||
!isa<ICmpInst>(Usr) &&
|
||||
!isa<SelectInst>(Usr)) ||
|
||||
!isMinMaxSelectCmpPattern(Usr, IgnoredVal).IsReduction))
|
||||
return false;
|
||||
|
||||
// Remember that we completed the cycle.
|
||||
if (Usr == Phi)
|
||||
FoundStartPHI = true;
|
||||
|
33
test/Analysis/BasicAA/noalias-bugs.ll
Normal file
33
test/Analysis/BasicAA/noalias-bugs.ll
Normal file
@ -0,0 +1,33 @@
|
||||
; RUN: opt -S -basicaa -dse < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; We incorrectly returned noalias in the example below for "ptr.64" and
|
||||
; "either_ptr.64".
|
||||
; PR18460
|
||||
|
||||
%nested = type { %nested.i64 }
|
||||
%nested.i64 = type { i64 }
|
||||
|
||||
define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2,
|
||||
i32 %a, i32 %b) {
|
||||
%ptr = getelementptr inbounds %nested* %p1, i64 -1, i32 0
|
||||
%ptr.64 = getelementptr inbounds %nested.i64* %ptr, i64 0, i32 0
|
||||
%ptr2= getelementptr inbounds %nested* %p2, i64 0, i32 0
|
||||
%cmp = icmp ult i32 %a, %b
|
||||
%either_ptr = select i1 %cmp, %nested.i64* %ptr2, %nested.i64* %ptr
|
||||
%either_ptr.64 = getelementptr inbounds %nested.i64* %either_ptr, i64 0, i32 0
|
||||
|
||||
; Because either_ptr.64 and ptr.64 can alias (we used to return noalias)
|
||||
; elimination of the first store is not valid.
|
||||
|
||||
; CHECK: store i64 2
|
||||
; CHECK: load
|
||||
; CHECK: store i64 1
|
||||
|
||||
store i64 2, i64* %ptr.64, align 8
|
||||
%r = load i64* %either_ptr.64, align 8
|
||||
store i64 1, i64* %ptr.64, align 8
|
||||
ret i64 %r
|
||||
}
|
@ -1,10 +1,14 @@
|
||||
; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; rdar://7282591
|
||||
|
||||
@X = common global i32 0
|
||||
@Y = common global i32 0
|
||||
@Z = common global i32 0
|
||||
|
||||
; CHECK-LABEL: foo
|
||||
; CHECK: NoAlias: i32* %P, i32* @Z
|
||||
|
||||
define void @foo(i32 %cond) nounwind {
|
||||
@ -29,3 +33,46 @@ bb2:
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Pointers can vary in between iterations of loops.
|
||||
; PR18068
|
||||
|
||||
; CHECK-LABEL: pr18068
|
||||
; CHECK: MayAlias: i32* %0, i32* %arrayidx5
|
||||
|
||||
define i32 @pr18068(i32* %jj7, i32* %j) {
|
||||
entry:
|
||||
%oa5 = alloca [100 x i32], align 16
|
||||
br label %codeRepl
|
||||
|
||||
codeRepl:
|
||||
%0 = phi i32* [ %arrayidx13, %for.body ], [ %j, %entry ]
|
||||
%targetBlock = call i1 @cond(i32* %jj7)
|
||||
br i1 %targetBlock, label %for.body, label %bye
|
||||
|
||||
for.body:
|
||||
%1 = load i32* %jj7, align 4
|
||||
%idxprom4 = zext i32 %1 to i64
|
||||
%arrayidx5 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom4
|
||||
%2 = load i32* %arrayidx5, align 4
|
||||
%sub6 = sub i32 %2, 6
|
||||
store i32 %sub6, i32* %arrayidx5, align 4
|
||||
; %0 and %arrayidx5 can alias! It is not safe to DSE the above store.
|
||||
%3 = load i32* %0, align 4
|
||||
store i32 %3, i32* %arrayidx5, align 4
|
||||
%sub11 = add i32 %1, -1
|
||||
%idxprom12 = zext i32 %sub11 to i64
|
||||
%arrayidx13 = getelementptr inbounds [100 x i32]* %oa5, i64 0, i64 %idxprom12
|
||||
call void @inc(i32* %jj7)
|
||||
br label %codeRepl
|
||||
|
||||
bye:
|
||||
%.reload = load i32* %jj7, align 4
|
||||
ret i32 %.reload
|
||||
}
|
||||
|
||||
declare i1 @cond(i32*)
|
||||
|
||||
declare void @inc(i32*)
|
||||
|
||||
|
||||
|
81
test/Analysis/ScalarEvolution/zext-signed-addrec.ll
Normal file
81
test/Analysis/ScalarEvolution/zext-signed-addrec.ll
Normal file
@ -0,0 +1,81 @@
|
||||
; RUN: opt -loop-reduce -S < %s | FileCheck %s
|
||||
; PR18000
|
||||
|
||||
target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
@a = global i32 0, align 4
|
||||
@b = common global i32 0, align 4
|
||||
@e = common global i8 0, align 1
|
||||
@d = common global i32 0, align 4
|
||||
@c = common global i32 0, align 4
|
||||
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
|
||||
|
||||
; Function Attrs: nounwind optsize uwtable
|
||||
; CHECK-LABEL: foo
|
||||
define i32 @foo() {
|
||||
entry:
|
||||
%.pr = load i32* @b, align 4
|
||||
%cmp10 = icmp slt i32 %.pr, 1
|
||||
br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %entry.for.end9_crit_edge
|
||||
|
||||
entry.for.end9_crit_edge: ; preds = %entry
|
||||
%.pre = load i32* @c, align 4
|
||||
br label %for.end9
|
||||
|
||||
for.cond1.preheader.lr.ph: ; preds = %entry
|
||||
%0 = load i32* @a, align 4
|
||||
%tobool = icmp eq i32 %0, 0
|
||||
br i1 %tobool, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %return.loopexit.split
|
||||
|
||||
for.cond1.preheader.for.cond1.preheader.split_crit_edge: ; preds = %for.cond1.preheader.lr.ph, %for.inc8
|
||||
%1 = phi i32 [ %inc, %for.inc8 ], [ %.pr, %for.cond1.preheader.lr.ph ]
|
||||
br label %if.end
|
||||
|
||||
; CHECK-LABEL: if.end
|
||||
if.end: ; preds = %if.end, %for.cond1.preheader.for.cond1.preheader.split_crit_edge
|
||||
|
||||
; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %if.end ], [ 258, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ]
|
||||
%indvars.iv = phi i32 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %indvars.iv.next, %if.end ]
|
||||
|
||||
%2 = phi i8 [ 1, %for.cond1.preheader.for.cond1.preheader.split_crit_edge ], [ %dec, %if.end ]
|
||||
%conv7 = mul i32 %indvars.iv, 258
|
||||
%shl = and i32 %conv7, 510
|
||||
store i32 %shl, i32* @c, align 4
|
||||
|
||||
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -258
|
||||
%dec = add i8 %2, -1
|
||||
|
||||
%cmp2 = icmp sgt i8 %dec, -1
|
||||
%indvars.iv.next = add i32 %indvars.iv, -1
|
||||
br i1 %cmp2, label %if.end, label %for.inc8
|
||||
|
||||
for.inc8: ; preds = %if.end
|
||||
store i32 0, i32* @d, align 4
|
||||
%inc = add nsw i32 %1, 1
|
||||
store i32 %inc, i32* @b, align 4
|
||||
%cmp = icmp slt i32 %1, 0
|
||||
br i1 %cmp, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %for.cond.for.end9_crit_edge
|
||||
|
||||
for.cond.for.end9_crit_edge: ; preds = %for.inc8
|
||||
store i8 %dec, i8* @e, align 1
|
||||
br label %for.end9
|
||||
|
||||
for.end9: ; preds = %entry.for.end9_crit_edge, %for.cond.for.end9_crit_edge
|
||||
%3 = phi i32 [ %.pre, %entry.for.end9_crit_edge ], [ %shl, %for.cond.for.end9_crit_edge ]
|
||||
%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
|
||||
br label %return
|
||||
|
||||
return.loopexit.split: ; preds = %for.cond1.preheader.lr.ph
|
||||
store i8 1, i8* @e, align 1
|
||||
store i32 0, i32* @d, align 4
|
||||
br label %return
|
||||
|
||||
return: ; preds = %return.loopexit.split, %for.end9
|
||||
%retval.0 = phi i32 [ 0, %for.end9 ], [ 1, %return.loopexit.split ]
|
||||
ret i32 %retval.0
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind optsize
|
||||
declare i32 @printf(i8* nocapture readonly, ...)
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-REG %s
|
||||
|
||||
@var8 = global i8 0
|
||||
@var16 = global i16 0
|
||||
@ -17,6 +18,8 @@ define i8 @test_atomic_load_add_i8(i8 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -37,6 +40,8 @@ define i16 @test_atomic_load_add_i16(i16 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -57,6 +62,8 @@ define i32 @test_atomic_load_add_i32(i32 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: add [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: add w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -77,6 +84,8 @@ define i64 @test_atomic_load_add_i64(i64 %offset) nounwind {
|
||||
; x0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: add [[NEW:x[0-9]+]], x[[OLD]], x0
|
||||
; CHECK-REG: add x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
|
||||
; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -97,6 +106,8 @@ define i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -117,6 +128,8 @@ define i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -137,6 +150,8 @@ define i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: sub [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: sub w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -157,6 +172,8 @@ define i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
|
||||
; x0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: sub [[NEW:x[0-9]+]], x[[OLD]], x0
|
||||
; CHECK-REG: sub x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -177,6 +194,8 @@ define i8 @test_atomic_load_and_i8(i8 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -197,6 +216,8 @@ define i16 @test_atomic_load_and_i16(i16 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -217,6 +238,8 @@ define i32 @test_atomic_load_and_i32(i32 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: and [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: and w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -237,6 +260,8 @@ define i64 @test_atomic_load_and_i64(i64 %offset) nounwind {
|
||||
; x0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: and [[NEW:x[0-9]+]], x[[OLD]], x0
|
||||
; CHECK-REG: and x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
|
||||
; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -257,6 +282,8 @@ define i8 @test_atomic_load_or_i8(i8 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -277,6 +304,8 @@ define i16 @test_atomic_load_or_i16(i16 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -297,6 +326,8 @@ define i32 @test_atomic_load_or_i32(i32 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: orr [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: orr w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -317,6 +348,8 @@ define i64 @test_atomic_load_or_i64(i64 %offset) nounwind {
|
||||
; x0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: orr [[NEW:x[0-9]+]], x[[OLD]], x0
|
||||
; CHECK-REG: orr x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -337,6 +370,8 @@ define i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -357,6 +392,8 @@ define i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -377,6 +414,8 @@ define i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: eor [[NEW:w[0-9]+]], w[[OLD]], w0
|
||||
; CHECK-REG: eor w[[NEW:[0-9]+]], w{{[0-9]+}}, w0
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -397,6 +436,8 @@ define i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
|
||||
; x0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: eor [[NEW:x[0-9]+]], x[[OLD]], x0
|
||||
; CHECK-REG: eor x[[NEW:[0-9]+]], x{{[0-9]+}}, x0
|
||||
; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -416,6 +457,7 @@ define i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
|
||||
; CHECK-NEXT: ldxrb w[[OLD:[0-9]+]], [x[[ADDR]]]
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-REG-NOT: stxrb w0, w0, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -435,6 +477,7 @@ define i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
|
||||
; CHECK-NEXT: ldaxrh w[[OLD:[0-9]+]], [x[[ADDR]]]
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-REG-NOT: stlxrh w0, w0, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -454,6 +497,7 @@ define i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
|
||||
; CHECK-NEXT: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-REG-NOT: stlxr w0, w0, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w0, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -473,6 +517,7 @@ define i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
|
||||
; CHECK-NEXT: ldaxr x[[OLD:[0-9]+]], [x[[ADDR]]]
|
||||
; x0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-REG-NOT: stxr w0, x0, [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], x0, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -495,6 +540,8 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
|
||||
; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -516,6 +563,8 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]], sxth
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
|
||||
; CHECK-REG-NOT: stlxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -537,6 +586,8 @@ define i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]]
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, gt
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, gt
|
||||
; CHECK-REG-NOT: stxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -558,6 +609,8 @@ define i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp x0, x[[OLD]]
|
||||
; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, gt
|
||||
; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, gt
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -579,6 +632,8 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]], sxtb
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
|
||||
; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -600,6 +655,8 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]], sxth
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
|
||||
; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -621,6 +678,8 @@ define i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]]
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lt
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lt
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -642,6 +701,8 @@ define i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp x0, x[[OLD]]
|
||||
; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lt
|
||||
; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lt
|
||||
; CHECK-REG-NOT: stxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -663,6 +724,8 @@ define i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
|
||||
; CHECK-REG-NOT: stxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -684,6 +747,8 @@ define i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]], uxth
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
|
||||
; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -705,6 +770,8 @@ define i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]]
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, hi
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, hi
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -726,6 +793,8 @@ define i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp x0, x[[OLD]]
|
||||
; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, hi
|
||||
; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, hi
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -747,6 +816,8 @@ define i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]], uxtb
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
|
||||
; CHECK-REG-NOT: stlxrb w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxrb [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -768,6 +839,8 @@ define i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]], uxth
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
|
||||
; CHECK-REG-NOT: stxrh w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stxrh [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -789,6 +862,8 @@ define i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w0, w[[OLD]]
|
||||
; CHECK-NEXT: csel [[NEW:w[0-9]+]], w[[OLD]], w0, lo
|
||||
; CHECK-REG: csel w[[NEW:[0-9]+]], w{{[0-9]+}}, w0, lo
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], w[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -810,6 +885,8 @@ define i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp x0, x[[OLD]]
|
||||
; CHECK-NEXT: csel [[NEW:x[0-9]+]], x[[OLD]], x0, lo
|
||||
; CHECK-REG: csel x[[NEW:[0-9]+]], x{{[0-9]+}}, x0, lo
|
||||
; CHECK-REG-NOT: stlxr w[[NEW]], x[[NEW]], [x{{[0-9]+}}]
|
||||
; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
|
||||
; CHECK-NOT: dmb
|
||||
@ -832,6 +909,7 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
|
||||
; CHECK-NEXT: cmp w[[OLD]], w0
|
||||
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
|
||||
; As above, w1 is a reasonable guess.
|
||||
; CHECK-REG-NOT: stxrb w1, w1, [x{{[0-9]+}}]
|
||||
; CHECK: stxrb [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
|
||||
; CHECK-NOT: dmb
|
||||
@ -854,6 +932,7 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
|
||||
; CHECK-NEXT: cmp w[[OLD]], w0
|
||||
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
|
||||
; As above, w1 is a reasonable guess.
|
||||
; CHECK-REG-NOT: stlxrh w1, w1, [x{{[0-9]+}}]
|
||||
; CHECK: stlxrh [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
|
||||
; CHECK-NOT: dmb
|
||||
@ -876,6 +955,7 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
|
||||
; CHECK-NEXT: cmp w[[OLD]], w0
|
||||
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
|
||||
; As above, w1 is a reasonable guess.
|
||||
; CHECK-REG-NOT: stlxr w1, w1, [x{{[0-9]+}}]
|
||||
; CHECK: stlxr [[STATUS:w[0-9]+]], w1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
|
||||
; CHECK-NOT: dmb
|
||||
@ -898,6 +978,7 @@ define i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
|
||||
; CHECK-NEXT: cmp x[[OLD]], x0
|
||||
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
|
||||
; As above, x1 is a reasonable guess.
|
||||
; CHECK-REG-NOT: stxr w1, x1, [x{{[0-9]+}}]
|
||||
; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
|
||||
; CHECK-NOT: dmb
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array < %s | FileCheck %s
|
||||
|
||||
define internal void @_GLOBAL__I_a() section ".text.startup" {
|
||||
ret void
|
||||
|
@ -179,24 +179,19 @@ define void @test_va_copy() {
|
||||
|
||||
; Check beginning and end again:
|
||||
|
||||
; CHECK: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
|
||||
; CHECK: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
|
||||
; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
|
||||
; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
|
||||
|
||||
; CHECK: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
|
||||
|
||||
; CHECK: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
|
||||
; CHECK: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
|
||||
; CHECK: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
|
||||
; CHECK: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24]
|
||||
; CHECK: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list]
|
||||
; CHECK: str [[BLOCK2]], [x[[DEST_LIST]], #24]
|
||||
|
||||
; CHECK: str [[BLOCK]], [x[[DEST_LIST]], #24]
|
||||
|
||||
; CHECK-NOFP: str [[BLOCK]], [{{x[0-9]+}}, #:lo12:second_list]
|
||||
|
||||
; CHECK-NOFP: ldr [[BLOCK:x[0-9]+]], [x[[SRC_LIST]], #24]
|
||||
; CHECK-NOFP: add x[[SRC_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:var
|
||||
; CHECK-NOFP: add x[[DEST_LIST:[0-9]+]], {{x[0-9]+}}, #:lo12:second_list
|
||||
|
||||
; CHECK-NOFP: str [[BLOCK]], [x[[DEST_LIST]], #24]
|
||||
; CHECK-NOFP: ldr [[BLOCK1:x[0-9]+]], [{{x[0-9]+}}, #:lo12:var]
|
||||
; CHECK-NOFP: ldr [[BLOCK2:x[0-9]+]], [x[[SRC_LIST]], #24]
|
||||
; CHECK-NOFP: str [[BLOCK1]], [{{x[0-9]+}}, #:lo12:second_list]
|
||||
; CHECK-NOFP: str [[BLOCK2]], [x[[DEST_LIST]], #24]
|
||||
|
||||
ret void
|
||||
; CHECK: ret
|
||||
|
@ -56,3 +56,62 @@ define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) {
|
||||
%i2 = fadd <4 x float> %i1, %i1
|
||||
ret <4 x float> %i2
|
||||
}
|
||||
|
||||
; Test that DPair can be successfully passed as QPR.
|
||||
; CHECK-ENABLED-LABEL: test_DPair1:
|
||||
; CHECK-DISABLED-LABEL: test_DPair1:
|
||||
; test_DPair1: builds a <4 x float> with %x in lane 1 and %y in lane 0, then
; switches on %vsout. Case 1 overwrites lane 0 with 0.0 and falls into the
; case-0 block, which converts lane 0 to i8 and stores it to %out; the default
; case returns without storing. The ENABLED-prefixed checks expect four
; d-register vdup.32 instructions, the DISABLED-prefixed check expects none.
define void @test_DPair1(i32 %vsout, i8* nocapture %out, float %x, float %y) {
|
||||
entry:
|
||||
%0 = insertelement <4 x float> undef, float %x, i32 1
|
||||
%1 = insertelement <4 x float> %0, float %y, i32 0
|
||||
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
|
||||
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1]
|
||||
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0]
|
||||
; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[1]
|
||||
; CHECK-DISABLED-NOT: vdup
|
||||
switch i32 %vsout, label %sw.epilog [
|
||||
i32 1, label %sw.bb
|
||||
i32 0, label %sw.bb6
|
||||
]
|
||||
|
||||
sw.bb: ; preds = %entry
|
||||
%2 = insertelement <4 x float> %1, float 0.000000e+00, i32 0
|
||||
br label %sw.bb6
|
||||
|
||||
sw.bb6: ; preds = %sw.bb, %entry
|
||||
%sum.0 = phi <4 x float> [ %1, %entry ], [ %2, %sw.bb ]
|
||||
%3 = extractelement <4 x float> %sum.0, i32 0
|
||||
%conv = fptoui float %3 to i8
|
||||
store i8 %conv, i8* %out, align 1
|
||||
ret void
|
||||
|
||||
sw.epilog: ; preds = %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-ENABLED-LABEL: test_DPair2:
|
||||
; CHECK-DISABLED-LABEL: test_DPair2:
|
||||
; test_DPair2: inserts %x into lane 0 of an otherwise undef <4 x float> and
; switches on %vsout. Case 1 replaces lane 0 with 0.0 before joining case 0,
; which converts lane 0 to i8, stores it to %out and branches to the common
; exit; the default case goes straight to the exit. The ENABLED-prefixed check
; expects a single q-register vdup.32, the DISABLED-prefixed check expects none.
define void @test_DPair2(i32 %vsout, i8* nocapture %out, float %x) {
|
||||
entry:
|
||||
%0 = insertelement <4 x float> undef, float %x, i32 0
|
||||
; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d{{[0-9]*}}[0]
|
||||
; CHECK-DISABLED-NOT: vdup
|
||||
switch i32 %vsout, label %sw.epilog [
|
||||
i32 1, label %sw.bb
|
||||
i32 0, label %sw.bb1
|
||||
]
|
||||
|
||||
sw.bb: ; preds = %entry
|
||||
%1 = insertelement <4 x float> %0, float 0.000000e+00, i32 0
|
||||
br label %sw.bb1
|
||||
|
||||
sw.bb1: ; preds = %entry, %sw.bb
|
||||
%sum.0 = phi <4 x float> [ %0, %entry ], [ %1, %sw.bb ]
|
||||
%2 = extractelement <4 x float> %sum.0, i32 0
|
||||
%conv = fptoui float %2 to i8
|
||||
store i8 %conv, i8* %out, align 1
|
||||
br label %sw.epilog
|
||||
|
||||
sw.epilog: ; preds = %entry, %sw.bb1
|
||||
ret void
|
||||
}
|
@ -83,6 +83,19 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
|
||||
ret <1 x i64> %tmp4
|
||||
}
|
||||
|
||||
; vld3i64_update: calls the vld3.v1i64 intrinsic on %A, then stores the address
; three i64 elements past %A through %ptr, and returns the sum of fields 0 and 2
; of the loaded struct. The expected instruction is a three-register vld1.64
; using the "!" (writeback) addressing form.
; NOTE(review): the trailing i32 16 operand is presumably the alignment in
; bytes; the expected ":64" qualifier would then be a clamped value - confirm.
define <1 x i64> @vld3i64_update(i64** %ptr, i64* %A) nounwind {
|
||||
;CHECK-LABEL: vld3i64_update:
|
||||
;CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
|
||||
%tmp0 = bitcast i64* %A to i8*
|
||||
%tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16)
|
||||
%tmp5 = getelementptr i64* %A, i32 3
|
||||
store i64* %tmp5, i64** %ptr
|
||||
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
|
||||
%tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
|
||||
%tmp4 = add <1 x i64> %tmp2, %tmp3
|
||||
ret <1 x i64> %tmp4
|
||||
}
|
||||
|
||||
define <16 x i8> @vld3Qi8(i8* %A) nounwind {
|
||||
;CHECK-LABEL: vld3Qi8:
|
||||
;Check the alignment value. Max for this instruction is 64 bits:
|
||||
|
@ -83,6 +83,19 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
|
||||
ret <1 x i64> %tmp4
|
||||
}
|
||||
|
||||
; vld4i64_update: calls the vld4.v1i64 intrinsic on %A, then stores the address
; four i64 elements past %A through %ptr, and returns the sum of fields 0 and 2
; of the loaded struct. The expected instruction is a four-register vld1.64
; using the "!" (writeback) addressing form with a ":256" qualifier.
; NOTE(review): the trailing i32 64 operand is presumably the alignment in
; bytes - confirm against the intrinsic definition.
define <1 x i64> @vld4i64_update(i64** %ptr, i64* %A) nounwind {
|
||||
;CHECK-LABEL: vld4i64_update:
|
||||
;CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
|
||||
%tmp0 = bitcast i64* %A to i8*
|
||||
%tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64)
|
||||
%tmp5 = getelementptr i64* %A, i32 4
|
||||
store i64* %tmp5, i64** %ptr
|
||||
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
|
||||
%tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
|
||||
%tmp4 = add <1 x i64> %tmp2, %tmp3
|
||||
ret <1 x i64> %tmp4
|
||||
}
|
||||
|
||||
define <16 x i8> @vld4Qi8(i8* %A) nounwind {
|
||||
;CHECK-LABEL: vld4Qi8:
|
||||
;Check the alignment value. Max for this instruction is 256 bits:
|
||||
|
@ -61,6 +61,18 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
|
||||
ret void
|
||||
}
|
||||
|
||||
; vst3i64_update: loads the destination pointer %A from %ptr and a <1 x i64>
; value from %B, passes that value three times to the vst3.v1i64 intrinsic,
; then writes the address three i64 elements past %A back through %ptr. The
; expected instruction is a three-register vst1.64 using the "!" (writeback)
; addressing form.
; The label check below ends in ':' so that it anchors on the emitted function
; label, matching the sibling vst4i64_update test.
define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
|
||||
;CHECK-LABEL: vst3i64_update:
|
||||
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
|
||||
%A = load i64** %ptr
|
||||
%tmp0 = bitcast i64* %A to i8*
|
||||
%tmp1 = load <1 x i64>* %B
|
||||
call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
|
||||
%tmp2 = getelementptr i64* %A, i32 3
|
||||
store i64* %tmp2, i64** %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
|
||||
;CHECK-LABEL: vst3Qi8:
|
||||
;Check the alignment value. Max for this instruction is 64 bits:
|
||||
|
@ -60,6 +60,18 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
|
||||
ret void
|
||||
}
|
||||
|
||||
; vst4i64_update: loads the destination pointer %A from %ptr and a <1 x i64>
; value from %B, passes that value four times to the vst4.v1i64 intrinsic,
; then writes the address four i64 elements past %A back through %ptr. The
; expected instruction is a four-register vst1.64 using the "!" (writeback)
; addressing form.
define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
|
||||
;CHECK-LABEL: vst4i64_update:
|
||||
;CHECK: vst1.64 {d16, d17, d18, d19}, [r1]!
|
||||
%A = load i64** %ptr
|
||||
%tmp0 = bitcast i64* %A to i8*
|
||||
%tmp1 = load <1 x i64>* %B
|
||||
call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
|
||||
%tmp2 = getelementptr i64* %A, i32 4
|
||||
store i64* %tmp2, i64** %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
|
||||
;CHECK-LABEL: vst4Qi8:
|
||||
;Check the alignment value. Max for this instruction is 256 bits:
|
||||
|
@ -119,9 +119,9 @@ unequal:
|
||||
; CHECK: ld 3, -[[OFFSET1]](1)
|
||||
|
||||
; DARWIN32: _func3:
|
||||
; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 40
|
||||
; DARWIN32: addi r[[REG1:[0-9]+]], r[[REGSP:[0-9]+]], 36
|
||||
; DARWIN32: addi r[[REG2:[0-9]+]], r[[REGSP]], 24
|
||||
; DARWIN32: lwz r[[REG3:[0-9]+]], 48(r[[REGSP]])
|
||||
; DARWIN32: lwz r[[REG3:[0-9]+]], 44(r[[REGSP]])
|
||||
; DARWIN32: lwz r[[REG4:[0-9]+]], 32(r[[REGSP]])
|
||||
; DARWIN32: cmplw cr{{[0-9]+}}, r[[REG4]], r[[REG3]]
|
||||
; DARWIN32: stw r[[REG3]], -[[OFFSET1:[0-9]+]]
|
||||
|
17
test/CodeGen/PowerPC/byval-agg-info.ll
Normal file
17
test/CodeGen/PowerPC/byval-agg-info.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: llc < %s -print-after=prologepilog >%t 2>&1 && FileCheck <%t %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
%struct.anon = type { i32, i32 }
|
||||
|
||||
declare void @foo(%struct.anon* %v)
|
||||
; test: takes two i32 arguments followed by a byval %struct.anon pointer %v and
; forwards %v unchanged to @foo. The check line after the function inspects the
; memory operand printed for the store of the byval data.
define void @test(i32 %a, i32 %b, %struct.anon* byval nocapture %v) {
|
||||
entry:
|
||||
call void @foo(%struct.anon* %v)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Make sure that the MMO on the store has no offset from the byval
|
||||
; variable itself (we used to have mem:ST8[%v+64]).
|
||||
; CHECK: STD %X5<kill>, 176, %X1; mem:ST8[%v](align=16)
|
||||
|
70
test/CodeGen/PowerPC/cc.ll
Normal file
70
test/CodeGen/PowerPC/cc.ll
Normal file
@ -0,0 +1,70 @@
|
||||
; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; test1: compares %a with %b in the entry block, then runs inline asm "sc"
; whose constraint string lists cr0 through cr7 as clobbers, and only after the
; asm branches on the earlier comparison (returning %b if equal, else %a).
; The check lines inside the function expect condition-register contents to be
; read (mfcr / mfocrf) and stored to the stack before the sc, then reloaded
; (lwz) and written back (mtocrf) after it.
define i64 @test1(i64 %a, i64 %b) {
|
||||
entry:
|
||||
%c = icmp eq i64 %a, %b
|
||||
br label %foo
|
||||
|
||||
foo:
|
||||
call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cr0},~{cr1},~{cr2},~{cr3},~{cr4},~{cr5},~{cr6},~{cr7}" (i64 %a)
|
||||
br i1 %c, label %bar, label %end
|
||||
|
||||
bar:
|
||||
ret i64 %b
|
||||
|
||||
end:
|
||||
ret i64 %a
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: mfcr [[REG1:[0-9]+]]
|
||||
; CHECK-DAG: cmpld
|
||||
; CHECK-DAG: mfocrf [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: stw [[REG1]], 8(1)
|
||||
; CHECK-DAG: stw [[REG2]], -4(1)
|
||||
|
||||
; CHECK: sc
|
||||
; CHECK: lwz [[REG3:[0-9]+]], -4(1)
|
||||
; CHECK: mtocrf 128, [[REG3]]
|
||||
|
||||
; CHECK: lwz [[REG4:[0-9]+]], 8(1)
|
||||
; CHECK-DAG: mtocrf 32, [[REG4]]
|
||||
; CHECK-DAG: mtocrf 16, [[REG4]]
|
||||
; CHECK-DAG: mtocrf 8, [[REG4]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
; test2: same shape as the preceding test, except that the inline asm "sc"
; names the single clobber "cc" instead of listing cr0 through cr7. The check
; lines inside the function expect the same save (mfcr / mfocrf / stw) and
; restore (lwz / mtocrf) sequence around the sc.
define i64 @test2(i64 %a, i64 %b) {
|
||||
entry:
|
||||
%c = icmp eq i64 %a, %b
|
||||
br label %foo
|
||||
|
||||
foo:
|
||||
call { i64, i64 } asm sideeffect "sc", "={r0},={r3},{r0},~{cc}" (i64 %a)
|
||||
br i1 %c, label %bar, label %end
|
||||
|
||||
bar:
|
||||
ret i64 %b
|
||||
|
||||
end:
|
||||
ret i64 %a
|
||||
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: mfcr [[REG1:[0-9]+]]
|
||||
; CHECK-DAG: cmpld
|
||||
; CHECK-DAG: mfocrf [[REG2:[0-9]+]],
|
||||
; CHECK-DAG: stw [[REG1]], 8(1)
|
||||
; CHECK-DAG: stw [[REG2]], -4(1)
|
||||
|
||||
; CHECK: sc
|
||||
; CHECK: lwz [[REG3:[0-9]+]], -4(1)
|
||||
; CHECK: mtocrf 128, [[REG3]]
|
||||
|
||||
; CHECK: lwz [[REG4:[0-9]+]], 8(1)
|
||||
; CHECK-DAG: mtocrf 32, [[REG4]]
|
||||
; CHECK-DAG: mtocrf 16, [[REG4]]
|
||||
; CHECK-DAG: mtocrf 8, [[REG4]]
|
||||
; CHECK: blr
|
||||
}
|
||||
|
31
test/CodeGen/PowerPC/ctrloop-udivti3.ll
Normal file
31
test/CodeGen/PowerPC/ctrloop-udivti3.ll
Normal file
@ -0,0 +1,31 @@
|
||||
; RUN: llc < %s -march=ppc64 | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; _mpd_shortdiv: reduced loop that counts an i64 induction value down from %n
; until it reaches zero. Each iteration performs an i128 udiv (its operands are
; undef / 0 in this reduced form), truncates the quotient to i64 and stores it.
; The check lines after the function require that no mtctr is emitted for it.
define hidden void @_mpd_shortdiv(i64 %n) #0 {
|
||||
entry:
|
||||
br i1 undef, label %for.end, label %for.body.lr.ph
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %for.body.lr.ph
|
||||
%i.018.in = phi i64 [ %n, %for.body.lr.ph ], [ %i.018, %for.body ]
|
||||
%i.018 = add i64 %i.018.in, -1
|
||||
%add.i = or i128 undef, undef
|
||||
%div.i = udiv i128 %add.i, 0
|
||||
%conv3.i11 = trunc i128 %div.i to i64
|
||||
store i64 %conv3.i11, i64* undef, align 8
|
||||
%cmp = icmp eq i64 %i.018, 0
|
||||
br i1 %cmp, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @_mpd_shortdiv
|
||||
; CHECK-NOT: mtctr
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
153
test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
Normal file
153
test/CodeGen/PowerPC/fast-isel-conversion-p5.ll
Normal file
@ -0,0 +1,153 @@
|
||||
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr5 | FileCheck %s --check-prefix=ELF64
|
||||
|
||||
; Test sitofp
|
||||
|
||||
; sitofp_double_i32: converts the signed i32 %a to double and stores the result
; in a local stack slot; the %b parameter is not referenced by the body.
; Expected code: an std / lfd pair at the same negative offset from register 1,
; followed by fcfid.
define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: sitofp_double_i32
|
||||
%b.addr = alloca double, align 8
|
||||
%conv = sitofp i32 %a to double
|
||||
; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
|
||||
; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
|
||||
; ELF64: fcfid
|
||||
store double %conv, double* %b.addr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; sitofp_double_i64: converts the signed i64 %a to double and stores the result
; in a local stack slot; the %b parameter is not referenced by the body.
; Expected code: an std / lfd pair at the same negative offset from register 1,
; followed by fcfid.
define void @sitofp_double_i64(i64 %a, double %b) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: sitofp_double_i64
|
||||
%b.addr = alloca double, align 8
|
||||
%conv = sitofp i64 %a to double
|
||||
; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
|
||||
; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
|
||||
; ELF64: fcfid
|
||||
store double %conv, double* %b.addr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; sitofp_double_i16: converts the signed i16 %a to double and stores the result
; in a local stack slot; the %b parameter is not referenced by the body.
; Expected code: extsh first, then an std / lfd pair at the same negative
; offset from register 1, followed by fcfid.
define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: sitofp_double_i16
|
||||
%b.addr = alloca double, align 8
|
||||
%conv = sitofp i16 %a to double
|
||||
; ELF64: extsh
|
||||
; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
|
||||
; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
|
||||
; ELF64: fcfid
|
||||
store double %conv, double* %b.addr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; sitofp_double_i8: converts the signed i8 %a to double and stores the result
; in a local stack slot; the %b parameter is not referenced by the body.
; Expected code: extsb first, then an std / lfd pair at the same negative
; offset from register 1, followed by fcfid.
define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: sitofp_double_i8
|
||||
%b.addr = alloca double, align 8
|
||||
%conv = sitofp i8 %a to double
|
||||
; ELF64: extsb
|
||||
; ELF64: std {{[0-9]+}}, -[[OFFSET:[0-9]+]](1)
|
||||
; ELF64: lfd {{[0-9]+}}, -[[OFFSET]](1)
|
||||
; ELF64: fcfid
|
||||
store double %conv, double* %b.addr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test fptosi
|
||||
|
||||
; fptosi_float_i32: converts the float %a to a signed i32 and stores the result
; in a local stack slot. Expected code: fctiwz, then stfd, then lwa.
define void @fptosi_float_i32(float %a) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: fptosi_float_i32
|
||||
%b.addr = alloca i32, align 4
|
||||
%conv = fptosi float %a to i32
|
||||
; ELF64: fctiwz
|
||||
; ELF64: stfd
|
||||
; ELF64: lwa
|
||||
store i32 %conv, i32* %b.addr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; fptosi_float_i64: converts the float %a to a signed i64 and stores the result
; in a local stack slot. Expected code: fctidz, then stfd, then ld.
define void @fptosi_float_i64(float %a) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: fptosi_float_i64
|
||||
%b.addr = alloca i64, align 4
|
||||
%conv = fptosi float %a to i64
|
||||
; ELF64: fctidz
|
||||
; ELF64: stfd
|
||||
; ELF64: ld
|
||||
store i64 %conv, i64* %b.addr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; fptosi_double_i32: converts the double %a to a signed i32 and stores the
; result in a local stack slot. Expected code: fctiwz, then stfd, then lwa.
define void @fptosi_double_i32(double %a) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: fptosi_double_i32
|
||||
%b.addr = alloca i32, align 8
|
||||
%conv = fptosi double %a to i32
|
||||
; ELF64: fctiwz
|
||||
; ELF64: stfd
|
||||
; ELF64: lwa
|
||||
store i32 %conv, i32* %b.addr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; fptosi_double_i64: converts the double %a to a signed i64 and stores the
; result in a local stack slot. Expected code: fctidz, then stfd, then ld.
define void @fptosi_double_i64(double %a) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: fptosi_double_i64
|
||||
%b.addr = alloca i64, align 8
|
||||
%conv = fptosi double %a to i64
|
||||
; ELF64: fctidz
|
||||
; ELF64: stfd
|
||||
; ELF64: ld
|
||||
store i64 %conv, i64* %b.addr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test fptoui
|
||||
|
||||
; fptoui_float_i32: converts the float %a to an unsigned i32 and stores the
; result in a local stack slot. Expected code: fctidz, then stfd, then lwz.
define void @fptoui_float_i32(float %a) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: fptoui_float_i32
|
||||
%b.addr = alloca i32, align 4
|
||||
%conv = fptoui float %a to i32
|
||||
; ELF64: fctidz
|
||||
; ELF64: stfd
|
||||
; ELF64: lwz
|
||||
store i32 %conv, i32* %b.addr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; fptoui_float_i64: converts the float %a to an unsigned i64 and stores the
; result in a local stack slot. Expected code: fctiduz, then stfd, then ld.
define void @fptoui_float_i64(float %a) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: fptoui_float_i64
|
||||
%b.addr = alloca i64, align 4
|
||||
%conv = fptoui float %a to i64
|
||||
; ELF64: fctiduz
|
||||
; ELF64: stfd
|
||||
; ELF64: ld
|
||||
store i64 %conv, i64* %b.addr, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; fptoui_double_i32: converts the double %a to an unsigned i32 and stores the
; result in a local stack slot. Expected code: fctidz, then stfd, then lwz.
define void @fptoui_double_i32(double %a) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: fptoui_double_i32
|
||||
%b.addr = alloca i32, align 8
|
||||
%conv = fptoui double %a to i32
|
||||
; ELF64: fctidz
|
||||
; ELF64: stfd
|
||||
; ELF64: lwz
|
||||
store i32 %conv, i32* %b.addr, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; fptoui_double_i64: converts the double %a to an unsigned i64 and stores the
; result in a local stack slot. Expected code: fctiduz, then stfd, then ld.
define void @fptoui_double_i64(double %a) nounwind ssp {
|
||||
entry:
|
||||
; ELF64: fptoui_double_i64
|
||||
%b.addr = alloca i64, align 8
|
||||
%conv = fptoui double %a to i64
|
||||
; ELF64: fctiduz
|
||||
; ELF64: stfd
|
||||
; ELF64: ld
|
||||
store i64 %conv, i64* %b.addr, align 8
|
||||
ret void
|
||||
}
|
23
test/CodeGen/PowerPC/spill-nor0.ll
Normal file
23
test/CodeGen/PowerPC/spill-nor0.ll
Normal file
@ -0,0 +1,23 @@
|
||||
; RUN: llc < %s -O0 -mcpu=ppc64 | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
; Reduced test case: one path returns immediately; the other executes an
; inline-asm sequence with one "=b" output and two "b" inputs while listing cc,
; memory, r3 and r4 as clobbers, and then reaches unreachable. The check lines
; inside the function only require that a stw and, later, an lwz appear in the
; output for this function.
define void @_ZN4llvm3sys17RunningOnValgrindEv() #0 {
|
||||
entry:
|
||||
br i1 undef, label %if.then, label %if.end
|
||||
|
||||
if.then: ; preds = %entry
|
||||
ret void
|
||||
|
||||
if.end: ; preds = %entry
|
||||
%0 = call i64 asm sideeffect "mr 3,$1\0A\09mr 4,$2\0A\09rotldi 0,0,3 ; rotldi 0,0,13\0A\09rotldi 0,0,61 ; rotldi 0,0,51\0A\09or 1,1,1\0A\09mr $0,3", "=b,b,b,~{cc},~{memory},~{r3},~{r4}"(i32 0, i64* undef) #0
|
||||
unreachable
|
||||
|
||||
; CHECK-LABEL: @_ZN4llvm3sys17RunningOnValgrindEv
|
||||
; CHECK: stw
|
||||
; CHECK: lwz
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
38
test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
Normal file
38
test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; taken from X86 version of the same test
|
||||
; RUN: llc -mtriple=powerpc-apple-darwin10 -O0 < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=powerpc-apple-darwin9 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
|
||||
; RUN: llc -mtriple=powerpc-apple-darwin8 -O0 < %s | FileCheck --check-prefix=CHECK-D89 %s
|
||||
|
||||
@v1 = linkonce_odr global i32 32
|
||||
; CHECK: .globl _v1
|
||||
; CHECK: .weak_def_can_be_hidden _v1
|
||||
|
||||
; CHECK-D89: .globl _v1
|
||||
; CHECK-D89: .weak_definition _v1
|
||||
|
||||
; f1: returns the i32 value loaded from the global @v1.
define i32 @f1() {
|
||||
%x = load i32 * @v1
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
@v2 = linkonce_odr global i32 32
|
||||
; CHECK: .globl _v2
|
||||
; CHECK: .weak_definition _v2
|
||||
|
||||
; CHECK-D89: .globl _v2
|
||||
; CHECK-D89: .weak_definition _v2
|
||||
|
||||
@v3 = linkonce_odr unnamed_addr global i32 32
|
||||
; CHECK: .globl _v3
|
||||
; CHECK: .weak_def_can_be_hidden _v3
|
||||
|
||||
; CHECK-D89: .globl _v3
|
||||
; CHECK-D89: .weak_definition _v3
|
||||
|
||||
; f2: returns the address of the global @v2.
define i32* @f2() {
|
||||
ret i32* @v2
|
||||
}
|
||||
|
||||
; f3: returns the address of the global @v3.
define i32* @f3() {
|
||||
ret i32* @v3
|
||||
}
|
@ -1,5 +1,7 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
; XFAIL: *
|
||||
|
||||
; CHECK: @bfe_def
|
||||
; CHECK: BFE_UINT
|
||||
define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
|
||||
|
@ -9,7 +9,7 @@
|
||||
; R600-CHECK-NOT: AND
|
||||
; R600-CHECK: |PV.{{[XYZW]}}|
|
||||
; SI-CHECK-LABEL: @fabs_free
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
|
||||
; SI-CHECK: V_AND_B32
|
||||
|
||||
define void @fabs_free(float addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
@ -23,8 +23,8 @@ entry:
|
||||
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; SI-CHECK-LABEL: @fabs_v2
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
|
||||
; SI-CHECK: V_AND_B32
|
||||
; SI-CHECK: V_AND_B32
|
||||
define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
|
||||
entry:
|
||||
%0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
|
||||
@ -38,10 +38,10 @@ entry:
|
||||
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; SI-CHECK-LABEL: @fabs_v4
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 1, 0, 0, 0
|
||||
; SI-CHECK: V_AND_B32
|
||||
; SI-CHECK: V_AND_B32
|
||||
; SI-CHECK: V_AND_B32
|
||||
; SI-CHECK: V_AND_B32
|
||||
define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
|
||||
entry:
|
||||
%0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
|
||||
|
55
test/CodeGen/R600/fneg-fabs.ll
Normal file
55
test/CodeGen/R600/fneg-fabs.ll
Normal file
@ -0,0 +1,55 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
|
||||
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
|
||||
|
||||
; DAGCombiner will transform:
|
||||
; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
|
||||
; unless isFabsFree returns true
|
||||
|
||||
; R600-CHECK-LABEL: @fneg_fabs_free
|
||||
; R600-CHECK-NOT: AND
|
||||
; R600-CHECK: |PV.{{[XYZW]}}|
|
||||
; R600-CHECK: -PV
|
||||
; SI-CHECK-LABEL: @fneg_fabs_free
|
||||
; SI-CHECK: V_OR_B32
|
||||
|
||||
define void @fneg_fabs_free(float addrspace(1)* %out, i32 %in) {
|
||||
entry:
|
||||
%0 = bitcast i32 %in to float
|
||||
%1 = call float @fabs(float %0)
|
||||
%2 = fsub float -0.000000e+00, %1
|
||||
store float %2, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; R600-CHECK-LABEL: @fneg_fabs_v2
|
||||
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; R600-CHECK: -PV
|
||||
; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
|
||||
; R600-CHECK: -PV
|
||||
; SI-CHECK-LABEL: @fneg_fabs_v2
|
||||
; SI-CHECK: V_OR_B32
|
||||
; SI-CHECK: V_OR_B32
|
||||
define void @fneg_fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
|
||||
entry:
|
||||
%0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
|
||||
%1 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %0
|
||||
store <2 x float> %1, <2 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-CHECK-LABEL: @fneg_fabs_v4
|
||||
; SI-CHECK: V_OR_B32
|
||||
; SI-CHECK: V_OR_B32
|
||||
; SI-CHECK: V_OR_B32
|
||||
; SI-CHECK: V_OR_B32
|
||||
define void @fneg_fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
|
||||
entry:
|
||||
%0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
|
||||
%1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
|
||||
store <4 x float> %1, <4 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @fabs(float ) readnone
|
||||
declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
|
||||
declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
|
@ -4,7 +4,7 @@
|
||||
; R600-CHECK-LABEL: @fneg
|
||||
; R600-CHECK: -PV
|
||||
; SI-CHECK-LABEL: @fneg
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_XOR_B32
|
||||
define void @fneg(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = fsub float -0.000000e+00, %in
|
||||
@ -16,8 +16,8 @@ entry:
|
||||
; R600-CHECK: -PV
|
||||
; R600-CHECK: -PV
|
||||
; SI-CHECK-LABEL: @fneg_v2
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_XOR_B32
|
||||
; SI-CHECK: V_XOR_B32
|
||||
define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
|
||||
entry:
|
||||
%0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
|
||||
@ -31,10 +31,10 @@ entry:
|
||||
; R600-CHECK: -PV
|
||||
; R600-CHECK: -PV
|
||||
; SI-CHECK-LABEL: @fneg_v4
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_ADD_F32_e64 v{{[0-9]}}, s{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_XOR_B32
|
||||
; SI-CHECK: V_XOR_B32
|
||||
; SI-CHECK: V_XOR_B32
|
||||
; SI-CHECK: V_XOR_B32
|
||||
define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
|
||||
entry:
|
||||
%0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
|
||||
|
28
test/CodeGen/R600/lds-oqap-crash.ll
Normal file
28
test/CodeGen/R600/lds-oqap-crash.ll
Normal file
@ -0,0 +1,28 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; The test is for a bug in R600EmitClauseMarkers.cpp where this pass
|
||||
; was searching for a use of the OQAP register in order to determine
|
||||
; if an LDS instruction could fit in the current clause, but never finding
|
||||
; one. This created an infinite loop and hung the compiler.
|
||||
;
|
||||
; The LDS instruction should not have been defining OQAP in the first place,
|
||||
; because the LDS instructions are pseudo instructions and the OQAP
|
||||
; reads and writes are bundled together in the same instruction.
|
||||
|
||||
; CHECK: @lds_crash
|
||||
define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
|
||||
entry:
|
||||
%0 = load i32 addrspace(3)* %in
|
||||
; This block needs to be > 115 ISA instructions to hit the bug,
|
||||
; so we'll use udiv instructions.
|
||||
%div0 = udiv i32 %0, %b
|
||||
%div1 = udiv i32 %div0, %a
|
||||
%div2 = udiv i32 %div1, 11
|
||||
%div3 = udiv i32 %div2, %a
|
||||
%div4 = udiv i32 %div3, %b
|
||||
%div5 = udiv i32 %div4, %c
|
||||
%div6 = udiv i32 %div5, %div0
|
||||
%div7 = udiv i32 %div6, %div1
|
||||
store i32 %div7, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
18
test/CodeGen/R600/llvm.AMDGPU.kill.ll
Normal file
18
test/CodeGen/R600/llvm.AMDGPU.kill.ll
Normal file
@ -0,0 +1,18 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: @kill_gs
|
||||
; SI: V_CMPX_LE_F32
|
||||
|
||||
define void @kill_gs() #0 {
|
||||
main_body:
|
||||
%0 = icmp ule i32 0, 3
|
||||
%1 = select i1 %0, float 1.000000e+00, float -1.000000e+00
|
||||
call void @llvm.AMDGPU.kill(float %1)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.kill(float)
|
||||
|
||||
attributes #0 = { "ShaderType"="2" }
|
||||
|
||||
!0 = metadata !{metadata !"const", null, i32 1}
|
40
test/CodeGen/R600/llvm.SI.load.dword.ll
Normal file
40
test/CodeGen/R600/llvm.SI.load.dword.ll
Normal file
@ -0,0 +1,40 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; Example of a simple geometry shader loading vertex attributes from the
|
||||
; ESGS ring buffer
|
||||
|
||||
; CHECK-LABEL: @main
|
||||
; CHECK: BUFFER_LOAD_DWORD
|
||||
; CHECK: BUFFER_LOAD_DWORD
|
||||
; CHECK: BUFFER_LOAD_DWORD
|
||||
; CHECK: BUFFER_LOAD_DWORD
|
||||
|
||||
define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
|
||||
main_body:
|
||||
%10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1
|
||||
%11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
|
||||
%12 = shl i32 %6, 2
|
||||
%13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
|
||||
%14 = bitcast i32 %13 to float
|
||||
%15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||
%16 = bitcast i32 %15 to float
|
||||
%17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
|
||||
%18 = bitcast i32 %17 to float
|
||||
%19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||
%20 = bitcast i32 %19 to float
|
||||
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
||||
!0 = metadata !{metadata !"const", null, i32 1}
|
21
test/CodeGen/R600/llvm.SI.sendmsg.ll
Normal file
21
test/CodeGen/R600/llvm.SI.sendmsg.ll
Normal file
@ -0,0 +1,21 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: @main
|
||||
; CHECK: S_SENDMSG 34
|
||||
; CHECK: S_SENDMSG 274
|
||||
; CHECK: S_SENDMSG 562
|
||||
; CHECK: S_SENDMSG 3
|
||||
|
||||
define void @main() {
|
||||
main_body:
|
||||
call void @llvm.SI.sendmsg(i32 34, i32 0);
|
||||
call void @llvm.SI.sendmsg(i32 274, i32 0);
|
||||
call void @llvm.SI.sendmsg(i32 562, i32 0);
|
||||
call void @llvm.SI.sendmsg(i32 3, i32 0);
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.SI.sendmsg(i32, i32) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
@ -445,6 +445,7 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
|
||||
; R600-CHECK: LDS_UBYTE_READ_RET
|
||||
; SI-CHECK-LABEL: @load_i8_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_U8
|
||||
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
|
||||
%1 = load i8 addrspace(3)* %in
|
||||
@ -458,6 +459,7 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
|
||||
; R600-CHECK: ASHR
|
||||
; SI-CHECK-LABEL: @load_i8_sext_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_I8
|
||||
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
|
||||
entry:
|
||||
@ -472,6 +474,7 @@ entry:
|
||||
; R600-CHECK: LDS_UBYTE_READ_RET
|
||||
; SI-CHECK-LABEL: @load_v2i8_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_U8
|
||||
; SI-CHECK: DS_READ_U8
|
||||
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
|
||||
@ -489,6 +492,7 @@ entry:
|
||||
; R600-CHECK-DAG: ASHR
|
||||
; SI-CHECK-LABEL: @load_v2i8_sext_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_I8
|
||||
; SI-CHECK: DS_READ_I8
|
||||
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
|
||||
@ -506,6 +510,7 @@ entry:
|
||||
; R600-CHECK: LDS_UBYTE_READ_RET
|
||||
; SI-CHECK-LABEL: @load_v4i8_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_U8
|
||||
; SI-CHECK: DS_READ_U8
|
||||
; SI-CHECK: DS_READ_U8
|
||||
@ -529,6 +534,7 @@ entry:
|
||||
; R600-CHECK-DAG: ASHR
|
||||
; SI-CHECK-LABEL: @load_v4i8_sext_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_I8
|
||||
; SI-CHECK: DS_READ_I8
|
||||
; SI-CHECK: DS_READ_I8
|
||||
@ -546,6 +552,7 @@ entry:
|
||||
; R600-CHECK: LDS_USHORT_READ_RET
|
||||
; SI-CHECK-LABEL: @load_i16_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_U16
|
||||
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
|
||||
entry:
|
||||
@ -560,6 +567,7 @@ entry:
|
||||
; R600-CHECK: ASHR
|
||||
; SI-CHECK-LABEL: @load_i16_sext_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_I16
|
||||
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
|
||||
entry:
|
||||
@ -574,6 +582,7 @@ entry:
|
||||
; R600-CHECK: LDS_USHORT_READ_RET
|
||||
; SI-CHECK-LABEL: @load_v2i16_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_U16
|
||||
; SI-CHECK: DS_READ_U16
|
||||
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
|
||||
@ -591,6 +600,7 @@ entry:
|
||||
; R600-CHECK-DAG: ASHR
|
||||
; SI-CHECK-LABEL: @load_v2i16_sext_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_I16
|
||||
; SI-CHECK: DS_READ_I16
|
||||
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
|
||||
@ -608,6 +618,7 @@ entry:
|
||||
; R600-CHECK: LDS_USHORT_READ_RET
|
||||
; SI-CHECK-LABEL: @load_v4i16_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_U16
|
||||
; SI-CHECK: DS_READ_U16
|
||||
; SI-CHECK: DS_READ_U16
|
||||
@ -631,6 +642,7 @@ entry:
|
||||
; R600-CHECK-DAG: ASHR
|
||||
; SI-CHECK-LABEL: @load_v4i16_sext_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_I16
|
||||
; SI-CHECK: DS_READ_I16
|
||||
; SI-CHECK: DS_READ_I16
|
||||
@ -643,11 +655,12 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; load an i32 value from the glocal address space.
|
||||
; load an i32 value from the local address space.
|
||||
; R600-CHECK-LABEL: @load_i32_local
|
||||
; R600-CHECK: LDS_READ_RET
|
||||
; SI-CHECK-LABEL: @load_i32_local
|
||||
; SI-CHECK-NOT: S_WQM_B64
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_B32
|
||||
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
|
||||
entry:
|
||||
@ -656,10 +669,11 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; load a f32 value from the global address space.
|
||||
; load a f32 value from the local address space.
|
||||
; R600-CHECK-LABEL: @load_f32_local
|
||||
; R600-CHECK: LDS_READ_RET
|
||||
; SI-CHECK-LABEL: @load_f32_local
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_B32
|
||||
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
|
||||
entry:
|
||||
@ -673,6 +687,7 @@ entry:
|
||||
; R600-CHECK: LDS_READ_RET
|
||||
; R600-CHECK: LDS_READ_RET
|
||||
; SI-CHECK-LABEL: @load_v2f32_local
|
||||
; SI-CHECK: S_MOV_B32 m0
|
||||
; SI-CHECK: DS_READ_B32
|
||||
; SI-CHECK: DS_READ_B32
|
||||
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
|
||||
|
@ -28,3 +28,13 @@ define void @trunc_shl_i64(i32 addrspace(1)* %out, i64 %a) {
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @trunc_i32_to_i1:
|
||||
; SI: V_AND_B32
|
||||
; SI: V_CMP_EQ_I32
|
||||
define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
|
||||
%trunc = trunc i32 %a to i1
|
||||
%result = select i1 %trunc, i32 1, i32 0
|
||||
store i32 %result, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
29
test/CodeGen/R600/vtx-fetch-branch.ll
Normal file
29
test/CodeGen/R600/vtx-fetch-branch.ll
Normal file
@ -0,0 +1,29 @@
|
||||
; RUN: llc -march=r600 -mcpu=redwood %s -o - | FileCheck %s
|
||||
|
||||
; This tests for a bug where vertex fetch clauses right before an ENDIF
|
||||
; instruction were being emitted after the ENDIF. We were using ALU_POP_AFTER
|
||||
; for the ALU clause before the vertex fetch instead of emitting a POP instruction
|
||||
; after the fetch clause.
|
||||
|
||||
|
||||
; CHECK-LABEL: @test
|
||||
; CHECK-NOT: ALU_POP_AFTER
|
||||
; CHECK: TEX
|
||||
; CHECK-NEXT: POP
|
||||
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
|
||||
entry:
|
||||
%0 = icmp eq i32 %cond, 0
|
||||
br i1 %0, label %endif, label %if
|
||||
|
||||
if:
|
||||
%1 = load i32 addrspace(1)* %in
|
||||
br label %endif
|
||||
|
||||
endif:
|
||||
%x = phi i32 [ %1, %if], [ 0, %entry]
|
||||
store i32 %x, i32 addrspace(1)* %out
|
||||
br label %done
|
||||
|
||||
done:
|
||||
ret void
|
||||
}
|
@ -16,3 +16,13 @@ entry:
|
||||
store i64 %2, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-CHECK-LABEL: @testi1toi32
|
||||
; SI-CHECK: V_CNDMASK_B32
|
||||
define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
entry:
|
||||
%0 = icmp eq i32 %a, %b
|
||||
%1 = zext i1 %0 to i32
|
||||
store i32 %1, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
|
||||
; RUN: llc < %s -march=x86 -mcpu=core2 > %t1
|
||||
; RUN: grep movzwl %t1 | count 2
|
||||
; RUN: grep movzbl %t1 | count 1
|
||||
; RUN: grep movd %t1 | count 4
|
||||
|
19
test/CodeGen/X86/bswap-vector.ll
Normal file
19
test/CodeGen/X86/bswap-vector.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: llc < %s -mcpu=core | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
||||
|
||||
define <2 x i64> @foo(<2 x i64> %v) #0 {
|
||||
entry:
|
||||
%r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
|
||||
ret <2 x i64> %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK: bswapq
|
||||
; CHECK: bswapq
|
||||
; CHECK: ret
|
||||
|
||||
attributes #0 = { nounwind uwtable }
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
|
||||
|
||||
; VFMADD
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=x86-64 -mattr=+avx,-fma4 -mtriple=x86_64-apple-darwin -enable-unsafe-fp-math < %s | FileCheck %s
|
||||
; RUN: llc -march=x86-64 -mcpu=corei7-avx -enable-unsafe-fp-math < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: test1
|
||||
define float @test1(float %a) {
|
||||
|
12
test/CodeGen/X86/inline-asm-modifier-q.ll
Normal file
12
test/CodeGen/X86/inline-asm-modifier-q.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
|
||||
; If the target does not have 64-bit integer registers, emit 32-bit register
|
||||
; names.
|
||||
|
||||
; CHECK: movq (%e{{[abcd]}}x, %ebx, 4)
|
||||
|
||||
define void @q_modifier(i32* %p) {
|
||||
entry:
|
||||
tail call void asm sideeffect "movq (${0:q}, %ebx, 4), %mm0", "r,~{dirflag},~{fpsr},~{flags}"(i32* %p)
|
||||
ret void
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user