Import llvm 3.7.0 release (r246257).

parent ee8648bdac
commit 69156b4c20
@@ -61,7 +61,7 @@ set(CMAKE_MODULE_PATH
set(LLVM_VERSION_MAJOR 3)
set(LLVM_VERSION_MINOR 7)
set(LLVM_VERSION_PATCH 0)
-set(LLVM_VERSION_SUFFIX svn)
+set(LLVM_VERSION_SUFFIX "")

if (NOT PACKAGE_VERSION)
  set(PACKAGE_VERSION
@@ -518,7 +518,7 @@ if (APPLE)
else(UNIX)
  if(NOT DEFINED CMAKE_INSTALL_RPATH)
    set(CMAKE_INSTALL_RPATH "\$ORIGIN/../lib${LLVM_LIBDIR_SUFFIX}")
-    if (${CMAKE_SYSTEM_NAME} MATCHES FreeBSD)
+    if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
      set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,origin")
      set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,origin")
    endif()
@@ -544,12 +544,12 @@ if(LLVM_USE_HOST_TOOLS)
  include(CrossCompile)
endif(LLVM_USE_HOST_TOOLS)

-if( ${CMAKE_SYSTEM_NAME} MATCHES FreeBSD )
+if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
  # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM
  # with libxml2, iconv.h, etc., we must add /usr/local paths.
  include_directories("/usr/local/include")
  link_directories("/usr/local/lib")
-endif( ${CMAKE_SYSTEM_NAME} MATCHES FreeBSD )
+endif(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")

if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include llvm/Support/Solaris.h")

CREDITS.TXT (+44)
@@ -465,3 +465,47 @@ N: Bob Wilson
E: bob.wilson@acm.org
D: Advanced SIMD (NEON) support in the ARM backend.

N: Alexey Bataev
E: a.bataev@hotmail.com
D: Clang OpenMP implementation

N: Andrey Bokhanko
E: andreybokhanko@gmail.com
D: Clang OpenMP implementation

N: Carlo Bertolli
E: cbertol@us.ibm.com
D: Clang OpenMP implementation

N: Eric Stotzer
E: estotzer@ti.com
D: Clang OpenMP implementation

N: Kelvin Li
E: kkwli0@gmail.com
D: Clang OpenMP implementation

N: Samuel Antao
E: sfantao@us.ibm.com
D: Clang OpenMP implementation

N: Sergey Ostanevich
E: sergos.gnu@gmail.com
D: Clang OpenMP implementation

N: Alexandre Eichenberger
E: alexe@us.ibm.com
D: Clang OpenMP implementation

N: Guansong Zhang
E: guansong.zhang@amd.com
D: Clang OpenMP implementation

N: Sunita Chandrasekaran
E: sunisg123@gmail.com
D: Clang OpenMP implementation

N: Michael Wong
E: fraggamuffin@gmail.com
D: Clang OpenMP implementation

@@ -58,7 +58,7 @@ LLVM_OBJ_ROOT := $(call realpath, @abs_top_builddir@)
PROJ_SRC_ROOT := $(LLVM_SRC_ROOT)
PROJ_SRC_DIR := $(LLVM_SRC_ROOT)$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR))

-# See: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20150323/268067.html
+# See: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150323/268067.html
ifeq ($(LLVM_SRC_ROOT), $(LLVM_OBJ_ROOT))
  $(error In-source builds are not allowed. Please configure from a separate build directory!)
endif
@@ -32,12 +32,12 @@ dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.

-AC_INIT([LLVM],[3.7.0svn],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.7.0],[http://llvm.org/bugs/])

LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=7
LLVM_VERSION_PATCH=0
-LLVM_VERSION_SUFFIX=svn
+LLVM_VERSION_SUFFIX=

AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
AC_DEFINE_UNQUOTED([LLVM_VERSION_MINOR], $LLVM_VERSION_MINOR, [Minor version of the LLVM API])
@@ -131,7 +131,7 @@ endif()

# Pass -Wl,-z,defs. This makes sure all symbols are defined. Otherwise a DSO
# build might work on ELF but fail on MachO/COFF.
-if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32 OR
+if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32 OR CYGWIN OR
        ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") AND
    NOT LLVM_USE_SANITIZER)
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,defs")

configure (vendored, 20 changes)
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.7.0svn.
+# Generated by GNU Autoconf 2.60 for LLVM 3.7.0.
#
# Report bugs to <http://llvm.org/bugs/>.
#
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='LLVM'
PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.7.0svn'
-PACKAGE_STRING='LLVM 3.7.0svn'
+PACKAGE_VERSION='3.7.0'
+PACKAGE_STRING='LLVM 3.7.0'
PACKAGE_BUGREPORT='http://llvm.org/bugs/'

ac_unique_file="lib/IR/Module.cpp"
@@ -1333,7 +1333,7 @@ if test "$ac_init_help" = "long"; then
  # Omit some internal or obsolete options to make the list less imposing.
  # This message is too long to be a string in the A/UX 3.1 sh.
  cat <<_ACEOF
-\`configure' configures LLVM 3.7.0svn to adapt to many kinds of systems.
+\`configure' configures LLVM 3.7.0 to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1399,7 +1399,7 @@ fi

if test -n "$ac_init_help"; then
  case $ac_init_help in
-    short | recursive ) echo "Configuration of LLVM 3.7.0svn:";;
+    short | recursive ) echo "Configuration of LLVM 3.7.0:";;
  esac
  cat <<\_ACEOF

@@ -1583,7 +1583,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
  cat <<\_ACEOF
-LLVM configure 3.7.0svn
+LLVM configure 3.7.0
generated by GNU Autoconf 2.60

Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1599,7 +1599,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

-It was created by LLVM $as_me 3.7.0svn, which was
+It was created by LLVM $as_me 3.7.0, which was
generated by GNU Autoconf 2.60. Invocation command line was

  $ $0 $@
@@ -1956,7 +1956,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=7
LLVM_VERSION_PATCH=0
-LLVM_VERSION_SUFFIX=svn
+LLVM_VERSION_SUFFIX=


cat >>confdefs.h <<_ACEOF
@@ -18610,7 +18610,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by LLVM $as_me 3.7.0svn, which was
+This file was extended by LLVM $as_me 3.7.0, which was
generated by GNU Autoconf 2.60. Invocation command line was

CONFIG_FILES = $CONFIG_FILES
@@ -18663,7 +18663,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-LLVM config.status 3.7.0svn
+LLVM config.status 3.7.0
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
@@ -173,7 +173,7 @@ Notes for code generation
also expected to generate an i8 store as an i8 store, and not an instruction
which writes to surrounding bytes. (If you are writing a backend for an
architecture which cannot satisfy these restrictions and cares about
-concurrency, please send an email to llvmdev.)
+concurrency, please send an email to llvm-dev.)

Unordered
---------
@@ -387,6 +387,10 @@ LLVM-specific variables
  ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise this has no
  effect.

+**LLVM_DOXYGEN_SVG**:BOOL
+  Uses .svg files instead of .png files for graphs in the Doxygen output.
+  Defaults to OFF.
+
**LLVM_ENABLE_SPHINX**:BOOL
  If enabled CMake will search for the ``sphinx-build`` executable and will make
  the ``SPHINX_OUTPUT_HTML`` and ``SPHINX_OUTPUT_MAN`` CMake options available.
@@ -56,6 +56,14 @@ if (LLVM_ENABLE_DOXYGEN)
    set(llvm_doxygen_qhp_cust_filter_attrs "")
  endif()

+  option(LLVM_DOXYGEN_SVG
+    "Use svg instead of png files for doxygen graphs." OFF)
+  if (LLVM_DOXYGEN_SVG)
+    set(DOT_IMAGE_FORMAT "svg")
+  else()
+    set(DOT_IMAGE_FORMAT "png")
+  endif()
+
  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/doxygen.cfg.in
    ${CMAKE_CURRENT_BINARY_DIR}/doxygen.cfg @ONLY)

@@ -73,6 +81,7 @@ if (LLVM_ENABLE_DOXYGEN)
    set(llvm_doxygen_qhelpgenerator_path)
    set(llvm_doxygen_qhp_cust_filter_name)
    set(llvm_doxygen_qhp_cust_filter_attrs)
+    set(DOT_IMAGE_FORMAT)

  add_custom_target(doxygen-llvm
    COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/doxygen.cfg
@@ -1814,6 +1814,7 @@ Here is the table:
:raw-html:`<th>SystemZ</th>`
:raw-html:`<th>X86</th>`
:raw-html:`<th>XCore</th>`
+:raw-html:`<th>eBPF</th>`
:raw-html:`</tr>`

:raw-html:`<tr>`
@@ -1828,6 +1829,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="yes"></td> <!-- XCore -->`
+:raw-html:`<td class="yes"></td> <!-- eBPF -->`
:raw-html:`</tr>`

:raw-html:`<tr>`
@@ -1842,6 +1844,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`<td class="no"></td> <!-- eBPF -->`
:raw-html:`</tr>`

:raw-html:`<tr>`
@@ -1856,6 +1859,7 @@ Here is the table:
:raw-html:`<td class="no"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="yes"></td> <!-- XCore -->`
+:raw-html:`<td class="yes"></td> <!-- eBPF -->`
:raw-html:`</tr>`

:raw-html:`<tr>`
@@ -1870,6 +1874,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="yes"></td> <!-- XCore -->`
+:raw-html:`<td class="no"></td> <!-- eBPF -->`
:raw-html:`</tr>`

:raw-html:`<tr>`
@@ -1884,6 +1889,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`<td class="yes"></td> <!-- eBPF -->`
:raw-html:`</tr>`

:raw-html:`<tr>`
@@ -1898,6 +1904,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`<td class="yes"></td> <!-- eBPF -->`
:raw-html:`</tr>`

:raw-html:`<tr>`
@@ -1912,6 +1919,7 @@ Here is the table:
:raw-html:`<td class="no"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`<td class="no"></td> <!-- eBPF -->`
:raw-html:`</tr>`

:raw-html:`<tr>`
@@ -1926,6 +1934,7 @@ Here is the table:
:raw-html:`<td class="no"></td> <!-- SystemZ -->`
:raw-html:`<td class="partial"><a href="#feat_segstacks_x86">*</a></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
+:raw-html:`<td class="no"></td> <!-- eBPF -->`
:raw-html:`</tr>`

:raw-html:`</table>`
@@ -2448,3 +2457,191 @@ Code Generator Options:
:raw-html:`</tr>`
:raw-html:`</table>`

The extended Berkeley Packet Filter (eBPF) backend
--------------------------------------------------

Extended BPF (or eBPF) is similar to the original ("classic") BPF (cBPF) used
to filter network packets. The
`bpf() system call <http://man7.org/linux/man-pages/man2/bpf.2.html>`_
performs a range of operations related to eBPF. For both cBPF and eBPF
programs, the Linux kernel statically analyzes the programs before loading
them, in order to ensure that they cannot harm the running system. eBPF is
a 64-bit RISC instruction set designed for one to one mapping to 64-bit CPUs.
Opcodes are 8-bit encoded, and 87 instructions are defined. There are 10
registers, grouped by function as outlined below.

::

  R0        return value from in-kernel functions; exit value for eBPF program
  R1 - R5   function call arguments to in-kernel functions
  R6 - R9   callee-saved registers preserved by in-kernel functions
  R10       stack frame pointer (read only)

Instruction encoding (arithmetic and jump)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
eBPF is reusing most of the opcode encoding from classic to simplify conversion
of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit 'code'
field is divided into three parts:

::

  +----------------+--------+--------------------+
  |    4 bits      |  1 bit |      3 bits        |
  | operation code | source | instruction class  |
  +----------------+--------+--------------------+
  (MSB)                                      (LSB)

Three LSB bits store instruction class which is one of:

::

  BPF_LD     0x0
  BPF_LDX    0x1
  BPF_ST     0x2
  BPF_STX    0x3
  BPF_ALU    0x4
  BPF_JMP    0x5
  (unused)   0x6
  BPF_ALU64  0x7

When BPF_CLASS(code) == BPF_ALU or BPF_ALU64 or BPF_JMP,
4th bit encodes source operand

::

  BPF_X   0x0 use src_reg register as source operand
  BPF_K   0x1 use 32 bit immediate as source operand

and four MSB bits store operation code

::

  BPF_ADD   0x0 add
  BPF_SUB   0x1 subtract
  BPF_MUL   0x2 multiply
  BPF_DIV   0x3 divide
  BPF_OR    0x4 bitwise logical OR
  BPF_AND   0x5 bitwise logical AND
  BPF_LSH   0x6 left shift
  BPF_RSH   0x7 right shift (zero extended)
  BPF_NEG   0x8 arithmetic negation
  BPF_MOD   0x9 modulo
  BPF_XOR   0xa bitwise logical XOR
  BPF_MOV   0xb move register to register
  BPF_ARSH  0xc right shift (sign extended)
  BPF_END   0xd endianness conversion

If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of

::

  BPF_JA    0x0 unconditional jump
  BPF_JEQ   0x1 jump ==
  BPF_JGT   0x2 jump >
  BPF_JGE   0x3 jump >=
  BPF_JSET  0x4 jump if (DST & SRC)
  BPF_JNE   0x5 jump !=
  BPF_JSGT  0x6 jump signed >
  BPF_JSGE  0x7 jump signed >=
  BPF_CALL  0x8 function call
  BPF_EXIT  0x9 function return
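
For illustration only (this sketch is ours, not part of the imported
documentation): the fields above can be pulled out of the 8-bit 'code' byte
with plain shifts and masks, where bits 0-2 hold the instruction class, bit 3
the source flag, and bits 4-7 the operation code::

  #include <stdio.h>

  /* Split an 8-bit eBPF arithmetic/jump opcode into the three fields
     described in the tables above. */
  static void decode_alu_jmp(unsigned char code)
  {
      unsigned cls = code & 0x07;        /* bits 0-2: instruction class */
      unsigned src = (code >> 3) & 0x1;  /* bit 3: source operand flag  */
      unsigned op  = (code >> 4) & 0xf;  /* bits 4-7: operation code    */
      printf("class=0x%x src=0x%x op=0x%x\n", cls, src, op);
  }

  int main(void)
  {
      /* 0x07 decodes as class 0x7 (BPF_ALU64) with operation 0x0 (BPF_ADD). */
      decode_alu_jmp(0x07);
      return 0;
  }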

Instruction encoding (load, store)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For load and store instructions the 8-bit 'code' field is divided as:

::

  +--------+--------+-------------------+
  | 3 bits | 2 bits |      3 bits       |
  |  mode  |  size  | instruction class |
  +--------+--------+-------------------+
  (MSB)                             (LSB)

Size modifier is one of

::

  BPF_W   0x0 word
  BPF_H   0x1 half word
  BPF_B   0x2 byte
  BPF_DW  0x3 double word

Mode modifier is one of

::

  BPF_IMM     0x0 immediate
  BPF_ABS     0x1 used to access packet data
  BPF_IND     0x2 used to access packet data
  BPF_MEM     0x3 memory
  (reserved)  0x4
  (reserved)  0x5
  BPF_XADD    0x6 exclusive add
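
The same trick works for the load/store layout (again a hedged sketch of ours,
not a kernel or LLVM API): bits 0-2 hold the instruction class, bits 3-4 the
size, and bits 5-7 the mode::

  #include <stdio.h>

  /* Split an 8-bit eBPF load/store opcode into the fields described above. */
  static void decode_ld_st(unsigned char code)
  {
      unsigned cls  = code & 0x07;        /* bits 0-2: BPF_LD ... BPF_STX   */
      unsigned size = (code >> 3) & 0x3;  /* bits 3-4: BPF_W ... BPF_DW     */
      unsigned mode = (code >> 5) & 0x7;  /* bits 5-7: BPF_IMM ... BPF_XADD */
      printf("class=0x%x size=0x%x mode=0x%x\n", cls, size, mode);
  }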

Packet data access (BPF_ABS, BPF_IND)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
(BPF_IND | <size> | BPF_LD) which are used to access packet data.
Register R6 is an implicit input that must contain pointer to sk_buff.
Register R0 is an implicit output which contains the data fetched
from the packet. Registers R1-R5 are scratch registers and must not
be used to store the data across BPF_ABS | BPF_LD or BPF_IND | BPF_LD
instructions. These instructions have implicit program exit condition
as well. When eBPF program is trying to access the data beyond
the packet boundary, the interpreter will abort the execution of the program.

BPF_IND | BPF_W | BPF_LD is equivalent to:
  R0 = ntohl(\*(u32 \*) (((struct sk_buff \*) R6)->data + src_reg + imm32))

eBPF maps
^^^^^^^^^

eBPF maps are provided for sharing data between kernel and user-space.
Currently implemented types are hash and array, with potential extension to
support bloom filters, radix trees, etc. A map is defined by its type,
maximum number of elements, key size and value size in bytes. eBPF syscall
supports create, update, find and delete functions on maps.
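
As a rough illustration of that syscall interface (a sketch of ours assuming a
Linux 3.18+ system with <linux/bpf.h>; the wrapper name is hypothetical), a
hash map can be created from user space like this::

  #include <linux/bpf.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  /* Minimal wrapper around the bpf(2) syscall for BPF_MAP_CREATE. */
  static int create_map(enum bpf_map_type type, unsigned key_size,
                        unsigned value_size, unsigned max_entries)
  {
      union bpf_attr attr;
      memset(&attr, 0, sizeof(attr));
      attr.map_type    = type;
      attr.key_size    = key_size;
      attr.value_size  = value_size;
      attr.max_entries = max_entries;
      return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
  }

  int main(void)
  {
      /* A hash map with 4-byte keys, 8-byte values, at most 1024 entries. */
      int fd = create_map(BPF_MAP_TYPE_HASH, 4, 8, 1024);
      return fd < 0 ? 1 : 0;
  }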

Function calls
^^^^^^^^^^^^^^

Function call arguments are passed using up to five registers (R1 - R5).
The return value is passed in a dedicated register (R0). Four additional
registers (R6 - R9) are callee-saved, and the values in these registers
are preserved within kernel functions. R0 - R5 are scratch registers within
kernel functions, and eBPF programs must therefore store/restore values in
these registers if needed across function calls. The stack can be accessed
using the read-only frame pointer R10. eBPF registers map 1:1 to hardware
registers on x86_64 and other 64-bit architectures. For example, x86_64
in-kernel JIT maps them as

::

  R0  - rax
  R1  - rdi
  R2  - rsi
  R3  - rdx
  R4  - rcx
  R5  - r8
  R6  - rbx
  R7  - r13
  R8  - r14
  R9  - r15
  R10 - rbp

since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing
and rbx, r12 - r15 are callee saved.

Program start
^^^^^^^^^^^^^

An eBPF program receives a single argument and contains
a single eBPF main routine; the program does not contain eBPF functions.
Function calls are limited to a predefined set of kernel functions. The size
of a program is limited to 4K instructions: this ensures fast termination and
a limited number of kernel function calls. Prior to running an eBPF program,
a verifier performs static analysis to prevent loops in the code and
to ensure valid register usage and operand types.
@@ -28,7 +28,7 @@ Note that some code bases (e.g. ``libc++``) have really good reasons to deviate
from the coding standards. In the case of ``libc++``, this is because the
naming and other conventions are dictated by the C++ standard. If you think
there is a specific good reason to deviate from the standards here, please bring
-it up on the LLVMdev mailing list.
+it up on the LLVM-dev mailing list.

There are some conventions that are not uniformly followed in the code base
(e.g. the naming convention). This is because they are relatively new, and a
@@ -30,7 +30,7 @@ This policy is also designed to accomplish the following objectives:
This policy is aimed at frequent contributors to LLVM. People interested in
contributing one-off patches can do so in an informal way by sending them to the
`llvm-commits mailing list
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_ and engaging another
+<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_ and engaging another
developer to see it through the process.

Developer Policies
@@ -47,23 +47,23 @@ Stay Informed
-------------

Developers should stay informed by reading at least the "dev" mailing list for
-the projects you are interested in, such as `llvmdev
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ for LLVM, `cfe-dev
-<http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev>`_ for Clang, or `lldb-dev
-<http://lists.cs.uiuc.edu/mailman/listinfo/lldb-dev>`_ for LLDB. If you are
+the projects you are interested in, such as `llvm-dev
+<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ for LLVM, `cfe-dev
+<http://lists.llvm.org/mailman/listinfo/cfe-dev>`_ for Clang, or `lldb-dev
+<http://lists.llvm.org/mailman/listinfo/lldb-dev>`_ for LLDB. If you are
doing anything more than just casual work on LLVM, it is suggested that you also
subscribe to the "commits" mailing list for the subproject you're interested in,
such as `llvm-commits
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_, `cfe-commits
-<http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits>`_, or `lldb-commits
-<http://lists.cs.uiuc.edu/mailman/listinfo/lldb-commits>`_. Reading the
+<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_, `cfe-commits
+<http://lists.llvm.org/mailman/listinfo/cfe-commits>`_, or `lldb-commits
+<http://lists.llvm.org/mailman/listinfo/lldb-commits>`_. Reading the
"commits" list and paying attention to changes being made by others is a good
way to see what other people are interested in and watching the flow of the
project as a whole.

We recommend that active developers register an email account with `LLVM
Bugzilla <http://llvm.org/bugs/>`_ and preferably subscribe to the `llvm-bugs
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvmbugs>`_ email list to keep track
+<http://lists.llvm.org/mailman/listinfo/llvm-bugs>`_ email list to keep track
of bugs and enhancements occurring in LLVM. We really appreciate people who are
proactive at catching incoming bugs in their components and dealing with them
promptly.
@@ -365,7 +365,7 @@ If you have recently been granted commit access, these policies apply:

#. You are granted *commit-after-approval* to all parts of LLVM. To get
   approval, submit a `patch`_ to `llvm-commits
-   <http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_. When approved,
+   <http://lists.llvm.org/mailman/listinfo/llvm-commits>`_. When approved,
   you may commit it yourself.

#. You are allowed to commit patches without approval which you think are
@@ -394,8 +394,8 @@ Making a Major Change
---------------------

When a developer begins a major new project with the aim of contributing it back
-to LLVM, they should inform the community with an email to the `llvmdev
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ email list, to the extent
+to LLVM, they should inform the community with an email to the `llvm-dev
+<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ email list, to the extent
possible. The reason for this is to:

#. keep the community informed about future changes to LLVM,
@@ -608,7 +608,7 @@ LICENSE.txt files specifically indicate that they contain GPL code.

We have no plans to change the license of LLVM. If you have questions or
comments about the license, please contact the `LLVM Developer's Mailing
-List <mailto:llvmdev@cs.uiuc.edu>`_.
+List <mailto:llvm-dev@lists.llvm.org>`_.

Patents
-------
@@ -15,7 +15,7 @@ When you come to this realization, stop and think. Do you really need to extend
LLVM? Is it a new fundamental capability that LLVM does not support at its
current incarnation or can it be synthesized from already pre-existing LLVM
elements? If you are not sure, ask on the `LLVM-dev
-<http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ list. The reason is that
+<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ list. The reason is that
extending LLVM will get involved as you need to update all the different passes
that you intend to use with your extension, and there are ``many`` LLVM analyses
and transformations, so it may be quite a bit of work.
@@ -174,10 +174,10 @@ Adding to this document

If you run across a case that you feel deserves to be covered here, please send
a patch to `llvm-commits
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_ for review.
+<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_ for review.

-If you have questions on these items, please direct them to `llvmdev
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_. The more relevant
+If you have questions on these items, please direct them to `llvm-dev
+<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_. The more relevant
context you are able to give to your question, the more likely it is to be
answered.

@@ -714,9 +714,9 @@ used by people developing LLVM.
|                         | the configure script. The default list is defined |
|                         | as ``LLVM_ALL_TARGETS``, and can be set to include |
|                         | out-of-tree targets. The default value includes:   |
-|                        | ``AArch64, ARM, CppBackend, Hexagon,               |
-|                        | Mips, MSP430, NVPTX, PowerPC, AMDGPU, Sparc,       |
-|                        | SystemZ, X86, XCore``.                             |
+|                        | ``AArch64, AMDGPU, ARM, BPF, CppBackend, Hexagon,  |
+|                        | Mips, MSP430, NVPTX, PowerPC, Sparc, SystemZ       |
+|                        | X86, XCore``.                                      |
+-------------------------+----------------------------------------------------+
| LLVM_ENABLE_DOXYGEN     | Build doxygen-based documentation from the source  |
|                         | code. This is disabled by default because it is    |
@@ -6493,7 +6493,7 @@ Example:

      %ptr = alloca i32                         ; yields i32*:ptr
      store i32 3, i32* %ptr                    ; yields void
-      %val = load i32* %ptr                    ; yields i32:val = i32 3
+      %val = load i32, i32* %ptr               ; yields i32:val = i32 3

.. _i_fence:

@@ -31,6 +31,7 @@ $(PROJ_OBJ_DIR)/doxygen.cfg: doxygen.cfg.in
        -e 's/@llvm_doxygen_qhp_cust_filter_name@//g' \
        -e 's/@llvm_doxygen_qhp_namespace@//g' \
        -e 's/@searchengine_url@//g' \
+       -e 's/@DOT_IMAGE_FORMAT@/png/g' \
        > $@
endif

@@ -150,7 +150,7 @@ Status

Please let us know whether you like it and what could be improved! We're still
working on setting up a bug tracker, but you can email klimek-at-google-dot-com
-and chandlerc-at-gmail-dot-com and CC the llvmdev mailing list with questions
+and chandlerc-at-gmail-dot-com and CC the llvm-dev mailing list with questions
until then. We also could use help implementing improvements. This sadly is
really painful and hard because the Phabricator codebase is in PHP and not as
testable as you might like. However, we've put exactly what we're deploying up
@@ -254,4 +254,4 @@ Further Help
If you have any questions or need any help creating an LLVM project, the LLVM
team would be more than happy to help. You can always post your questions to
the `LLVM Developers Mailing List
-<http://lists.cs.uiuc.edu/pipermail/llvmdev/>`_.
+<http://lists.llvm.org/pipermail/llvm-dev/>`_.
@@ -5,12 +5,6 @@ LLVM 3.7 Release Notes
.. contents::
    :local:

-.. warning::
-   These are in-progress notes for the upcoming LLVM 3.7 release. You may
-   prefer the `LLVM 3.6 Release Notes <http://llvm.org/releases/3.6.0/docs
-   /ReleaseNotes.html>`_.
-

Introduction
============

@@ -23,7 +17,7 @@ from the `LLVM releases web site <http://llvm.org/releases/>`_.
For more information about LLVM, including information about the latest
release, please check out the `main LLVM web site <http://llvm.org/>`_. If you
have questions or comments, the `LLVM Developer's Mailing List
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ is a good place to send
+<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send
them.

Note that if you are reading this file from a Subversion checkout or the main
|
||||
collection of tips for frontend authors on how to generate IR which LLVM is
|
||||
able to effectively optimize.
|
||||
|
||||
* The DataLayout is no longer optional. All the IR level optimizations expects
|
||||
* The ``DataLayout`` is no longer optional. All the IR level optimizations expects
|
||||
it to be present and the API has been changed to use a reference instead of
|
||||
a pointer to make it explicit. The Module owns the datalayout and it has to
|
||||
match the one attached to the TargetMachine for generating code.
|
||||
|
||||
* ... next change ...
|
||||
In 3.6, a pass was inserted in the pipeline to make the ``DataLayout`` accessible:
|
||||
``MyPassManager->add(new DataLayoutPass(MyTargetMachine->getDataLayout()));``
|
||||
In 3.7, you don't need a pass, you set the ``DataLayout`` on the ``Module``:
|
||||
``MyModule->setDataLayout(MyTargetMachine->createDataLayout());``
|
||||
|
||||
.. NOTE
|
||||
If you would like to document a larger change, then you can add a
|
||||
subsection about it right here. You can copy the following boilerplate
|
||||
and un-indent it (the indentation causes it to be inside this comment).
|
||||
The LLVM C API ``LLVMGetTargetMachineData`` is deprecated to reflect the fact
|
||||
that it won't be available anymore from ``TargetMachine`` in 3.8.
|
||||
|
||||
Special New Feature
|
||||
-------------------
|
||||
* Comdats are now orthogonal to the linkage. LLVM will not create
|
||||
comdats for weak linkage globals and the frontends are responsible
|
||||
for explicitly adding them.
|
||||
|
||||
Makes programs 10x faster by doing Special New Thing.
|
||||
* On ELF we now support multiple sections with the same name and
|
||||
comdat. This allows for smaller object files since multiple
|
||||
sections can have a simple name (`.text`, `.rodata`, etc).
|
||||
|
||||
Changes to the ARM Backend
|
||||
--------------------------
|
||||
* LLVM now lazily loads metadata in some cases. Creating archives
|
||||
with IR files with debug info is now 25X faster.
|
||||
|
||||
During this release ...
|
||||
* llvm-ar can create archives in the BSD format used by OS X.
|
||||
|
||||
* LLVM received a backend for the extended Berkely Packet Filter
|
||||
instruction set that can be dynamically loaded into the Linux kernel via the
|
||||
`bpf(2) <http://man7.org/linux/man-pages/man2/bpf.2.html>`_ syscall.
|
||||
|
||||
Support for BPF has been present in the kernel for some time, but starting
|
||||
from 3.18 has been extended with such features as: 64-bit registers, 8
|
||||
additional registers registers, conditional backwards jumps, call
|
||||
instruction, shift instructions, map (hash table, array, etc.), 1-8 byte
|
||||
load/store from stack, and more.
|
||||
|
||||
Up until now, users of BPF had to write bytecode by hand, or use
|
||||
custom generators. This release adds a proper LLVM backend target for the BPF
|
||||
bytecode architecture.
|
||||
|
||||
The BPF target is now available by default, and options exist in both Clang
|
||||
(-target bpf) or llc (-march=bpf) to pick eBPF as a backend.
|
||||
|
||||
* Switch-case lowering was rewritten to avoid generating unbalanced search trees
|
||||
(`PR22262 <http://llvm.org/pr22262>`_) and to exploit profile information
|
||||
when available. Some lowering strategies are now disabled when optimizations
|
||||
are turned off, to save compile time.
|
||||
|
||||
* The debug info IR class hierarchy now inherits from ``Metadata`` and has its
|
||||
own bitcode records and assembly syntax
|
||||
(`documented in LangRef <LangRef.html#specialized-metadata-nodes>`_). The debug
|
||||
info verifier has been merged with the main verifier.
|
||||
|
||||
* LLVM IR and APIs are in a period of transition to aid in the removal of
|
||||
pointer types (the end goal being that pointers are typeless/opaque - void*,
|
||||
if you will). Some APIs and IR constructs have been modified to take
|
||||
explicit types that are currently checked to match the target type of their
|
||||
pre-existing pointer type operands. Further changes are still needed, but the
|
||||
more you can avoid using ``PointerType::getPointeeType``, the easier the
|
||||
migration will be.
|
||||
|
||||
* Argument-less ``TargetMachine::getSubtarget`` and
|
||||
``TargetMachine::getSubtargetImpl`` have been removed from the tree. Updating
|
||||
out of tree ports is as simple as implementing a non-virtual version in the
|
||||
target, but implementing full ``Function`` based ``TargetSubtargetInfo``
|
||||
support is recommended.
|
||||
|
||||
* This is expected to be the last major release of LLVM that supports being
|
||||
run on Windows XP and Windows Vista. For the next major release the minimum
|
||||
Windows version requirement will be Windows 7.
|
||||
|
||||
Changes to the MIPS Target
|
||||
--------------------------
|
||||
|
||||
During this release ...
|
||||
During this release the MIPS target has:
|
||||
|
||||
* Added support for MIPS32R3, MIPS32R5, MIPS32R3, MIPS32R5, and microMIPS32.
|
||||
|
||||
* Added support for dynamic stack realignment. This is of particular importance
|
||||
to MSA on 32-bit subtargets since vectors always exceed the stack alignment on
|
||||
the O32 ABI.
|
||||
|
||||
* Added support for compiler-rt including:
|
||||
|
||||
* Support for the Address, and Undefined Behaviour Sanitizers for all MIPS
|
||||
subtargets.
|
||||
|
||||
* Support for the Data Flow, and Memory Sanitizer for 64-bit subtargets.
|
||||
|
||||
* Support for the Profiler for all MIPS subtargets.
|
||||
|
||||
* Added support for libcxx, and libcxxabi.
|
||||
|
||||
* Improved inline assembly support such that memory constraints may now make use
|
||||
of the appropriate address offsets available to the instructions. Also, added
|
||||
support for the ``ZC`` constraint.
|
||||
|
||||
* Added support for 128-bit integers on 64-bit subtargets and 16-bit floating
|
||||
point conversions on all subtargets.
|
||||
|
||||
* Added support for read-only ``.eh_frame`` sections by storing type information
|
||||
indirectly.
|
||||
|
||||
* Added support for MCJIT on all 64-bit subtargets as well as MIPS32R6.
|
||||
|
||||
* Added support for fast instruction selection on MIPS32 and MIPS32R2 with PIC.
|
||||
|
||||
* Various bug fixes. Including the following notable fixes:
|
||||
|
||||
* Fixed 'jumpy' debug line info around calls where calculation of the address
|
||||
of the function would inappropriately change the line number.
|
||||
|
||||
* Fixed missing ``__mips_isa_rev`` macro on the MIPS32R6 and MIPS32R6
|
||||
subtargets.
|
||||
|
||||
* Fixed representation of NaN when targeting systems using traditional
|
||||
encodings. Traditionally, MIPS has used NaN encodings that were compatible
|
||||
with IEEE754-1985 but would later be found incompatible with IEEE754-2008.
|
||||
|
||||
* Fixed multiple segfaults and assertions in the disassembler when
|
||||
disassembling instructions that have memory operands.
|
||||
|
||||
* Fixed multiple cases of suboptimal code generation involving $zero.
|
||||
|
||||
* Fixed code generation of 128-bit shifts on 64-bit subtargets.
|
||||
|
||||
* Prevented the delay slot filler from filling call delay slots with
|
||||
instructions that modify or use $ra.
|
||||
|
||||
* Fixed some remaining N32/N64 calling convention bugs when using small
|
||||
structures on big-endian subtargets.
|
||||
|
||||
* Fixed missing sign-extensions that are required by the N32/N64 calling
|
||||
convention when generating calls to library functions with 32-bit
|
||||
parameters.
|
||||
|
||||
* Corrected the ``int64_t`` typedef to be ``long`` for N64.
|
||||
|
||||
* ``-mno-odd-spreg`` is now honoured for vector insertion/extraction
|
||||
operations when using -mmsa.
|
||||
|
||||
* Fixed vector insertion and extraction for MSA on 64-bit subtargets.
|
||||
|
||||
* Corrected the representation of member function pointers. This makes them
|
||||
usable on microMIPS subtargets.
|
||||
|
||||
Changes to the PowerPC Target
|
||||
-----------------------------
|
||||
|
||||
During this release ...
|
||||
There are numerous improvements to the PowerPC target in this release:
|
||||
|
||||
* LLVM now supports the ISA 2.07B (POWER8) instruction set, including
|
||||
direct moves between general registers and vector registers, and
|
||||
built-in support for hardware transactional memory (HTM). Some missing
|
||||
instructions from ISA 2.06 (POWER7) were also added.
|
||||
|
||||
Changes to the OCaml bindings
|
||||
* Code generation for the local-dynamic and global-dynamic thread-local
|
||||
storage models has been improved.
|
||||
|
||||
* Loops may be restructured to leverage pre-increment loads and stores.
|
||||
|
||||
* QPX - The vector instruction set used by the IBM Blue Gene/Q supercomputers
|
||||
is now supported.
|
||||
|
||||
* Loads from the TOC area are now correctly treated as invariant.
|
||||
|
||||
* PowerPC now has support for i128 and v1i128 types. The types differ
|
||||
in how they are passed in registers for the ELFv2 ABI.
|
||||
|
||||
* Disassembly will now print shorter mnemonic aliases when available.
|
||||
|
||||
* Optional register name prefixes for VSX and QPX registers are now
|
||||
supported in the assembly parser.
|
||||
|
||||
* The back end now contains a pass to remove unnecessary vector swaps
|
||||
from POWER8 little-endian code generation. Additional improvements
|
||||
are planned for release 3.8.
|
||||
|
||||
* The undefined-behavior sanitizer (UBSan) is now supported for PowerPC.
|
||||
|
||||
* Many new vector programming APIs have been added to altivec.h.
|
||||
Additional ones are planned for release 3.8.
|
||||
|
||||
* PowerPC now supports __builtin_call_with_static_chain.
|
||||
|
||||
* PowerPC now supports the revised -mrecip option that permits finer
|
||||
control over reciprocal estimates.
|
||||
|
||||
* Many bugs have been identified and fixed.
|
||||
|
||||
Changes to the SystemZ Target
|
||||
-----------------------------
|
||||
|
||||
During this release ...
|
||||
* LLVM no longer attempts to automatically detect the current host CPU when
|
||||
invoked natively.
|
||||
|
||||
* Support for all thread-local storage models. (Previous releases would support
|
||||
only the local-exec TLS model.)
|
||||
|
||||
* The POPCNT instruction is now used on z196 and above.
|
||||
|
||||
* The RISBGN instruction is now used on zEC12 and above.
|
||||
|
||||
* Support for the transactional-execution facility on zEC12 and above.
|
||||
|
||||
* Support for the z13 processor and its vector facility.
|
||||
|
||||
|
||||
Changes to the JIT APIs
|
||||
-----------------------
|
||||
|
||||
* Added a new C++ JIT API called On Request Compilation, or ORC.
|
||||
|
||||
ORC is a new JIT API inspired by MCJIT but designed to be more testable, and
|
||||
easier to extend with new features. A key new feature already in tree is lazy,
|
||||
function-at-a-time compilation for X86. Also included is a reimplementation of
|
||||
MCJIT's API and behavior (OrcMCJITReplacement). MCJIT itself remains in tree,
|
||||
and continues to be the default JIT ExecutionEngine, though new users are
|
||||
encouraged to try ORC out for their projects. (A good place to start is the
|
||||
new ORC tutorials under llvm/examples/kaleidoscope/orc).
|
||||
|
||||
Sub-project Status Update
|
||||
=========================
|
||||
|
||||
In addition to the core LLVM 3.7 distribution of production-quality compiler
|
||||
infrastructure, the LLVM project includes sub-projects that use the LLVM core
|
||||
and share the same distribution license. This section provides updates on these
|
||||
sub-projects.
|
||||
|
||||
Polly - The Polyhedral Loop Optimizer in LLVM
|
||||
---------------------------------------------
|
||||
|
||||
`Polly <http://polly.llvm.org>`_ is a polyhedral loop optimization
|
||||
infrastructure that provides data-locality optimizations to LLVM-based
|
||||
compilers. When compiled as part of clang or loaded as a module into clang,
|
||||
it can perform loop optimizations such as tiling, loop fusion or outer-loop
|
||||
vectorization. As a generic loop optimization infrastructure it allows
|
||||
developers to get a per-loop-iteration model of a loop nest on which detailed
|
||||
analysis and transformations can be performed.
|
||||
|
||||
Changes since the last release:
|
||||
|
||||
* isl imported into Polly distribution
|
||||
|
||||
`isl <http://repo.or.cz/w/isl.git>`_, the math library Polly uses, has been
|
||||
imported into the source code repository of Polly and is now distributed as part
|
||||
of Polly. As this was the last external library dependency of Polly, Polly can
|
||||
now be compiled right after checking out the Polly source code without the need
|
||||
for any additional libraries to be pre-installed.
|
||||
|
||||
* Small integer optimization of isl
|
||||
|
||||
The MIT licensed imath backend using in `isl <http://repo.or.cz/w/isl.git>`_ for
|
||||
arbitrary width integer computations has been optimized to use native integer
|
||||
operations for the common case where the operands of a computation fit into 32
|
||||
bit and to only fall back to large arbitrary precision integers for the
|
||||
remaining cases. This optimization has greatly improved the compile-time
|
||||
performance of Polly, both due to faster native operations also due to a
|
||||
reduction in malloc traffic and pointer indirections. As a result, computations
|
||||
that use arbitrary precision integers heavily have been speed up by almost 6x.
|
||||
As a result, the compile-time of Polly on the Polybench test kernels in the LNT
|
||||
suite has been reduced by 20% on average with compile time reductions between
|
||||
9-43%.
|
||||
|
||||
* Schedule Trees
|
||||
|
||||
Polly now uses internally so-called > Schedule Trees < to model the loop
|
||||
structure it optimizes. Schedule trees are an easy to understand tree structure
|
||||
that describes a loop nest using integer constraint sets to keep track of
|
||||
execution constraints. It allows the developer to use per-tree-node operations
|
||||
to modify the loop tree. Programatic analysis that work on the schedule tree
|
||||
(e.g., as dependence analysis) also show a visible speedup as they can exploit
|
||||
the tree structure of the schedule and need to fall back to ILP based
|
||||
optimization problems less often. Section 6 of `Polyhedral AST generation is
|
||||
more than scanning polyhedra
|
||||
<http://www.grosser.es/#pub-polyhedral-AST-generation>`_ gives a detailed
|
||||
explanation of this schedule trees.
|
||||
|
||||
* Scalar and PHI node modeling - Polly as an analysis
|
||||
|
||||
Polly now requires almost no preprocessing to analyse LLVM-IR, which makes it
|
||||
easier to use Polly as a pure analysis pass e.g. to provide more precise
|
||||
dependence information to non-polyhedral transformation passes. Originally,
|
||||
Polly required the input LLVM-IR to be preprocessed such that all scalar and
|
||||
PHI-node dependences are translated to in-memory operations. Since this release,
|
||||
Polly has full support for scalar and PHI node dependences and requires no
|
||||
scalar-to-memory translation for such kind of dependences.
|
||||
|
||||
* Modeling of modulo and non-affine conditions
|
||||
|
||||
Polly can now supports modulo operations such as A[t%2][i][j] as they appear
|
||||
often in stencil computations and also allows data-dependent conditional
|
||||
branches as they result e.g. from ternary conditions ala A[i] > 255 ? 255 :
|
||||
A[i].
|
||||
|
||||
* Delinearization
|
||||
|
||||
Polly now support the analysis of manually linearized multi-dimensional arrays
|
||||
as they result form macros such as
|
||||
"#define 2DARRAY(A,i,j) (A.data[(i) * A.size + (j)]". Similar constructs appear
|
||||
in old C code written before C99, C++ code such as boost::ublas, LLVM exported
|
||||
from Julia, Matlab generated code and many others. Our work titled
|
||||
`Optimistic Delinearization of Parametrically Sized Arrays
|
||||
<http://www.grosser.es/#pub-optimistic-delinerization>`_ gives details.
|
||||
|
||||
* Compile time improvements
|
||||
|
||||
Pratik Bahtu worked on compile-time performance tuning of Polly. His work
|
||||
together with the support for schedule trees and the small integer optimization
|
||||
in isl notably reduced the compile time.
|
||||
|
||||
* Increased compute timeouts
|
||||
|
||||
As Polly's compile time has been notabily improved, we were able to increase
|
||||
the compile time saveguards in Polly. As a result, the default configuration
|
||||
of Polly can now analyze larger loop nests without running into compile time
|
||||
restrictions.
|
||||
|
||||
* Export Debug Locations via JSCoP file
|
||||
|
||||
Polly's JSCoP import/export format gained support for debug locations that show
|
||||
to the user the source code location of detected scops.
|
||||
|
||||
* Improved windows support
|
||||
|
||||
The compilation of Polly on windows using cmake has been improved and several
|
||||
visual studio build issues have been addressed.
|
||||
|
||||
* Many bug fixes
|
||||
|
||||
libunwind
|
||||
---------
|
||||
|
||||
The unwind implementation which use to reside in `libc++abi` has been moved into
|
||||
a separate repository. This implementation can still be used for `libc++abi` by
|
||||
specifying `-DLIBCXXABI_USE_LLVM_UNWINDER=YES` and
|
||||
`-DLIBCXXABI_LIBUNWIND_PATH=<path to libunwind source>` when configuring
|
||||
`libc++abi`, which defaults to `true` when building on ARM.
|
||||
|
||||
The new repository can also be built standalone if just `libunwind` is desired.
|
||||
|
||||
External Open Source Projects Using LLVM 3.7
|
||||
============================================
|
||||
@ -96,7 +390,74 @@ An exciting aspect of LLVM is that it is used as an enabling technology for
|
||||
a lot of other language and tools projects. This section lists some of the
|
||||
projects that have already been updated to work with LLVM 3.7.
|
||||
|
||||
* A project
|
||||
|
||||
LDC - the LLVM-based D compiler
|
||||
-------------------------------
|
||||
|
||||
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
|
||||
pragmatically combines efficiency, control, and modeling power, with safety and
|
||||
programmer productivity. D supports powerful concepts like Compile-Time Function
|
||||
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
|
||||
to concurrency and offers many classical paradigms.
|
||||
|
||||
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
|
||||
combined with LLVM as backend to produce efficient native code. LDC targets
|
||||
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on
|
||||
PowerPC (32/64 bit). Ports to other architectures like ARM, AArch64 and MIPS64
|
||||
are underway.
|
||||
|
||||
Portable Computing Language (pocl)
|
||||
----------------------------------
|
||||
|
||||
In addition to producing an easily portable open source OpenCL
|
||||
implementation, another major goal of `pocl <http://portablecl.org/>`_
|
||||
is improving performance portability of OpenCL programs with
|
||||
compiler optimizations, reducing the need for target-dependent manual
|
||||
optimizations. An important part of pocl is a set of LLVM passes used to
|
||||
statically parallelize multiple work-items with the kernel compiler, even in
|
||||
the presence of work-group barriers.
|
||||
|
||||
|
||||
TTA-based Co-design Environment (TCE)
|
||||
-------------------------------------
|
||||
|
||||
`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing customized
|
||||
exposed datapath processors based on the Transport triggered
|
||||
architecture (TTA).
|
||||
|
||||
The toolset provides a complete co-design flow from C/C++
|
||||
programs down to synthesizable VHDL/Verilog and parallel program binaries.
|
||||
Processor customization points include the register files, function units,
|
||||
supported operations, and the interconnection network.
|
||||
|
||||
TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
|
||||
optimizations and also for parts of code generation. It generates
|
||||
new LLVM-based code generators "on the fly" for the designed processors and
|
||||
loads them in to the compiler backend as runtime libraries to avoid
|
||||
per-target recompilation of larger parts of the compiler chain.
|
||||
|
||||
BPF Compiler Collection (BCC)
|
||||
-----------------------------
|
||||
`BCC <https://github.com/iovisor/bcc>`_ is a Python + C framework for tracing and
|
||||
networking that is using Clang rewriter + 2nd pass of Clang + BPF backend to
|
||||
generate eBPF and push it into the kernel.
|
||||
|
||||
LLVMSharp & ClangSharp
|
||||
----------------------
|
||||
|
||||
`LLVMSharp <http://www.llvmsharp.org>`_ and
|
||||
`ClangSharp <http://www.clangsharp.org>`_ are type-safe C# bindings for
|
||||
Microsoft.NET and Mono that Platform Invoke into the native libraries.
|
||||
ClangSharp is self-hosted and is used to generated LLVMSharp using the
|
||||
LLVM-C API.
|
||||
|
||||
`LLVMSharp Kaleidoscope Tutorials <http://www.llvmsharp.org/Kaleidoscope/>`_
|
||||
are instructive examples of writing a compiler in C#, with certain improvements
|
||||
like using the visitor pattern to generate LLVM IR.
|
||||
|
||||
`ClangSharp PInvoke Generator <http://www.clangsharp.org/PInvoke/>`_ is the
|
||||
self-hosting mechanism for LLVM/ClangSharp and is demonstrative of using
|
||||
LibClang to generate Platform Invoke (PInvoke) signatures for C APIs.
|
||||
|
||||
|
||||
Additional Information
@@ -111,4 +472,3 @@ going into the ``llvm/docs/`` directory in the LLVM tree.

If you have any questions or comments about LLVM, please feel free to contact
us via the `mailing lists <http://llvm.org/docs/#maillist>`_.
@@ -565,7 +565,7 @@ The existing IR Verifier pass has been extended to check most of the
local restrictions on the intrinsics mentioned in their respective
documentation. The current implementation in LLVM does not check the
key relocation invariant, but this is ongoing work on developing such
-a verifier. Please ask on llvmdev if you're interested in
+a verifier. Please ask on llvm-dev if you're interested in
experimenting with the current version.

.. _statepoint-utilities:
@@ -696,7 +696,7 @@ If you are scheduling the RewriteStatepointsForGC pass late in the pass order,
you should probably schedule this pass immediately before it. The exception
would be if you need to preserve abstract frame information (e.g. for
deoptimization or introspection) at safepoints. In that case, ask on the
-llvmdev mailing list for suggestions.
+llvm-dev mailing list for suggestions.


Bugs and Enhancements
@@ -707,8 +707,8 @@ tracked by performing a `bugzilla search
<http://llvm.org/bugs/buglist.cgi?cmdtype=runnamed&namedcmd=Statepoint%20Bugs&list_id=64342>`_
for [Statepoint] in the summary field. When filing new bugs, please
use this tag so that interested parties see the newly filed bug. As
-with most LLVM features, design discussions take place on `llvmdev
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_, and patches
+with most LLVM features, design discussions take place on `llvm-dev
+<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_, and patches
should be sent to `llvm-commits
-<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_ for review.
+<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_ for review.

@@ -7,7 +7,7 @@ TableGen Language Introduction

.. warning::
   This document is extremely rough. If you find something lacking, please
-   fix it, file a documentation bug, or ask about it on llvmdev.
+   fix it, file a documentation bug, or ask about it on llvm-dev.

Introduction
============
@@ -7,7 +7,7 @@ TableGen Language Reference

.. warning::
   This document is extremely rough. If you find something lacking, please
-   fix it, file a documentation bug, or ask about it on llvmdev.
+   fix it, file a documentation bug, or ask about it on llvm-dev.

Introduction
============
@@ -11,6 +11,7 @@
# serve to show the default.

import sys, os
+from datetime import date

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -40,7 +41,7 @@ master_doc = 'index'

# General information about the project.
project = u'LLVM'
-copyright = u'2003-2014, LLVM Project'
+copyright = u'2003-%d, LLVM Project' % date.today().year

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -2205,7 +2205,7 @@ DIRECTORY_GRAPH = YES
# The default value is: png.
# This tag requires that the tag HAVE_DOT is set to YES.

-DOT_IMAGE_FORMAT = png
+DOT_IMAGE_FORMAT = @DOT_IMAGE_FORMAT@

# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
# enable generation of interactive SVG images that allow zooming and panning.
@@ -1,11 +1,6 @@
Overview
========

-.. warning::
-
-   If you are using a released version of LLVM, see `the download page
-   <http://llvm.org/releases/>`_ to find your documentation.
-
The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.
@@ -425,12 +420,12 @@ Mailing Lists
If you can't find what you need in these docs, try consulting the mailing
lists.

-`Developer's List (llvmdev)`__
+`Developer's List (llvm-dev)`__
  This list is for people who want to be included in technical discussions of
  LLVM. People post to this list when they have questions about writing code
  for or using the LLVM tools. It is relatively low volume.

-  .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
+  .. __: http://lists.llvm.org/mailman/listinfo/llvm-dev

`Commits Archive (llvm-commits)`__
  This list contains all commit messages that are made when LLVM developers
@@ -439,26 +434,26 @@ lists.
  stay on the bleeding edge of LLVM development. This list is very high
  volume.

-  .. __: http://lists.cs.uiuc.edu/pipermail/llvm-commits/
+  .. __: http://lists.llvm.org/pipermail/llvm-commits/

-`Bugs & Patches Archive (llvmbugs)`__
+`Bugs & Patches Archive (llvm-bugs)`__
  This list gets emailed every time a bug is opened and closed. It is
-  higher volume than the LLVMdev list.
+  higher volume than the LLVM-dev list.

-  .. __: http://lists.cs.uiuc.edu/pipermail/llvmbugs/
+  .. __: http://lists.llvm.org/pipermail/llvm-bugs/

`Test Results Archive (llvm-testresults)`__
  A message is automatically sent to this list by every active nightly tester
  when it completes. As such, this list gets email several times each day,
  making it a high volume list.

-  .. __: http://lists.cs.uiuc.edu/pipermail/llvm-testresults/
+  .. __: http://lists.llvm.org/pipermail/llvm-testresults/

`LLVM Announcements List (llvm-announce)`__
  This is a low volume list that provides important announcements regarding
  LLVM. It gets email about once a month.

-  .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce
+  .. __: http://lists.llvm.org/mailman/listinfo/llvm-announce

IRC
---
@ -90,8 +90,8 @@ For example, try adding:
|
||||
Have fun - try doing something crazy and unusual. Building a language
|
||||
like everyone else always has, is much less fun than trying something a
|
||||
little crazy or off the wall and seeing how it turns out. If you get
|
||||
stuck or want to talk about it, feel free to email the `llvmdev mailing
|
||||
list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_: it has lots
|
||||
stuck or want to talk about it, feel free to email the `llvm-dev mailing
|
||||
list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_: it has lots
|
||||
of people who are interested in languages and are often willing to help
|
||||
out.
|
||||
|
||||
@ -169,8 +169,8 @@ It is certainly possible to implement a safe language in LLVM, but LLVM
|
||||
IR does not itself guarantee safety. The LLVM IR allows unsafe pointer
|
||||
casts, use after free bugs, buffer over-runs, and a variety of other
|
||||
problems. Safety needs to be implemented as a layer on top of LLVM and,
|
||||
conveniently, several groups have investigated this. Ask on the `llvmdev
|
||||
mailing list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ if
|
||||
conveniently, several groups have investigated this. Ask on the `llvm-dev
|
||||
mailing list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ if
|
||||
you are interested in more details.
|
||||
|
||||
Language-Specific Optimizations
|
||||
@ -220,7 +220,7 @@ safe to optimize that into "return 0;" because C specifies what the
|
||||
In addition to simple library knowledge, it is possible to embed a
|
||||
variety of other language-specific information into the LLVM IR. If you
|
||||
have a specific need and run into a wall, please bring the topic up on
|
||||
the llvmdev list. At the very worst, you can always treat LLVM as if it
|
||||
the llvm-dev list. At the very worst, you can always treat LLVM as if it
|
||||
were a "dumb code generator" and implement the high-level optimizations
|
||||
you desire in your front-end, on the language-specific AST.
|
||||
|
||||
|
@ -95,8 +95,8 @@ For example, try adding:
Have fun - try doing something crazy and unusual. Building a language
like everyone else always has, is much less fun than trying something a
little crazy or off the wall and seeing how it turns out. If you get
stuck or want to talk about it, feel free to email the `llvmdev mailing
list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_: it has lots
stuck or want to talk about it, feel free to email the `llvm-dev mailing
list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_: it has lots
of people who are interested in languages and are often willing to help
out.

@ -174,8 +174,8 @@ It is certainly possible to implement a safe language in LLVM, but LLVM
IR does not itself guarantee safety. The LLVM IR allows unsafe pointer
casts, use after free bugs, buffer over-runs, and a variety of other
problems. Safety needs to be implemented as a layer on top of LLVM and,
conveniently, several groups have investigated this. Ask on the `llvmdev
mailing list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ if
conveniently, several groups have investigated this. Ask on the `llvm-dev
mailing list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ if
you are interested in more details.

Language-Specific Optimizations
@ -225,7 +225,7 @@ safe to optimize that into "return 0;" because C specifies what the
In addition to simple library knowledge, it is possible to embed a
variety of other language-specific information into the LLVM IR. If you
have a specific need and run into a wall, please bring the topic up on
the llvmdev list. At the very worst, you can always treat LLVM as if it
the llvm-dev list. At the very worst, you can always treat LLVM as if it
were a "dumb code generator" and implement the high-level optimizations
you desire in your front-end, on the language-specific AST.

@ -115,7 +115,7 @@ char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T);
LLVMDisposeMessage. */
char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T);

/** Returns the llvm::DataLayout used for this llvm:TargetMachine. */
/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T);

/** Set the target machine's ASM verbosity. */
@ -315,8 +315,10 @@ protected:
T2>::value>::type * = nullptr) {
// Use memcpy for PODs iterated by pointers (which includes SmallVector
// iterators): std::uninitialized_copy optimizes to memmove, but we can
// use memcpy here.
memcpy(Dest, I, (E-I)*sizeof(T));
// use memcpy here. Note that I and E are iterators and thus might be
// invalid for memcpy if they are equal.
if (I != E)
memcpy(Dest, I, (E - I) * sizeof(T));
}

/// Double the size of the allocated memory, guaranteeing space for at
@ -158,7 +158,8 @@ public:

// Copy the string information.
char *StrBuffer = const_cast<char*>(NewItem->getKeyData());
memcpy(StrBuffer, Key.data(), KeyLength);
if (KeyLength > 0)
memcpy(StrBuffer, Key.data(), KeyLength);
StrBuffer[KeyLength] = 0; // Null terminate for convenience of clients.
return NewItem;
}
@ -32,11 +32,13 @@ namespace llvm {

class LiveInterval;
class LiveIntervalAnalysis;
class MachineRegisterInfo;
class TargetRegisterInfo;
class VirtRegMap;

class LiveRegMatrix : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
VirtRegMap *VRM;

@ -95,8 +95,20 @@ private:
return MO->Contents.Reg.Next;
}

/// UsedRegUnits - This is a bit vector that is computed and set by the
/// register allocator, and must be kept up to date by passes that run after
/// register allocation (though most don't modify this). This is used
/// so that the code generator knows which callee save registers to save and
/// for other target specific uses.
/// This vector has bits set for register units that are modified in the
/// current function. It doesn't include registers clobbered by function
/// calls with register mask operands.
BitVector UsedRegUnits;

/// UsedPhysRegMask - Additional used physregs including aliases.
/// This bit vector represents all the registers clobbered by function calls.
/// It can model things that UsedRegUnits can't, such as function calls that
/// clobber ymm7 but preserve the low half in xmm7.
BitVector UsedPhysRegMask;

/// ReservedRegs - This is a bit vector of reserved registers. The target
@ -641,12 +653,55 @@ public:
/// ignored.
bool isPhysRegModified(unsigned PhysReg) const;

//===--------------------------------------------------------------------===//
// Physical Register Use Info
//===--------------------------------------------------------------------===//

/// isPhysRegUsed - Return true if the specified register is used in this
/// function. Also check for clobbered aliases and registers clobbered by
/// function calls with register mask operands.
///
/// This only works after register allocation.
bool isPhysRegUsed(unsigned Reg) const {
if (UsedPhysRegMask.test(Reg))
return true;
for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
Units.isValid(); ++Units)
if (UsedRegUnits.test(*Units))
return true;
return false;
}

/// Mark the specified register unit as used in this function.
/// This should only be called during and after register allocation.
void setRegUnitUsed(unsigned RegUnit) {
UsedRegUnits.set(RegUnit);
}

/// setPhysRegUsed - Mark the specified register used in this function.
/// This should only be called during and after register allocation.
void setPhysRegUsed(unsigned Reg) {
for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
Units.isValid(); ++Units)
UsedRegUnits.set(*Units);
}

/// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used.
/// This corresponds to the bit mask attached to register mask operands.
void addPhysRegsUsedFromRegMask(const uint32_t *RegMask) {
UsedPhysRegMask.setBitsNotInMask(RegMask);
}

/// setPhysRegUnused - Mark the specified register unused in this function.
/// This should only be called during and after register allocation.
void setPhysRegUnused(unsigned Reg) {
UsedPhysRegMask.reset(Reg);
for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
Units.isValid(); ++Units)
UsedRegUnits.reset(*Units);
}


//===--------------------------------------------------------------------===//
// Reserved Register Info
//===--------------------------------------------------------------------===//
@ -125,10 +125,15 @@ public:
return *static_cast<const STC*>(getSubtargetImpl(F));
}

/// Deprecated in 3.7, will be removed in 3.8. Use createDataLayout() instead.
///
/// This method returns a pointer to the DataLayout for the target. It should
/// be unchanging for every subtarget.
const DataLayout *getDataLayout() const { return &DL; }

/// Create a DataLayout.
const DataLayout createDataLayout() const { return DL; }

/// \brief Reset the target options based on the function's attributes.
// FIXME: Remove TargetOptions that affect per-function code generation
// from TargetMachine.
@ -206,14 +206,6 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
return V;
}

if (ConstantInt *Const = dyn_cast<ConstantInt>(V)) {
// if it's a constant, just convert it to an offset
// and remove the variable.
Offset += Const->getValue();
assert(Scale == 0 && "Constant values don't have a scale");
return V;
}

if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
@ -261,10 +253,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
Depth + 1, AC, DT);
Scale = Scale.zext(OldWidth);

// We have to sign-extend even if Extension == EK_ZeroExt as we can't
// decompose a sign extension (i.e. zext(x - 1) != zext(x) - zext(-1)).
Offset = Offset.sext(OldWidth);
Offset = Offset.zext(OldWidth);

return Result;
}
@ -1135,43 +1124,12 @@ AliasResult BasicAliasAnalysis::aliasGEP(
}
}

// Try to distinguish something like &A[i][1] against &A[42][0].
// Grab the least significant bit set in any of the scales.
if (!GEP1VariableIndices.empty()) {
uint64_t Modulo = 0;
bool AllPositive = true;
for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {

// Try to distinguish something like &A[i][1] against &A[42][0].
// Grab the least significant bit set in any of the scales. We
// don't need std::abs here (even if the scale's negative) as we'll
// be ^'ing Modulo with itself later.
for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
Modulo |= (uint64_t) GEP1VariableIndices[i].Scale;

if (AllPositive) {
// If the Value could change between cycles, then any reasoning about
// the Value this cycle may not hold in the next cycle. We'll just
// give up if we can't determine conditions that hold for every cycle:
const Value *V = GEP1VariableIndices[i].V;

bool SignKnownZero, SignKnownOne;
ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, *DL,
0, AC1, nullptr, DT);

// Zero-extension widens the variable, and so forces the sign
// bit to zero.
bool IsZExt = GEP1VariableIndices[i].Extension == EK_ZeroExt;
SignKnownZero |= IsZExt;
SignKnownOne &= !IsZExt;

// If the variable begins with a zero then we know it's
// positive, regardless of whether the value is signed or
// unsigned.
int64_t Scale = GEP1VariableIndices[i].Scale;
AllPositive =
(SignKnownZero && Scale >= 0) ||
(SignKnownOne && Scale < 0);
}
}

Modulo = Modulo ^ (Modulo & (Modulo - 1));

// We can compute the difference between the two addresses
@ -1182,12 +1140,6 @@ AliasResult BasicAliasAnalysis::aliasGEP(
V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
V1Size <= Modulo - ModOffset)
return NoAlias;

// If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
// If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
// don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t) GEP1BaseOffset)
return NoAlias;
}

// Statically, we can see that the base objects are the same, but the
@ -440,31 +440,40 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}

// Scan the function bodies for explicit loads or stores.
for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;
++i)
for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
E = inst_end(SCC[i]->getFunction());
II != E && FunctionEffect != ModRef; ++II)
if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) {
FunctionEffect |= Ref;
if (LI->isVolatile())
// Volatile loads may have side-effects, so mark them as writing
// memory (for example, a flag inside the processor).
FunctionEffect |= Mod;
} else if (StoreInst *SI = dyn_cast<StoreInst>(&*II)) {
FunctionEffect |= Mod;
if (SI->isVolatile())
// Treat volatile stores as reading memory somewhere.
FunctionEffect |= Ref;
} else if (isAllocationFn(&*II, TLI) || isFreeCall(&*II, TLI)) {
FunctionEffect |= ModRef;
} else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) {
// The callgraph doesn't include intrinsic calls.
Function *Callee = Intrinsic->getCalledFunction();
ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee);
FunctionEffect |= (Behaviour & ModRef);
for (auto *Node : SCC) {
if (FunctionEffect == ModRef)
break; // The mod/ref lattice saturates here.
for (Instruction &I : inst_range(Node->getFunction())) {
if (FunctionEffect == ModRef)
break; // The mod/ref lattice saturates here.

// We handle calls specially because the graph-relevant aspects are
// handled above.
if (auto CS = CallSite(&I)) {
if (isAllocationFn(&I, TLI) || isFreeCall(&I, TLI)) {
// FIXME: It is completely unclear why this is necessary and not
// handled by the above graph code.
FunctionEffect |= ModRef;
} else if (Function *Callee = CS.getCalledFunction()) {
// The callgraph doesn't include intrinsic calls.
if (Callee->isIntrinsic()) {
ModRefBehavior Behaviour =
AliasAnalysis::getModRefBehavior(Callee);
FunctionEffect |= (Behaviour & ModRef);
}
}
continue;
}

// All non-call instructions we use the primary predicates for whether
// thay read or write memory.
if (I.mayReadFromMemory())
FunctionEffect |= Ref;
if (I.mayWriteToMemory())
FunctionEffect |= Mod;
}
}

if ((FunctionEffect & Mod) == 0)
++NumReadMemFunctions;
if (FunctionEffect == 0)
@ -3574,18 +3574,9 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,

// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = Vec->getType()->getVectorNumElements();

// If this is extracting an invalid index, turn this into undef, to avoid
// crashing the code below.
if (IndexVal >= VectorWidth)
return UndefValue::get(Vec->getType()->getVectorElementType());

if (Value *Elt = findScalarElement(Vec, IndexVal))
if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
return Elt;
}

return nullptr;
}
@ -374,9 +374,10 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true))
return Tmp.getAddr();

// If we don't have an available version of this value, it must be an
// instruction.
Instruction *Inst = cast<Instruction>(InVal);
// We don't need to PHI translate values which aren't instructions.
auto *Inst = dyn_cast<Instruction>(InVal);
if (!Inst)
return nullptr;

// Handle cast of PHI translatable value.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
@ -402,8 +402,9 @@ llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
if (match(V,
llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
llvm::PatternMatch::m_Constant(Con)))) {
if (Con->getAggregateElement(EltNo)->isNullValue())
return findScalarElement(Val, EltNo);
if (Constant *Elt = Con->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
}

// Otherwise, we don't know.
@ -733,14 +733,12 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
const MachineRegisterInfo &MRI = mf.getRegInfo();
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E && !anyregs; ++I)
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
if (!MRI.reg_nodbg_empty(*AI)) {
anyregs = true;
break;
}
I != E; ++I)
if (MF->getRegInfo().isPhysRegUsed(*I)) {
anyregs = true;
break;
}
if (!anyregs) return false;

// Initialize the AliasMap on the first use.
@ -15,12 +15,12 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

@ -49,6 +49,7 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {

bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();

@ -100,6 +101,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
<< " to " << PrintReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
MRI->setPhysRegUsed(PhysReg);

foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {
@ -29,6 +29,7 @@ MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
TracksSubRegLiveness(false) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs());

// Create the physreg use/def lists.
@ -624,6 +624,10 @@ struct DataDep {
static bool getDataDeps(const MachineInstr *UseMI,
SmallVectorImpl<DataDep> &Deps,
const MachineRegisterInfo *MRI) {
// Debug values should not be included in any calculations.
if (UseMI->isDebugValue())
return false;

bool HasPhysRegs = false;
for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(),
E = UseMI->operands_end(); I != E; ++I) {
@ -1026,8 +1026,12 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
MachineRegisterInfo &MRI = Fn.getRegInfo();
Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);

// Make sure MRI now accounts this register as used.
MRI.setPhysRegUsed(ScratchReg);

// Because this instruction was processed by the RS before this
// register was allocated, make sure that the RS now records the
// register as being used.
@ -986,6 +986,10 @@ void RAFast::AllocateBasicBlock() {
}
}

for (UsedInInstrSet::iterator
I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
MRI->setRegUnitUsed(*I);

// Track registers defined by instruction - early clobbers and tied uses at
// this point.
UsedInInstr.clear();
@ -1046,6 +1050,10 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();

for (UsedInInstrSet::iterator
I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
MRI->setRegUnitUsed(*I);

if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
Coalesced.push_back(MI);
@ -1095,6 +1103,12 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
AllocateBasicBlock();
}

// Add the clobber lists for all the instructions we skipped earlier.
for (const MCInstrDesc *Desc : SkippedInstrs)
if (const uint16_t *Defs = Desc->getImplicitDefs())
while (*Defs)
MRI->setPhysRegUsed(*Defs++);

// All machine operands and other references to virtual registers have been
// replaced. Remove the virtual registers.
MRI->clearVirtRegs();
@ -1531,6 +1531,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
return false;
}

// We must also check for clobbers caused by regmasks.
for (const auto &MO : MI->operands()) {
if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
return false;
}
}
}

// We're going to remove the copy which defines a physical reserved
@ -8365,12 +8365,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N0CFP->isExactlyValue(1.0))
return SDValue();

SmallVector<SDNode *, 4> Users;
// Find all FDIV users of the same divisor.
for (auto *U : N1->uses()) {
// Use a set because duplicates may be present in the user list.
SetVector<SDNode *> Users;
for (auto *U : N1->uses())
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
Users.push_back(U);
}
Users.insert(U);

if (TLI.combineRepeatedFPDivisors(Users.size())) {
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
@ -163,6 +163,7 @@ class VirtRegRewriter : public MachineFunctionPass {
SlotIndexes *Indexes;
LiveIntervals *LIS;
VirtRegMap *VRM;
SparseSet<unsigned> PhysRegs;

void rewrite();
void addMBBLiveIns();
@ -318,15 +319,54 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;

// Here we have a SparseSet to hold which PhysRegs are actually encountered
// in the MF we are about to iterate over so that later when we call
// setPhysRegUsed, we are only doing it for physRegs that were actually found
// in the program and not for all of the possible physRegs for the given
// target architecture. If the target has a lot of physRegs, then for a small
// program there will be a significant compile time reduction here.
PhysRegs.clear();
PhysRegs.setUniverse(TRI->getNumRegs());

// The function with uwtable should guarantee that the stack unwinder
// can unwind the stack to the previous frame. Thus, we can't apply the
// noreturn optimization if the caller function has uwtable attribute.
bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable);

for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
bool IsExitBB = MBBI->succ_empty();
for (MachineBasicBlock::instr_iterator
MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
MachineInstr *MI = MII;
++MII;

// Check if this instruction is a call to a noreturn function. If this
// is a call to noreturn function and we don't need the stack unwinding
// functionality (i.e. this function does not have uwtable attribute and
// the callee function has the nounwind attribute), then we can ignore
// the definitions set by this instruction.
if (!HasUWTable && IsExitBB && MI->isCall()) {
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
if (!MO.isGlobal())
continue;
const Function *Func = dyn_cast<Function>(MO.getGlobal());
if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
// We need to keep correct unwind information
// even if the function will not return, since the
// runtime may need it.
!Func->hasFnAttribute(Attribute::NoUnwind))
continue;
NoReturnInsts.insert(MI);
break;
}
}

for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
@ -335,6 +375,15 @@ void VirtRegRewriter::rewrite() {
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());

// If we encounter a VirtReg or PhysReg then get at the PhysReg and add
// it to the physreg bitset. Later we use only the PhysRegs that were
// actually encountered in the MF to populate the MRI's used physregs.
if (MO.isReg() && MO.getReg())
PhysRegs.insert(
TargetRegisterInfo::isVirtualRegister(MO.getReg()) ?
VRM->getPhys(MO.getReg()) :
MO.getReg());

if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
@ -421,5 +470,29 @@ void VirtRegRewriter::rewrite() {
}
}
}

// Tell MRI about physical registers in use.
if (NoReturnInsts.empty()) {
for (SparseSet<unsigned>::iterator
RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI)
if (!MRI->reg_nodbg_empty(*RegI))
MRI->setPhysRegUsed(*RegI);
} else {
for (SparseSet<unsigned>::iterator
I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) {
unsigned Reg = *I;
if (MRI->reg_nodbg_empty(Reg))
continue;
// Check if this register has a use that will impact the rest of the
// code. Uses in debug and noreturn instructions do not impact the
// generated code.
for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) {
if (!NoReturnInsts.count(&It)) {
MRI->setPhysRegUsed(Reg);
break;
}
}
}
}
}

@ -180,10 +180,17 @@ uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) {
}

std::string ExecutionEngine::getMangledName(const GlobalValue *GV) {
assert(GV->hasName() && "Global must have name.");

MutexGuard locked(lock);
Mangler Mang;
SmallString<128> FullName;
Mang.getNameWithPrefix(FullName, GV, false);

const DataLayout &DL =
GV->getParent()->getDataLayout().isDefault()
? *getDataLayout()
: GV->getParent()->getDataLayout();

Mangler::getNameWithPrefix(FullName, GV->getName(), DL);
return FullName.str();
}

@ -266,6 +266,12 @@ void MCJIT::finalizeModule(Module *M) {
RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) {
SmallString<128> FullName;
Mangler::getNameWithPrefix(FullName, Name, *TM->getDataLayout());

if (void *Addr = getPointerToGlobalIfAvailable(FullName))
return RuntimeDyld::SymbolInfo(static_cast<uint64_t>(
reinterpret_cast<uintptr_t>(Addr)),
JITSymbolFlags::Exported);

return Dyld.getSymbol(FullName);
}

@ -98,7 +98,7 @@ void RTDyldMemoryManager::registerEHFrames(uint8_t *Addr,
uint64_t LoadAddr,
size_t Size) {
// On OS X OS X __register_frame takes a single FDE as an argument.
// See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html
// See http://lists.llvm.org/pipermail/llvm-dev/2013-April/061768.html
const char *P = (const char *)Addr;
const char *End = P + Size;
do {
@ -613,6 +613,9 @@ bool StructType::isLayoutIdentical(StructType *Other) const {
if (isPacked() != Other->isPacked() ||
getNumElements() != Other->getNumElements())
return false;

if (!getNumElements())
return true;

return std::equal(element_begin(), element_end(), Other->element_begin());
}
@ -57,7 +57,8 @@ void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
/// CopyStringRef - Copies contents of a StringRef into a block of memory and
/// null-terminates it.
static void CopyStringRef(char *Memory, StringRef Data) {
memcpy(Memory, Data.data(), Data.size());
if (!Data.empty())
memcpy(Memory, Data.data(), Data.size());
Memory[Data.size()] = 0; // Null terminate string.
}

@ -593,6 +593,7 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
if (Change) {
Substs[MO.getReg()] = Reg;
MO.setReg(Reg);
MRI->setPhysRegUsed(Reg);

Changed = true;
}
@ -354,6 +354,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes && NeedsRealignment) {
// Use the first callee-saved register as a scratch register.
scratchSPReg = AArch64::X9;
MF.getRegInfo().setPhysRegUsed(scratchSPReg);
}

// If we're a leaf function, try using the red zone.
@ -123,6 +123,11 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"true",
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;

def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer",
"EnableHugeScratchBuffer",
"true",
"Enable scratch buffer sizes greater than 128 GB">;

class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
@ -1029,6 +1029,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &SLC, SDValue &TFE) const {
SDValue Ptr, Offen, Idxen, Addr64;

// addr64 bit was removed for volcanic islands.
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return false;

SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);

@ -1095,13 +1099,16 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,

// (add n0, c1)
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

if (isLegalMUBUFImmOffset(C1)) {
VAddr = Addr.getOperand(0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
// Offsets in vaddr must be positive.
if (CurDAG->SignBitIsZero(N0)) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
VAddr = N0;
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
}

@ -73,7 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
IsaVersion(ISAVersion0_0_0),
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
@ -89,6 +89,7 @@ private:
bool FeatureDisable;
int LDSBankCount;
unsigned IsaVersion;
bool EnableHugeScratchBuffer;

AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@ -271,6 +272,10 @@ public:
return DevName;
}

bool enableHugeScratchBuffer() const {
return EnableHugeScratchBuffer;
}

bool dumpCode() const {
return DumpCode;
}
@ -1719,7 +1719,6 @@ MachineBasicBlock *
AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
MachineBasicBlock *LoopHeader = LoopRep->getHeader();
MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);

if (!LoopHeader || !LoopLatch)
return nullptr;
@ -1732,18 +1731,9 @@ AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
FuncRep->push_back(DummyExitBlk); //insert to function
SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
MachineBasicBlock::iterator I = BranchMI;
unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
llvm_unreachable("Extra register needed to handle CFG");
MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
MachineInstrBuilder MIB(*FuncRep, NewMI);
MIB.addMBB(LoopHeader);
MIB.addReg(ImmReg, false);
SHOWNEWINSTR(NewMI);
BranchMI->eraseFromParent();
LoopLatch->addSuccessor(DummyExitBlk);

return DummyExitBlk;
LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext();
Ctx.emitError("Extra register needed to handle CFG");
return nullptr;
}

void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
@ -138,3 +138,7 @@ def : ProcessorModel<"iceland", SIQuarterSpeedModel,
def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureISAVersion8_0_1]
>;

def : ProcessorModel<"fiji", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureISAVersion8_0_1]
>;
@ -254,6 +254,12 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
return false;
}

bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
// Flat instructions do not have offsets, and only have the register
// address.
return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
}

bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
@ -263,8 +269,21 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,

switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Assume the we will use FLAT for all global memory accesses
// on VI.
// FIXME: This assumption is currently wrong. On VI we still use
// MUBUF instructions for the r + i addressing mode. As currently
// implemented, the MUBUF instructions only work on buffer < 4GB.
// It may be possible to support > 4GB buffers with MUBUF instructions,
// by setting the stride value in the resource descriptor which would
// increase the size limit to (stride * 4GB). However, this is risky,
// because it has never been validated.
return isLegalFlatAddressingMode(AM);
}
// fall-through
case AMDGPUAS::PRIVATE_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
// MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
// additionally can do r + r + i with addr64. 32-bit has more addressing
@ -324,11 +343,9 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,

return false;
}
case AMDGPUAS::FLAT_ADDRESS: {
// Flat instructions do not have offsets, and only have the register
// address.
return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
}
case AMDGPUAS::FLAT_ADDRESS:
return isLegalFlatAddressingMode(AM);

default:
llvm_unreachable("unhandled address space");
}
@ -812,10 +829,29 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {

SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {

SDLoc SL(Op);
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FINode->getIndex();

return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
// A FrameIndex node represents a 32-bit offset into scratch memory. If
// the high bit of a frame index offset were to be set, this would mean
// that it represented an offset of ~2GB * 64 = ~128GB from the start of the
// scratch buffer, with 64 being the number of threads per wave.
//
// If we know the machine uses less than 128GB of scratch, then we can
// amrk the high bit of the FrameIndex node as known zero,
// which is important, because it means in most situations we can
// prove that values derived from FrameIndex nodes are non-negative.
// This enables us to take advantage of more addressing modes when
// accessing scratch buffers, since for scratch reads/writes, the register
// offset must always be positive.

SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
if (Subtarget->enableHugeScratchBuffer())
return TFI;

return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31)));
}

/// This transforms the control flow intrinsics to get the branch destination as
@ -2034,6 +2070,13 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
}

static bool isFrameIndexOp(SDValue Op) {
if (Op.getOpcode() == ISD::AssertZext)
Op = Op.getOperand(0);

return isa<FrameIndexSDNode>(Op);
}

/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs are to these instructions are registers.
@ -2042,7 +2085,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,

SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
if (!isFrameIndexOp(Node->getOperand(i))) {
Ops.push_back(Node->getOperand(i));
continue;
}
@ -56,6 +56,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;

bool isLegalFlatAddressingMode(const AddrMode &AM) const;
public:
SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);

@ -1600,12 +1600,14 @@ multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
SIMCInstr <opName#"_e32", SISubtarget.SI> {
let Defs = !if(DefExec, [EXEC], []);
let hasSideEffects = DefExec;
let AssemblerPredicates = [isSICI];
}

def _vi : VOPC<op.VI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.VI> {
let Defs = !if(DefExec, [EXEC], []);
let hasSideEffects = DefExec;
let AssemblerPredicates = [isVI];
}
}

@ -2910,9 +2910,6 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
} // End Predicates = [isSICI]

class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
@ -3273,13 +3270,13 @@ def : Pat <
(f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
(f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
(V_CNDMASK_B64_PSEUDO
$x,
(V_MIN_F64
SRCMODS.NONE,
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
SRCMODS.NONE,
(V_MOV_B64_PSEUDO 0x3fefffffffffffff),
DSTCLAMP.NONE, DSTOMOD.NONE),
$x,
(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
>;

@ -3291,13 +3288,13 @@ def : Pat <
$x,
SRCMODS.NEG,
(V_CNDMASK_B64_PSEUDO
$x,
(V_MIN_F64
SRCMODS.NONE,
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
SRCMODS.NONE,
(V_MOV_B64_PSEUDO 0x3fefffffffffffff),
DSTCLAMP.NONE, DSTOMOD.NONE),
$x,
(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
DSTCLAMP.NONE, DSTOMOD.NONE)
>;
@ -53,6 +53,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
MRI.setPhysRegUsed(LaneVGPR);

// Add this register as live-in to all blocks to avoid machine verifer
// complaining about use of an undefined physical register.
@ -91,6 +91,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {

if (ScratchOffsetReg != AMDGPU::NoRegister) {
// Found an SGPR to use
MRI.setPhysRegUsed(ScratchOffsetReg);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
.addReg(ScratchOffsetPreloadReg);
} else {
@ -348,7 +348,8 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
&AMDGPU::SReg_128RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
&AMDGPU::VReg_512RegClass
&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass
};

for (const TargetRegisterClass *BaseClass : BaseClasses) {
@ -499,7 +500,7 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,

for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
if (MRI.reg_nodbg_empty(*I))
if (!MRI.isPhysRegUsed(*I))
return *I;
}
return AMDGPU::NoRegister;
@ -103,4 +103,46 @@ def : Pat <
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;

// Patterns for global loads with no offset
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr)),
(inst $addr, 0, 0, 0)
>;

def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;

class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
(inst $data, $addr, 0, 0, 0)
>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;

class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr, vt:$data)),
(inst $addr, $data, 0, 0)
>;

def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;


} // End Predicates = [isVI]
@ -4583,6 +4583,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);

if (CmpVT.getVectorElementType() == MVT::i64)
// 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
// but it's possible that our operands are 64-bit but our result is 32-bit.
// Bail in this case.
return SDValue();

if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
@ -118,7 +118,6 @@ namespace {
};
SpecificBumpPtrAllocator<MergeCandidate> Allocator;
SmallVector<const MergeCandidate*,4> Candidates;
SmallVector<MachineInstr*,4> MergeBaseCandidates;

void moveLiveRegsBefore(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator Before);
@ -141,7 +140,6 @@ namespace {
MachineBasicBlock::iterator &MBBI);
bool MergeBaseUpdateLoadStore(MachineInstr *MI);
bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@ -933,6 +931,11 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
CanMergeToLSMulti = false;

// LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
// deprecated; LDM to PC is fine but cannot happen here.
if (PReg == ARM::SP || PReg == ARM::PC)
CanMergeToLSMulti = CanMergeToLSDouble = false;

// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
@ -940,16 +943,15 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
break;
const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
if (Reg == ARM::SP || Reg == ARM::PC)
break;

// See if the current load/store may be part of a multi load/store.
unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
bool PartOfLSMulti = CanMergeToLSMulti;
if (PartOfLSMulti) {
// Cannot load from SP
if (Reg == ARM::SP)
PartOfLSMulti = false;
// Register numbers must be in ascending order.
else if (RegNum <= PRegNum)
if (RegNum <= PRegNum)
PartOfLSMulti = false;
// For VFP / NEON load/store multiples, the registers must be
// consecutive and within the limit on the number of registers per
@ -993,6 +995,76 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
} while (SIndex < EIndex);
}

static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
unsigned Bytes, unsigned Limit,
ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;

bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
case ARM::tSUBi8:
case ARM::t2SUBri:
case ARM::SUBri:
CheckCPSRDef = true;
break;
case ARM::tSUBspi:
break;
}

// Make sure the offset fits in 8 bits.
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;

unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
(MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;

return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
unsigned Bytes, unsigned Limit,
ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;

bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
case ARM::tADDi8:
case ARM::t2ADDri:
case ARM::ADDri:
CheckCPSRDef = true;
break;
case ARM::tADDspi:
break;
}

if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;

unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
(MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;

return CheckCPSRDef ? !definesCPSR(MI) : true;
}

static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
ARM_AM::AMSubMode Mode) {
switch (Opc) {
@ -1060,75 +1132,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the given instruction increments or decrements a register and
|
||||
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
|
||||
/// generated by the instruction are possibly read as well.
|
||||
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg) {
|
||||
bool CheckCPSRDef;
|
||||
int Scale;
|
||||
switch (MI.getOpcode()) {
|
||||
case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
|
||||
case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
|
||||
case ARM::t2SUBri:
|
||||
case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
|
||||
case ARM::t2ADDri:
|
||||
case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
|
||||
case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
|
||||
case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
|
||||
default: return 0;
|
||||
}
|
||||
|
||||
unsigned MIPredReg;
|
||||
if (MI.getOperand(0).getReg() != Reg ||
|
||||
MI.getOperand(1).getReg() != Reg ||
|
||||
getInstrPredicate(&MI, MIPredReg) != Pred ||
|
||||
MIPredReg != PredReg)
|
||||
return 0;
|
||||
|
||||
if (CheckCPSRDef && definesCPSR(&MI))
|
||||
return 0;
|
||||
return MI.getOperand(2).getImm() * Scale;
|
||||
}
|
||||
|
||||
/// Searches for an increment or decrement of \p Reg before \p MBBI.
|
||||
static MachineBasicBlock::iterator
|
||||
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
|
||||
Offset = 0;
|
||||
MachineBasicBlock &MBB = *MBBI->getParent();
|
||||
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
|
||||
MachineBasicBlock::iterator EndMBBI = MBB.end();
|
||||
if (MBBI == BeginMBBI)
|
||||
return EndMBBI;
|
||||
|
||||
// Skip debug values.
|
||||
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
|
||||
while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
|
||||
--PrevMBBI;
|
||||
|
||||
Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
|
||||
return Offset == 0 ? EndMBBI : PrevMBBI;
|
||||
}
|
||||
|
||||
/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
Offset = 0;
MachineBasicBlock &MBB = *MBBI->getParent();
MachineBasicBlock::iterator EndMBBI = MBB.end();
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
// Skip debug values.
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
if (NextMBBI == EndMBBI)
return EndMBBI;

Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
@ -1148,6 +1151,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
const MachineOperand &BaseOP = MI->getOperand(0);
unsigned Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned Opcode = MI->getOpcode();
@ -1159,24 +1163,49 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
if (MI->getOperand(i).getReg() == Base)
return false;

int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator MBBI(MI);
int Offset;
MachineBasicBlock::iterator MergeInstr
= findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
bool DoMerge = false;
ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
if (Mode == ARM_AM::ia && Offset == -Bytes) {
Mode = ARM_AM::db;
} else if (Mode == ARM_AM::ib && Offset == -Bytes) {
Mode = ARM_AM::da;
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
return false;

// Try merging with the previous instruction.
MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
if (Mode == ARM_AM::ia &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
Mode = ARM_AM::db;
DoMerge = true;
} else if (Mode == ARM_AM::ib &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
Mode = ARM_AM::da;
DoMerge = true;
}
if (DoMerge)
MBB.erase(PrevMBBI);
}
MBB.erase(MergeInstr);

// Try merging with the next instruction.
MachineBasicBlock::iterator EndMBBI = MBB.end();
if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
DoMerge = true;
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
DoMerge = true;
}
if (DoMerge)
MBB.erase(NextMBBI);
}

if (!DoMerge)
return false;

unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
@ -1254,6 +1283,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {

unsigned Base = getLoadStoreBaseOp(*MI).getReg();
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
@ -1265,6 +1295,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;

bool isLd = isLoadSingle(Opcode);
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (MI->getOperand(0).getReg() == Base)
@ -1272,31 +1303,55 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {

unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
int Bytes = getLSMultipleTransferSize(MI);
bool DoMerge = false;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
unsigned NewOpc = 0;
// AM2 - 12 bits, thumb2 - 8 bits.
unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

// Try merging with the previous instruction.
MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
int Offset;
MachineBasicBlock::iterator MergeInstr
= findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
unsigned NewOpc;
if (!isAM5 && Offset == Bytes) {
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
} else if (Offset == -Bytes) {
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (Offset == Bytes) {
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
} else if (!isAM5 && Offset == -Bytes) {
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
} else
return false;
if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
} else if (!isAM5 &&
isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
DoMerge = true;
}
if (DoMerge) {
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
MBB.erase(PrevMBBI);
}
}
MBB.erase(MergeInstr);

ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
// Try merging with the next instruction.
MachineBasicBlock::iterator EndMBBI = MBB.end();
if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
} else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
DoMerge = true;
}
if (DoMerge) {
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
MBB.erase(NextMBBI);
}
}

if (!DoMerge)
return false;

bool isLd = isLoadSingle(Opcode);
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
@ -1313,16 +1368,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
}
} else {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
@ -1334,12 +1391,13 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// the vestigial zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
@ -1351,66 +1409,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
return true;
}

bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
"Must have t2STRDi8 or t2LDRDi8");
if (MI.getOperand(3).getImm() != 0)
return false;

// Behaviour for writeback is undefined if base register is the same as one
// of the others.
const MachineOperand &BaseOp = MI.getOperand(2);
unsigned Base = BaseOp.getReg();
const MachineOperand &Reg0Op = MI.getOperand(0);
const MachineOperand &Reg1Op = MI.getOperand(1);
if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
return false;

unsigned PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
MachineBasicBlock::iterator MBBI(MI);
MachineBasicBlock &MBB = *MI.getParent();
int Offset;
MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
PredReg, Offset);
unsigned NewOpc;
if (Offset == 8 || Offset == -8) {
NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (Offset == 8 || Offset == -8) {
NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
} else
return false;
}
MBB.erase(MergeInstr);

DebugLoc DL = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
MIB.addOperand(Reg0Op).addOperand(Reg1Op)
.addReg(BaseOp.getReg(), RegState::Define);
} else {
assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
MIB.addReg(BaseOp.getReg(), RegState::Define)
.addOperand(Reg0Op).addOperand(Reg1Op);
}
MIB.addReg(BaseOp.getReg(), RegState::Kill)
.addImm(Offset).addImm(Pred).addReg(PredReg);
assert(TII->get(Opcode).getNumOperands() == 6 &&
TII->get(NewOpc).getNumOperands() == 7 &&
"Unexpected number of operands in Opcode specification.");

// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
MIB.addOperand(MO);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

MBB.erase(MBBI);
return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
@ -1618,7 +1616,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned Position = 0;
assert(Candidates.size() == 0);
assert(MergeBaseCandidates.size() == 0);
LiveRegsValid = false;

for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
@ -1697,15 +1694,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MBBI = I;
--Position;
// Fallthrough to look into existing chain.
} else if (MBBI->isDebugValue()) {
} else if (MBBI->isDebugValue())
continue;
} else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
MBBI->getOpcode() == ARM::t2STRDi8) {
// ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
// remember them because we may still be able to merge add/sub into them.
MergeBaseCandidates.push_back(MBBI);
}


// If we are here then the chain is broken; extract candidates for a merge.
if (MemOps.size() > 0) {
@ -1736,9 +1726,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (Merged) {
Changed = true;
unsigned Opcode = Merged->getOpcode();
if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
MergeBaseUpdateLSDouble(*Merged);
else
if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
MergeBaseUpdateLSMultiple(Merged);
} else {
for (MachineInstr *MI : Candidate->Instrs) {
@ -1753,10 +1741,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
Candidates.clear();
// Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
for (MachineInstr *MI : MergeBaseCandidates)
MergeBaseUpdateLSDouble(*MI);
MergeBaseCandidates.clear();

return Changed;
}

@ -566,7 +566,7 @@ Robert Muth started working on an alternate jump table implementation that
does not put the tables in-line in the text. This is more like the llvm
default jump table implementation. This might be useful sometime. Several
revisions of patches are on the mailing list, beginning at:
http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html
http://lists.llvm.org/pipermail/llvm-dev/2009-June/022763.html

//===---------------------------------------------------------------------===//

@ -57,7 +57,7 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Some things to try that should be better:
// * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
// * 'movs $dst, $src' if cpsr isn't live
// See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html
// See: http://lists.llvm.org/pipermail/llvm-dev/2014-August/075998.html

// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))

@ -864,13 +864,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
// Check for an unused caller-saved register.
for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
MCPhysReg FreeReg = *CallerSavedRegs;
if (!MRI.reg_nodbg_empty(FreeReg))
if (MRI.isPhysRegUsed(FreeReg))
continue;

// Check aliased register usage.
bool IsCurrentRegUsed = false;
for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
if (!MRI.reg_nodbg_empty(*AI)) {
if (MRI.isPhysRegUsed(*AI)) {
IsCurrentRegUsed = true;
break;
}

@ -500,14 +500,6 @@ def : MipsPat<(trunc (assertzext GPR64:$src)),
def : MipsPat<(i32 (trunc GPR64:$src)),
(SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;

// Bypass trunc nodes for bitwise ops.
def : MipsPat<(i32 (trunc (and GPR64:$lhs, GPR64:$rhs))),
(EXTRACT_SUBREG (AND64 GPR64:$lhs, GPR64:$rhs), sub_32)>;
def : MipsPat<(i32 (trunc (or GPR64:$lhs, GPR64:$rhs))),
(EXTRACT_SUBREG (OR64 GPR64:$lhs, GPR64:$rhs), sub_32)>;
def : MipsPat<(i32 (trunc (xor GPR64:$lhs, GPR64:$rhs))),
(EXTRACT_SUBREG (XOR64 GPR64:$lhs, GPR64:$rhs), sub_32)>;

// variable shift instructions patterns
def : MipsPat<(shl GPR64:$rt, (i32 (trunc GPR64:$rs))),
(DSLLV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>;

@ -267,6 +267,9 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
}

unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
if (!TargetSupported)
return 0;

assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i32 &&
"Alloca should always return a pointer.");

@ -290,12 +293,7 @@ unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) {
return 0;
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
const ConstantInt *CI = cast<ConstantInt>(C);
int64_t Imm;
if ((VT != MVT::i1) && CI->isNegative())
Imm = CI->getSExtValue();
else
Imm = CI->getZExtValue();
return materialize32BitInt(Imm, RC);
return materialize32BitInt(CI->getZExtValue(), RC);
}

unsigned MipsFastISel::materialize32BitInt(int64_t Imm,
@ -382,6 +380,9 @@ unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) {
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
if (!TargetSupported)
return 0;

EVT CEVT = TLI.getValueType(DL, C->getType(), true);

// Only handle simple types.
@ -981,6 +982,13 @@ bool MipsFastISel::selectSelect(const Instruction *I) {
if (!Src1Reg || !Src2Reg || !CondReg)
return false;

unsigned ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
if (!ZExtCondReg)
return false;

if (!emitIntExt(MVT::i1, CondReg, MVT::i32, ZExtCondReg, true))
return false;

unsigned ResultReg = createResultReg(RC);
unsigned TempReg = createResultReg(RC);

@ -989,7 +997,7 @@ bool MipsFastISel::selectSelect(const Instruction *I) {

emitInst(TargetOpcode::COPY, TempReg).addReg(Src2Reg);
emitInst(CondMovOpc, ResultReg)
.addReg(Src1Reg).addReg(CondReg).addReg(TempReg);
.addReg(Src1Reg).addReg(ZExtCondReg).addReg(TempReg);
updateValueMap(I, ResultReg);
return true;
}
@ -1232,12 +1240,19 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
}

bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!TargetSupported)
return false;

CallingConv::ID CC = CLI.CallConv;
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
MCSymbol *Symbol = CLI.Symbol;

// Do not handle FastCC.
if (CC == CallingConv::Fast)
return false;

// Allow SelectionDAG isel to handle tail calls.
if (IsTailCall)
return false;
@ -1312,6 +1327,9 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
}

bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!TargetSupported)
return false;

switch (II->getIntrinsicID()) {
default:
return false;
@ -1415,6 +1433,11 @@ bool MipsFastISel::selectRet(const Instruction *I) {

if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();

// Do not handle FastCC.
if (CC == CallingConv::Fast)
return false;

SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
@ -53,11 +54,6 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
cl::desc("MIPS: Don't trap on integer division by zero."),
cl::init(false));

cl::opt<bool>
EnableMipsFastISel("mips-fast-isel", cl::Hidden,
cl::desc("Allow mips-fast-isel to be used"),
cl::init(false));

static const MCPhysReg Mips64DPRegs[8] = {
Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@ -461,7 +457,7 @@ const MipsTargetLowering *MipsTargetLowering::create(const MipsTargetMachine &TM
FastISel *
MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
if (!EnableMipsFastISel)
if (!funcInfo.MF->getTarget().Options.EnableFastISel)
return TargetLowering::createFastISel(funcInfo, libInfo);
return Mips::createFastISel(funcInfo, libInfo);
}

@ -12,6 +12,7 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
@ -22,10 +23,12 @@ typedef MCDisassembler::DecodeStatus DecodeStatus;

namespace {
class PPCDisassembler : public MCDisassembler {
bool IsLittleEndian;

public:
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
~PPCDisassembler() override {}
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
bool IsLittleEndian)
: MCDisassembler(STI, Ctx), IsLittleEndian(IsLittleEndian) {}

DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@ -37,7 +40,13 @@ public:
static MCDisassembler *createPPCDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new PPCDisassembler(STI, Ctx);
return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/false);
}

static MCDisassembler *createPPCLEDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/true);
}

extern "C" void LLVMInitializePowerPCDisassembler() {
@ -47,7 +56,7 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
TargetRegistry::RegisterMCDisassembler(ThePPC64Target,
createPPCDisassembler);
TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget,
createPPCDisassembler);
createPPCLEDisassembler);
}

// FIXME: These can be generated by TableGen from the existing register
@ -383,9 +392,9 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}

// The instruction is big-endian encoded.
uint32_t Inst =
(Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
// Read the instruction in the proper endianness.
uint32_t Inst = IsLittleEndian ? support::endian::read32le(Bytes.data())
: support::endian::read32be(Bytes.data());

if (STI.getFeatureBits()[PPC::FeatureQPX]) {
DecodeStatus result =

@ -363,71 +363,85 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
SM.recordPatchPoint(MI);
PatchPointOpers Opers(&MI);

int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
unsigned EncodedBytes = 0;
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 0;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));
const MachineOperand &CalleeMO =
Opers.getMetaOper(PatchPointOpers::TargetPos);

// Save the current TOC pointer before the remote call.
int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
if (CalleeMO.isImm()) {
int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 0;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));

// Save the current TOC pointer before the remote call.
int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;


// If we're on ELFv1, then we need to load the actual function pointer from
// the function descriptor.
if (!Subtarget->isELFv2ABI()) {
// Load the new TOC pointer and the function address, but not r11
// (needing this is rare, and loading it here would prevent passing it
// via a 'nest' parameter.
// If we're on ELFv1, then we need to load the actual function pointer
// from the function descriptor.
if (!Subtarget->isELFv2ABI()) {
// Load the new TOC pointer and the function address, but not r11
// (needing this is rare, and loading it here would prevent passing it
// via a 'nest' parameter.
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(8)
.addReg(ScratchReg));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(ScratchReg)
.addImm(0)
.addReg(ScratchReg));
++EncodedBytes;
}

EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8)
.addReg(ScratchReg));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
++EncodedBytes;

// Restore the TOC pointer after the call.
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(8)
.addReg(ScratchReg));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(ScratchReg)
.addImm(0)
.addReg(ScratchReg));
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
}
} else if (CalleeMO.isGlobal()) {
const GlobalValue *GValue = CalleeMO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, OutContext);

EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
++EncodedBytes;

// Restore the TOC pointer after the call.
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP)
.addExpr(SymVar));
EncodedBytes += 2;
}

// Each instruction is 4 bytes.

@ -306,10 +306,9 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();

const MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UsedRegMask = 0;
for (unsigned i = 0; i != 32; ++i)
if (MRI.isPhysRegModified(VRRegNo[i]))
if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
UsedRegMask |= 1 << (31-i);

// Live in and live out values already must be in the mask, so don't bother

@ -2305,14 +2305,15 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (Swap)
std::swap(LHS, RHS);

EVT ResVT = VecVT.changeVectorElementTypeToInteger();
if (Negate) {
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
PPC::VNOR,
VecVT, VCmp, VCmp);
ResVT, VCmp, VCmp);
}

return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
return CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
}

if (PPCSubTarget->useCRBits())

@ -580,6 +580,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,

addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

@ -1416,7 +1417,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
} else
return -1;

if (ShuffleKind == 2 && isLE)
if (isLE)
ShiftAmt = 16 - ShiftAmt;

return ShiftAmt;
@ -1429,6 +1430,11 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 &&
(EltSize == 1 || EltSize == 2 || EltSize == 4));

// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.
if (N->getMaskElt(0) % EltSize != 0)
return false;

// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
unsigned ElementBase = N->getMaskElt(0);
@ -7011,17 +7017,20 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// t = vsplti c, result = vsldoi t, t, 1
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
}

@ -9957,6 +9966,9 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
if (Src.getValueType() == MVT::f32) {
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
DCI.AddToWorklist(Src.getNode());
} else if (Src.getValueType() != MVT::f64) {
// Make sure that we don't pick up a ppc_fp128 source value.
return SDValue();
}

unsigned FCTOp =

@ -106,7 +106,7 @@ for 1,2,4,8 bytes.
//===---------------------------------------------------------------------===//

It would be nice to revert this patch:
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html

And teach the dag combiner enough to simplify the code expanded before
legalize. It seems plausible that this knowledge would let it simplify other

@ -190,11 +190,11 @@ static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
{

for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
if (!MRI->reg_nodbg_empty(reg))
if (MRI->isPhysRegUsed(reg))
return false;

for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
if (!MRI->reg_nodbg_empty(reg))
if (MRI->isPhysRegUsed(reg))
return false;

return true;
@ -206,10 +206,10 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();

return !(MFI->hasCalls() // has calls
|| !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed
|| !MRI.reg_nodbg_empty(SP::O6) // %SP is used
|| hasFP(MF)); // need %FP
return !(MFI->hasCalls() // has calls
|| MRI.isPhysRegUsed(SP::L0) // Too many registers needed
|| MRI.isPhysRegUsed(SP::O6) // %SP is used
|| hasFP(MF)); // need %FP
}

void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
@ -218,13 +218,16 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {

// Remap %i[0-7] to %o[0-7].
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
if (MRI.reg_nodbg_empty(reg))
if (!MRI.isPhysRegUsed(reg))
continue;
unsigned mapped_reg = (reg - SP::I0 + SP::O0);
assert(MRI.reg_nodbg_empty(mapped_reg));
assert(!MRI.isPhysRegUsed(mapped_reg));

// Replace I register with O register.
MRI.replaceRegWith(reg, mapped_reg);

// Mark the reg unused.
MRI.setPhysRegUnused(reg);
}

// Rewrite MBB's Live-ins.

@ -53,10 +53,6 @@ def RetCC_SystemZ : CallingConv<[
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>

// ABI-compliant code returns long double by reference, but that conversion
// is left to higher-level code. Perhaps we could add an f128 definition
// here for code that doesn't care about the ABI?
]>;

//===----------------------------------------------------------------------===//

@ -1175,6 +1175,20 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
return Chain;
}

bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
// Detect unsupported vector return types.
if (Subtarget.hasVector())
VerifyVectorTypes(Outs);

SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}

SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,

@ -423,6 +423,10 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;

bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,

@ -681,6 +681,9 @@ private:

std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
void AddDefaultSrcDestOperands(
OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
std::unique_ptr<X86Operand> ParseOperand();
std::unique_ptr<X86Operand> ParseATTOperand();
std::unique_ptr<X86Operand> ParseIntelOperand();
@ -1014,6 +1017,19 @@ std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
Loc, Loc, 0);
}

void X86AsmParser::AddDefaultSrcDestOperands(
OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
if (isParsingIntelSyntax()) {
Operands.push_back(std::move(Dst));
Operands.push_back(std::move(Src));
}
else {
Operands.push_back(std::move(Src));
Operands.push_back(std::move(Dst));
}
}

std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
if (isParsingIntelSyntax())
return ParseIntelOperand();
@ -2228,26 +2244,18 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Name.startswith("ins") && Operands.size() == 1 &&
(Name == "insb" || Name == "insw" || Name == "insl" ||
Name == "insd" )) {
if (isParsingIntelSyntax()) {
Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
Operands.push_back(DefaultMemDIOperand(NameLoc));
} else {
Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
Operands.push_back(DefaultMemDIOperand(NameLoc));
}
AddDefaultSrcDestOperands(Operands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
}

// Append default arguments to "outs[bwld]"
if (Name.startswith("outs") && Operands.size() == 1 &&
(Name == "outsb" || Name == "outsw" || Name == "outsl" ||
Name == "outsd" )) {
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemSIOperand(NameLoc));
Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
} else {
Operands.push_back(DefaultMemSIOperand(NameLoc));
Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
}
AddDefaultSrcDestOperands(Operands,
DefaultMemSIOperand(NameLoc),
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
}

// Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
@ -2279,13 +2287,9 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
(Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
if (Operands.size() == 1) {
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemSIOperand(NameLoc));
Operands.push_back(DefaultMemDIOperand(NameLoc));
} else {
Operands.push_back(DefaultMemDIOperand(NameLoc));
Operands.push_back(DefaultMemSIOperand(NameLoc));
}
AddDefaultSrcDestOperands(Operands,
DefaultMemDIOperand(NameLoc),
DefaultMemSIOperand(NameLoc));
} else if (Operands.size() == 3) {
X86Operand &Op = (X86Operand &)*Operands[1];
X86Operand &Op2 = (X86Operand &)*Operands[2];
@ -2305,13 +2309,9 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Operands.size() == 1) {
if (Name == "movsd")
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemDIOperand(NameLoc));
Operands.push_back(DefaultMemSIOperand(NameLoc));
} else {
Operands.push_back(DefaultMemSIOperand(NameLoc));
Operands.push_back(DefaultMemDIOperand(NameLoc));
}
AddDefaultSrcDestOperands(Operands,
DefaultMemSIOperand(NameLoc),
DefaultMemDIOperand(NameLoc));
} else if (Operands.size() == 3) {
X86Operand &Op = (X86Operand &)*Operands[1];
X86Operand &Op2 = (X86Operand &)*Operands[2];

@ -301,9 +301,8 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
bool FPIsUsed = false;

static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0; i <= 6; ++i)
if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
FPIsUsed = true;
break;
}

@ -1682,6 +1682,8 @@ void X86FrameLowering::adjustForSegmentedStacks(
.addImm(StackSize);
BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
.addImm(X86FI->getArgumentStackSize());
MF.getRegInfo().setPhysRegUsed(Reg10);
MF.getRegInfo().setPhysRegUsed(Reg11);
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());

@ -12640,24 +12640,29 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
if (User->getOpcode() == ISD::FNEG)
return Op;

SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);

SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// Assume scalar op for initialization; update for vector if needed.
// Note that there are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
// Using a 16-byte mask allows folding the load of the mask with
// the logic op, so it can save (~4 bytes) on code size.
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;

// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.

MVT LogicVT;
MVT EltVT;
unsigned NumElts;

if (VT.isVector()) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
} else {
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
// Using a 16-byte mask allows folding the load of the mask with
// the logic op, so it can save (~4 bytes) on code size.
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
EltVT = VT;
NumElts = (VT == MVT::f64) ? 2 : 4;
}

unsigned EltBits = EltVT.getSizeInBits();
@ -12670,26 +12675,25 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
SDValue Mask = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);

if (VT.isVector()) {
// For a vector, cast operands to a vector type, perform the logic op,
// and cast the result back to the original value type.
MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
SDValue MaskCasted = DAG.getBitcast(VecVT, Mask);
SDValue Operand = IsFNABS ? DAG.getBitcast(VecVT, Op0.getOperand(0))
: DAG.getBitcast(VecVT, Op0);
unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR;
return DAG.getBitcast(VT,
DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
}

// If not vector, then scalar.
unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
unsigned LogicOp =
IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
return DAG.getNode(BitOp, dl, VT, Operand, Mask);

if (VT.isVector())
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);

// For the scalar case extend to a 128-bit vector, perform the logic op,
// and extract the scalar result back out.
Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand);
SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,
DAG.getIntPtrConstant(0, dl));
}

static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
@ -12729,10 +12733,16 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
Constant *C = ConstantVector::get(CV);
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,

// Perform all logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE. This allows load folding of the
// constants into the logic instructions.
MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1);

// Next, clear the sign bit from the first operand (magnitude).
// If it's a constant, we can clear it here.
@ -12740,7 +12750,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
APFloat APF = Op0CN->getValueAPF();
// If the magnitude is a positive zero, the sign bit alone is enough.
if (APF.isPosZero())
return SignBit;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
DAG.getIntPtrConstant(0, dl));
APF.clearSign();
CV[0] = ConstantFP::get(*Context, APF);
} else {
@ -12750,15 +12761,18 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, PtrVT, 16);
SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
SDValue Val = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
// If the magnitude operand wasn't a constant, we need to AND out the sign.
if (!isa<ConstantFPSDNode>(Op0))
Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Val);

if (!isa<ConstantFPSDNode>(Op0)) {
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val);
}
// OR the magnitude value with the sign bit.
return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
DAG.getIntPtrConstant(0, dl));
}

static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {

@ -956,18 +956,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
{ X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },

// FIXME: We should not be folding Fs* scalar loads into vector
// instructions because the vector instructions require vector-sized
// loads. Lowering should create vector-sized instructions (the Fv*
// variants below) to allow load folding.
{ X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 },
{ X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 },
{ X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 },
{ X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 },
{ X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 },
{ X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 },
{ X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 },
{ X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 },
// Do not fold Fs* scalar logical op loads because there are no scalar
// load variants for these instructions. When folded, the load is required
// to be 128-bits, so the load size would not match.

{ X86::FvANDNPDrr, X86::FvANDNPDrm, TB_ALIGN_16 },
{ X86::FvANDNPSrr, X86::FvANDNPSrm, TB_ALIGN_16 },

@ -2919,6 +2919,14 @@ multiclass sse12_fp_packed_vector_logical_alias<
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR128, v2f64, f128mem, loadv2f64, SSEPackedDouble, itins, 0>,
PD, VEX_4V;

defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR256, v8f32, f256mem, loadv8f32, SSEPackedSingle, itins, 0>,
PS, VEX_4V, VEX_L;

defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR256, v4f64, f256mem, loadv4f64, SSEPackedDouble, itins, 0>,
PD, VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {

@ -93,7 +93,8 @@ static Value *getFCmpValue(bool isordered, unsigned code,
case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
case 7:
if (!isordered) return ConstantInt::getTrue(LHS->getContext());
if (!isordered)
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
Pred = FCmpInst::FCMP_ORD; break;
}
return Builder->CreateFCmp(Pred, LHS, RHS);

@ -2112,9 +2112,8 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
Value *RHS, Instruction &OrigI,
Value *&Result, Constant *&Overflow) {
assert((!OrigI.isCommutative() ||
!(isa<Constant>(LHS) && !isa<Constant>(RHS))) &&
"call with a constant RHS if possible!");
if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
std::swap(LHS, RHS);

auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
Result = OpResult;

@ -658,7 +658,7 @@ bool EarlyCSE::run() {
// gains over vector when the container becomes very large due to the
// specific access patterns. For more information see the mailing list
// discussion on this:
// http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
// http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
std::deque<StackNode *> nodesToProcess;

bool Changed = false;

@ -1847,10 +1847,17 @@ static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
|
||||
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
|
||||
if (OldTy == NewTy)
|
||||
return true;
|
||||
if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
|
||||
if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
|
||||
if (NewITy->getBitWidth() >= OldITy->getBitWidth())
|
||||
return true;
|
||||
|
||||
// For integer types, we can't handle any bit-width differences. This would
|
||||
// break both vector conversions with extension and introduce endianness
|
||||
// issues when in conjunction with loads and stores.
|
||||
if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
|
||||
assert(cast<IntegerType>(OldTy)->getBitWidth() !=
|
||||
cast<IntegerType>(NewTy)->getBitWidth() &&
|
||||
"We can't have the same bitwidth for different int types");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
|
||||
return false;
|
||||
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
|
||||
@ -1885,10 +1892,8 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
|
||||
if (OldTy == NewTy)
|
||||
return V;
|
||||
|
||||
if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
|
||||
if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
|
||||
if (NewITy->getBitWidth() > OldITy->getBitWidth())
|
||||
return IRB.CreateZExt(V, NewITy);
|
||||
assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
|
||||
"Integer types must be the exact same to convert.");
|
||||
|
||||
// See if we need inttoptr for this type pair. A cast involving both scalars
|
||||
// and vectors requires and additional bitcast.
|
||||
@ -2134,6 +2139,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
|
||||
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
|
||||
if (LI->isVolatile())
|
||||
return false;
|
||||
// We can't handle loads that extend past the allocated memory.
|
||||
if (DL.getTypeStoreSize(LI->getType()) > Size)
|
||||
return false;
|
||||
// Note that we don't count vector loads or stores as whole-alloca
|
||||
// operations which enable integer widening because we would prefer to use
|
||||
// vector widening instead.
|
||||
@ -2152,6 +2160,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
|
||||
Type *ValueTy = SI->getValueOperand()->getType();
|
||||
if (SI->isVolatile())
|
||||
return false;
|
||||
// We can't handle stores that extend past the allocated memory.
|
||||
if (DL.getTypeStoreSize(ValueTy) > Size)
|
||||
return false;
|
||||
// Note that we don't count vector loads or stores as whole-alloca
|
||||
// operations which enable integer widening because we would prefer to use
|
||||
// vector widening instead.
|
||||
@ -2585,6 +2596,7 @@ private:

    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
                             : LI.getType();
    const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
    bool IsPtrAdjusted = false;
    Value *V;
    if (VecTy) {
@ -2592,13 +2604,27 @@ private:
    } else if (IntTy && LI.getType()->isIntegerTy()) {
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               canConvertValue(DL, NewAllocaTy, LI.getType())) {
               NewEndOffset == NewAllocaEndOffset &&
               (canConvertValue(DL, NewAllocaTy, TargetTy) ||
                (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
                 TargetTy->isIntegerTy()))) {
      LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                              LI.isVolatile(), LI.getName());
      if (LI.isVolatile())
        NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());

      V = NewLI;

      // If this is an integer load past the end of the slice (which means the
      // bytes outside the slice are undef or this load is dead) just forcibly
      // fix the integer size with correct handling of endianness.
      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
          if (AITy->getBitWidth() < TITy->getBitWidth()) {
            V = IRB.CreateZExt(V, TITy, "load.ext");
            if (DL.isBigEndian())
              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
                                "endian_shift");
          }
    } else {
      Type *LTy = TargetTy->getPointerTo();
      LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
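The zext-plus-shift above takes over the widening that convertValue used to perform, now with the endianness handled explicitly. A standalone sketch of the byte layout it preserves (assumption: a uint64_t stands in for the llvm::Value; widenLoad is hypothetical):

#include <cassert>
#include <cstdint>

// The alloca holds AllocaBits defined bits and the load wants LoadBits.
static uint64_t widenLoad(uint64_t V, unsigned AllocaBits, unsigned LoadBits,
                          bool BigEndian) {
  assert(AllocaBits < LoadBits && LoadBits <= 64);
  // The zext supplies the extra bits (undef in IR; zero here as a stand-in).
  if (BigEndian)
    V <<= (LoadBits - AllocaBits); // defined bytes belong at the high end
  return V;
}

int main() {
  // Little endian: an i16 alloca read as i24 keeps its bytes in place.
  assert(widenLoad(0xBEEF, 16, 24, false) == 0xBEEF);
  // Big endian: the same two bytes must move to the top of the i24.
  assert(widenLoad(0xBEEF, 16, 24, true) == 0xBEEF00);
}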
@ -2718,10 +2744,25 @@ private:
    if (IntTy && V->getType()->isIntegerTy())
      return rewriteIntegerStore(V, SI);

    const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
    StoreInst *NewSI;
    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&
        canConvertValue(DL, V->getType(), NewAllocaTy)) {
        (canConvertValue(DL, V->getType(), NewAllocaTy) ||
         (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
          V->getType()->isIntegerTy()))) {
      // If this is an integer store past the end of the slice (and thus the
      // bytes past that point are irrelevant or this is unreachable), truncate
      // the value prior to storing.
      if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
        if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
          if (VITy->getBitWidth() > AITy->getBitWidth()) {
            if (DL.isBigEndian())
              V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
                                 "endian_shift");
            V = IRB.CreateTrunc(V, AITy, "load.trunc");
          }

      V = convertValue(DL, IRB, V, NewAllocaTy);
      NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                     SI.isVolatile());
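The store path mirrors the load path: on big-endian targets the in-bounds bytes live in the high bits, so the value is shifted down before truncation. A matching sketch (same uint64_t stand-in; truncStore is hypothetical):

#include <cassert>
#include <cstdint>

// A value of ValueBits is stored into an alloca holding only AllocaBits;
// the out-of-bounds bytes are irrelevant, so the value is truncated first.
static uint64_t truncStore(uint64_t V, unsigned ValueBits, unsigned AllocaBits,
                           bool BigEndian) {
  assert(AllocaBits < ValueBits && ValueBits <= 64);
  if (BigEndian)
    V >>= (ValueBits - AllocaBits); // the in-bounds bytes are the high ones
  return V & ((1ull << AllocaBits) - 1); // trunc to the alloca's width
}

int main() {
  // Little endian keeps the low bytes of the i24 when storing into an i16.
  assert(truncStore(0xABCDEF, 24, 16, false) == 0xCDEF);
  // Big endian keeps the high bytes, matching the bytes that land in memory.
  assert(truncStore(0xABCDEF, 24, 16, true) == 0xABCD);
}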
@ -227,10 +227,16 @@ Value *Scatterer::operator[](unsigned I) {
      if (!Idx)
        break;
      unsigned J = Idx->getZExtValue();
      CV[J] = Insert->getOperand(1);
      V = Insert->getOperand(0);
      if (I == J)
      if (I == J) {
        CV[J] = Insert->getOperand(1);
        return CV[J];
      } else if (!CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for. This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
                                         V->getName() + ".i" + Twine(I));
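To see why the caching had to be restricted, here is a toy model of the insertelement-chain walk (assumption: a vector of {lane, value} pairs stands in for the IR chain, newest insert last; lookupLane is hypothetical):

#include <cstdio>
#include <map>
#include <vector>

// One {lane, value} pair stands in for a single insertelement instruction.
struct Insert { unsigned Lane; int Value; };

// Model of the fixed walk: scan from the newest insert backwards looking for
// lane I, caching only the *first* value seen per other lane, so an older
// insert can never overwrite a newer cached entry (the old code always wrote).
static int lookupLane(const std::vector<Insert> &Chain, unsigned I,
                      std::map<unsigned, int> &CV) {
  for (auto It = Chain.rbegin(); It != Chain.rend(); ++It) {
    unsigned J = It->Lane;
    if (I == J) {
      CV[J] = It->Value; // the newest write to the requested lane wins
      return CV[J];
    }
    if (!CV.count(J))
      CV[J] = It->Value; // first (newest) entry only
  }
  return -1; // lane never written; the real code extracts from the base vector
}

int main() {
  // v = insertelement(insertelement(insertelement(undef, 1, 0), 2, 1), 3, 0)
  std::vector<Insert> Chain = {{0, 1}, {1, 2}, {0, 3}};
  std::map<unsigned, int> CV;
  printf("lane 0 = %d\n", lookupLane(Chain, 0, CV)); // 3, not the stale 1
}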
@ -228,3 +228,51 @@ define i32 @test12(i32 %x, i32 %y, i8* %p) nounwind {
; CHECK-LABEL: @test12(
; CHECK: ret i32 %r
}

@P = internal global i32 715827882, align 4
@Q = internal global i32 715827883, align 4
@.str = private unnamed_addr constant [7 x i8] c"%u %u\0A\00", align 1

; Make sure we recognize that u[0] and u[Global + Cst] may alias
; when the addition has wrapping semantics.
; PR24468.
; CHECK-LABEL: @test13(
; Make sure the stores appear before the related loads.
; CHECK: store i8 42,
; CHECK: store i8 99,
; Find the loads and make sure they are used in the arguments to the printf.
; CHECK: [[T0ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds [3 x i8], [3 x i8]* %t, i32 0, i32 0
; CHECK: [[T0:%[a-zA-Z0-9_]+]] = load i8, i8* [[T0ADDR]], align 1
; CHECK: [[T0ARG:%[a-zA-Z0-9_]+]] = zext i8 [[T0]] to i32
; CHECK: [[U0ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds [3 x i8], [3 x i8]* %u, i32 0, i32 0
; CHECK: [[U0:%[a-zA-Z0-9_]+]] = load i8, i8* [[U0ADDR]], align 1
; CHECK: [[U0ARG:%[a-zA-Z0-9_]+]] = zext i8 [[U0]] to i32
; CHECK: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 [[T0ARG]], i32 [[U0ARG]])
; CHECK: ret
define void @test13() {
entry:
  %t = alloca [3 x i8], align 1
  %u = alloca [3 x i8], align 1
  %tmp = load i32, i32* @P, align 4
  %tmp1 = mul i32 %tmp, 3
  %mul = add i32 %tmp1, -2147483646
  %idxprom = zext i32 %mul to i64
  %arrayidx = getelementptr inbounds [3 x i8], [3 x i8]* %t, i64 0, i64 %idxprom
  store i8 42, i8* %arrayidx, align 1
  %tmp2 = load i32, i32* @Q, align 4
  %tmp3 = mul i32 %tmp2, 3
  %mul2 = add i32 %tmp3, 2147483647
  %idxprom3 = zext i32 %mul2 to i64
  %arrayidx4 = getelementptr inbounds [3 x i8], [3 x i8]* %u, i64 0, i64 %idxprom3
  store i8 99, i8* %arrayidx4, align 1
  %arrayidx5 = getelementptr inbounds [3 x i8], [3 x i8]* %t, i64 0, i64 0
  %tmp4 = load i8, i8* %arrayidx5, align 1
  %conv = zext i8 %tmp4 to i32
  %arrayidx6 = getelementptr inbounds [3 x i8], [3 x i8]* %u, i64 0, i64 0
  %tmp5 = load i8, i8* %arrayidx6, align 1
  %conv7 = zext i8 %tmp5 to i32
  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i32 %conv, i32 %conv7)
  ret void
}

declare i32 @printf(i8*, ...)
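The constants in this test are chosen so that both indices wrap to zero in i32, which is why the stores alias the later loads of element 0. A quick check of that arithmetic (plain C++; unsigned 32-bit wrap matches LLVM's two's-complement semantics):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t P = 715827882u, Q = 715827883u;
  // %mul: 3*P = 2147483646, plus -2147483646 wraps to 0, so %t[%idxprom] is t[0].
  uint32_t IdxT = P * 3u + (uint32_t)-2147483646;
  // %mul2: 3*Q = 2147483649, plus 2147483647 is 2^32, wrapping to 0: u[0] again.
  uint32_t IdxU = Q * 3u + 2147483647u;
  printf("t index = %u, u index = %u\n", IdxT, IdxU); // both print 0
  return 0;
}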
@ -39,7 +39,6 @@ return:

; CHECK-LABEL: pr18068
; CHECK: MayAlias: i32* %0, i32* %arrayidx5
; CHECK: NoAlias: i32* %arrayidx13, i32* %arrayidx5

define i32 @pr18068(i32* %jj7, i32* %j) {
entry:
Some files were not shown because too many files have changed in this diff.