Import llvm 3.7.0 release (r246257).

Dimitry Andric 2015-09-06 18:34:38 +00:00
parent ee8648bdac
commit 69156b4c20
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/llvm/dist/; revision=287510
svn path=/vendor/llvm/llvm-release_370-r246257/; revision=287511; tag=vendor/llvm/llvm-release_370-r246257
182 changed files with 3775 additions and 1316 deletions

View File

@ -61,7 +61,7 @@ set(CMAKE_MODULE_PATH
set(LLVM_VERSION_MAJOR 3)
set(LLVM_VERSION_MINOR 7)
set(LLVM_VERSION_PATCH 0)
set(LLVM_VERSION_SUFFIX svn)
set(LLVM_VERSION_SUFFIX "")
if (NOT PACKAGE_VERSION)
set(PACKAGE_VERSION
@ -518,7 +518,7 @@ if (APPLE)
else(UNIX)
if(NOT DEFINED CMAKE_INSTALL_RPATH)
set(CMAKE_INSTALL_RPATH "\$ORIGIN/../lib${LLVM_LIBDIR_SUFFIX}")
if (${CMAKE_SYSTEM_NAME} MATCHES FreeBSD)
if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,origin")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,origin")
endif()
@ -544,12 +544,12 @@ if(LLVM_USE_HOST_TOOLS)
include(CrossCompile)
endif(LLVM_USE_HOST_TOOLS)
if( ${CMAKE_SYSTEM_NAME} MATCHES FreeBSD )
if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
# On FreeBSD, /usr/local/* is not used by default. In order to build LLVM
# with libxml2, iconv.h, etc., we must add /usr/local paths.
include_directories("/usr/local/include")
link_directories("/usr/local/lib")
endif( ${CMAKE_SYSTEM_NAME} MATCHES FreeBSD )
endif(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include llvm/Support/Solaris.h")

View File

@ -465,3 +465,47 @@ N: Bob Wilson
E: bob.wilson@acm.org
D: Advanced SIMD (NEON) support in the ARM backend.
N: Alexey Bataev
E: a.bataev@hotmail.com
D: Clang OpenMP implementation
N: Andrey Bokhanko
E: andreybokhanko@gmail.com
D: Clang OpenMP implementation
N: Carlo Bertolli
E: cbertol@us.ibm.com
D: Clang OpenMP implementation
N: Eric Stotzer
E: estotzer@ti.com
D: Clang OpenMP implementation
N: Kelvin Li
E: kkwli0@gmail.com
D: Clang OpenMP implementation
N: Samuel Antao
E: sfantao@us.ibm.com
D: Clang OpenMP implementation
N: Sergey Ostanevich
E: sergos.gnu@gmail.com
D: Clang OpenMP implementation
N: Alexandre Eichenberger
E: alexe@us.ibm.com
D: Clang OpenMP implementation
N: Guansong Zhang
E: guansong.zhang@amd.com
D: Clang OpenMP implementation
N: Sunita Chandrasekaran
E: sunisg123@gmail.com
D: Clang OpenMP implementation
N: Michael Wong
E: fraggamuffin@gmail.com
D: Clang OpenMP implementation

View File

@ -58,7 +58,7 @@ LLVM_OBJ_ROOT := $(call realpath, @abs_top_builddir@)
PROJ_SRC_ROOT := $(LLVM_SRC_ROOT)
PROJ_SRC_DIR := $(LLVM_SRC_ROOT)$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR))
# See: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20150323/268067.html
# See: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150323/268067.html
ifeq ($(LLVM_SRC_ROOT), $(LLVM_OBJ_ROOT))
$(error In-source builds are not allowed. Please configure from a separate build directory!)
endif

View File

@ -32,12 +32,12 @@ dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl address for reporting bugs.
AC_INIT([LLVM],[3.7.0svn],[http://llvm.org/bugs/])
AC_INIT([LLVM],[3.7.0],[http://llvm.org/bugs/])
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=7
LLVM_VERSION_PATCH=0
LLVM_VERSION_SUFFIX=svn
LLVM_VERSION_SUFFIX=
AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
AC_DEFINE_UNQUOTED([LLVM_VERSION_MINOR], $LLVM_VERSION_MINOR, [Minor version of the LLVM API])

View File

@ -131,7 +131,7 @@ endif()
# Pass -Wl,-z,defs. This makes sure all symbols are defined. Otherwise a DSO
# build might work on ELF but fail on MachO/COFF.
if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32 OR
if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32 OR CYGWIN OR
${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") AND
NOT LLVM_USE_SANITIZER)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,defs")

configure (vendored)
View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.60 for LLVM 3.7.0svn.
# Generated by GNU Autoconf 2.60 for LLVM 3.7.0.
#
# Report bugs to <http://llvm.org/bugs/>.
#
@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='LLVM'
PACKAGE_TARNAME='llvm'
PACKAGE_VERSION='3.7.0svn'
PACKAGE_STRING='LLVM 3.7.0svn'
PACKAGE_VERSION='3.7.0'
PACKAGE_STRING='LLVM 3.7.0'
PACKAGE_BUGREPORT='http://llvm.org/bugs/'
ac_unique_file="lib/IR/Module.cpp"
@ -1333,7 +1333,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures LLVM 3.7.0svn to adapt to many kinds of systems.
\`configure' configures LLVM 3.7.0 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@ -1399,7 +1399,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of LLVM 3.7.0svn:";;
short | recursive ) echo "Configuration of LLVM 3.7.0:";;
esac
cat <<\_ACEOF
@ -1583,7 +1583,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
LLVM configure 3.7.0svn
LLVM configure 3.7.0
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@ -1599,7 +1599,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by LLVM $as_me 3.7.0svn, which was
It was created by LLVM $as_me 3.7.0, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@ -1956,7 +1956,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LLVM_VERSION_MAJOR=3
LLVM_VERSION_MINOR=7
LLVM_VERSION_PATCH=0
LLVM_VERSION_SUFFIX=svn
LLVM_VERSION_SUFFIX=
cat >>confdefs.h <<_ACEOF
@ -18610,7 +18610,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by LLVM $as_me 3.7.0svn, which was
This file was extended by LLVM $as_me 3.7.0, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@ -18663,7 +18663,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
LLVM config.status 3.7.0svn
LLVM config.status 3.7.0
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"

View File

@ -173,7 +173,7 @@ Notes for code generation
also expected to generate an i8 store as an i8 store, and not an instruction
which writes to surrounding bytes. (If you are writing a backend for an
architecture which cannot satisfy these restrictions and cares about
concurrency, please send an email to llvmdev.)
concurrency, please send an email to llvm-dev.)
Unordered
---------

View File

@ -387,6 +387,10 @@ LLVM-specific variables
``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise this has no
effect.
**LLVM_DOXYGEN_SVG**:BOOL
Uses .svg files instead of .png files for graphs in the Doxygen output.
Defaults to OFF.
**LLVM_ENABLE_SPHINX**:BOOL
If enabled CMake will search for the ``sphinx-build`` executable and will make
the ``SPHINX_OUTPUT_HTML`` and ``SPHINX_OUTPUT_MAN`` CMake options available.

View File

@ -56,6 +56,14 @@ if (LLVM_ENABLE_DOXYGEN)
set(llvm_doxygen_qhp_cust_filter_attrs "")
endif()
option(LLVM_DOXYGEN_SVG
"Use svg instead of png files for doxygen graphs." OFF)
if (LLVM_DOXYGEN_SVG)
set(DOT_IMAGE_FORMAT "svg")
else()
set(DOT_IMAGE_FORMAT "png")
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/doxygen.cfg.in
${CMAKE_CURRENT_BINARY_DIR}/doxygen.cfg @ONLY)
@ -73,6 +81,7 @@ if (LLVM_ENABLE_DOXYGEN)
set(llvm_doxygen_qhelpgenerator_path)
set(llvm_doxygen_qhp_cust_filter_name)
set(llvm_doxygen_qhp_cust_filter_attrs)
set(DOT_IMAGE_FORMAT)
add_custom_target(doxygen-llvm
COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/doxygen.cfg

View File

@ -1814,6 +1814,7 @@ Here is the table:
:raw-html:`<th>SystemZ</th>`
:raw-html:`<th>X86</th>`
:raw-html:`<th>XCore</th>`
:raw-html:`<th>eBPF</th>`
:raw-html:`</tr>`
:raw-html:`<tr>`
@ -1828,6 +1829,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="yes"></td> <!-- XCore -->`
:raw-html:`<td class="yes"></td> <!-- eBPF -->`
:raw-html:`</tr>`
:raw-html:`<tr>`
@ -1842,6 +1844,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
:raw-html:`<td class="no"></td> <!-- eBPF -->`
:raw-html:`</tr>`
:raw-html:`<tr>`
@ -1856,6 +1859,7 @@ Here is the table:
:raw-html:`<td class="no"></td> <!-- Sparc -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="yes"></td> <!-- XCore -->`
:raw-html:`<td class="yes"></td> <!-- eBPF -->`
:raw-html:`</tr>`
:raw-html:`<tr>`
@ -1870,6 +1874,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="yes"></td> <!-- XCore -->`
:raw-html:`<td class="no"></td> <!-- eBPF -->`
:raw-html:`</tr>`
:raw-html:`<tr>`
@ -1884,6 +1889,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
:raw-html:`<td class="yes"></td> <!-- eBPF -->`
:raw-html:`</tr>`
:raw-html:`<tr>`
@ -1898,6 +1904,7 @@ Here is the table:
:raw-html:`<td class="yes"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
:raw-html:`<td class="yes"></td> <!-- eBPF -->`
:raw-html:`</tr>`
:raw-html:`<tr>`
@ -1912,6 +1919,7 @@ Here is the table:
:raw-html:`<td class="no"></td> <!-- SystemZ -->`
:raw-html:`<td class="yes"></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
:raw-html:`<td class="no"></td> <!-- eBPF -->`
:raw-html:`</tr>`
:raw-html:`<tr>`
@ -1926,6 +1934,7 @@ Here is the table:
:raw-html:`<td class="no"></td> <!-- SystemZ -->`
:raw-html:`<td class="partial"><a href="#feat_segstacks_x86">*</a></td> <!-- X86 -->`
:raw-html:`<td class="no"></td> <!-- XCore -->`
:raw-html:`<td class="no"></td> <!-- eBPF -->`
:raw-html:`</tr>`
:raw-html:`</table>`
@ -2448,3 +2457,191 @@ Code Generator Options:
:raw-html:`</tr>`
:raw-html:`</table>`
The extended Berkeley Packet Filter (eBPF) backend
--------------------------------------------------
Extended BPF (or eBPF) is similar to the original ("classic") BPF (cBPF) used
to filter network packets. The
`bpf() system call <http://man7.org/linux/man-pages/man2/bpf.2.html>`_
performs a range of operations related to eBPF. For both cBPF and eBPF
programs, the Linux kernel statically analyzes the programs before loading
them, in order to ensure that they cannot harm the running system. eBPF is
a 64-bit RISC instruction set designed for one-to-one mapping to 64-bit CPUs.
Opcodes are 8-bit encoded, and 87 instructions are defined. There are 10
registers, grouped by function as outlined below.
::
R0 return value from in-kernel functions; exit value for eBPF program
R1 - R5 function call arguments to in-kernel functions
R6 - R9 callee-saved registers preserved by in-kernel functions
R10 stack frame pointer (read only)
Instruction encoding (arithmetic and jump)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
eBPF reuses most of the opcode encoding from classic BPF to simplify the
conversion of classic BPF programs to eBPF. For arithmetic and jump
instructions the 8-bit 'code' field is divided into three parts:
::
+----------------+--------+--------------------+
| 4 bits | 1 bit | 3 bits |
| operation code | source | instruction class |
+----------------+--------+--------------------+
(MSB) (LSB)
The three LSB bits store the instruction class, which is one of:
::
BPF_LD 0x0
BPF_LDX 0x1
BPF_ST 0x2
BPF_STX 0x3
BPF_ALU 0x4
BPF_JMP 0x5
(unused) 0x6
BPF_ALU64 0x7
When BPF_CLASS(code) is BPF_ALU, BPF_ALU64, or BPF_JMP,
the 4th bit encodes the source operand:
::
BPF_K 0x0 use 32-bit immediate as source operand
BPF_X 0x1 use src_reg register as source operand
and the four MSB bits store the operation code:
::
BPF_ADD 0x0 add
BPF_SUB 0x1 subtract
BPF_MUL 0x2 multiply
BPF_DIV 0x3 divide
BPF_OR 0x4 bitwise logical OR
BPF_AND 0x5 bitwise logical AND
BPF_LSH 0x6 left shift
BPF_RSH 0x7 right shift (zero extended)
BPF_NEG 0x8 arithmetic negation
BPF_MOD 0x9 modulo
BPF_XOR 0xa bitwise logical XOR
BPF_MOV 0xb move register to register
BPF_ARSH 0xc right shift (sign extended)
BPF_END 0xd endianness conversion
If BPF_CLASS(code) is BPF_JMP, BPF_OP(code) is one of:
::
BPF_JA 0x0 unconditional jump
BPF_JEQ 0x1 jump ==
BPF_JGT 0x2 jump >
BPF_JGE 0x3 jump >=
BPF_JSET 0x4 jump if (DST & SRC)
BPF_JNE 0x5 jump !=
BPF_JSGT 0x6 jump signed >
BPF_JSGE 0x7 jump signed >=
BPF_CALL 0x8 function call
BPF_EXIT 0x9 function return
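To make the bit layout above concrete, here is a minimal sketch (an
illustration only, not part of the original LLVM documentation) that unpacks
an arithmetic/jump opcode into its three fields:

::

    // Sketch: split the 8-bit 'code' field of an ALU/ALU64/JMP instruction
    // into operation (4 MSB), source bit, and instruction class (3 LSB).
    #include <cstdio>

    int main() {
      unsigned code = 0xbf;              // op 0xb (BPF_MOV), source bit set,
                                         // class 0x7 (BPF_ALU64)
      unsigned cls = code & 0x07;        // 3 LSB: instruction class
      unsigned src = (code >> 3) & 0x1;  // 1 bit: source operand selector
      unsigned op  = (code >> 4) & 0xf;  // 4 MSB: operation code
      std::printf("op=0x%x src=%u class=0x%x\n", op, src, cls);
      return 0;
    }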
Instruction encoding (load, store)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For load and store instructions the 8-bit 'code' field is divided as:
::
+--------+--------+-------------------+
| 3 bits | 2 bits | 3 bits |
| mode | size | instruction class |
+--------+--------+-------------------+
(MSB) (LSB)
The size modifier is one of:
::
BPF_W 0x0 word
BPF_H 0x1 half word
BPF_B 0x2 byte
BPF_DW 0x3 double word
The mode modifier is one of:
::
BPF_IMM 0x0 immediate
BPF_ABS 0x1 used to access packet data
BPF_IND 0x2 used to access packet data
BPF_MEM 0x3 memory
(reserved) 0x4
(reserved) 0x5
BPF_XADD 0x6 exclusive add
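Composing a load/store opcode from these fields works the same way; a short
sketch (illustration only):

::

    // Sketch: assemble a load/store opcode from mode (3 MSB), size (2 bits),
    // and instruction class (3 LSB).
    unsigned makeLoadStoreOpcode(unsigned mode, unsigned size, unsigned cls) {
      return (mode << 5) | (size << 3) | cls;
    }
    // makeLoadStoreOpcode(0x3 /*BPF_MEM*/, 0x0 /*BPF_W*/, 0x1 /*BPF_LDX*/)
    // yields 0x61, i.e. BPF_LDX | BPF_MEM | BPF_W: load a word from memory.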
Packet data access (BPF_ABS, BPF_IND)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Two non-generic instructions, (BPF_ABS | <size> | BPF_LD) and
(BPF_IND | <size> | BPF_LD), are used to access packet data.
Register R6 is an implicit input that must contain a pointer to sk_buff.
Register R0 is an implicit output which contains the data fetched
from the packet. Registers R1-R5 are scratch registers and must not
be used to store data across BPF_ABS | BPF_LD or BPF_IND | BPF_LD
instructions. These instructions also have an implicit program exit
condition: when an eBPF program tries to access data beyond the packet
boundary, the interpreter aborts execution of the program.
BPF_IND | BPF_W | BPF_LD is equivalent to:
R0 = ntohl(\*(u32 \*) (((struct sk_buff \*) R6)->data + src_reg + imm32))
eBPF maps
^^^^^^^^^
eBPF maps are provided for sharing data between kernel and user-space.
Currently implemented types are hash and array, with potential extension to
support bloom filters, radix trees, etc. A map is defined by its type,
maximum number of elements, key size and value size in bytes. The eBPF
syscall supports create, update, find, and delete operations on maps.
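As an illustration of those map parameters, the following sketch creates a
hash map through bpf(2) (Linux UAPI headers assumed; error handling omitted):

::

    // Sketch: create an eBPF hash map via the bpf(2) syscall. A map is
    // defined by its type, key size, value size, and maximum element count.
    #include <linux/bpf.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cstring>

    int createHashMap() {
      union bpf_attr attr;
      std::memset(&attr, 0, sizeof(attr));
      attr.map_type    = BPF_MAP_TYPE_HASH;
      attr.key_size    = sizeof(int);
      attr.value_size  = sizeof(long long);
      attr.max_entries = 256;
      return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
    }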
Function calls
^^^^^^^^^^^^^^
Function call arguments are passed using up to five registers (R1 - R5).
The return value is passed in a dedicated register (R0). Four additional
registers (R6 - R9) are callee-saved, and the values in these registers
are preserved within kernel functions. R0 - R5 are scratch registers within
kernel functions, and eBPF programs must therefore store/restore values in
these registers if needed across function calls. The stack can be accessed
using the read-only frame pointer R10. eBPF registers map 1:1 to hardware
registers on x86_64 and other 64-bit architectures. For example, x86_64
in-kernel JIT maps them as
::
R0 - rax
R1 - rdi
R2 - rsi
R3 - rdx
R4 - rcx
R5 - r8
R6 - rbx
R7 - r13
R8 - r14
R9 - r15
R10 - rbp
since the x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing,
and rbx, r12 - r15 are callee-saved.
Program start
^^^^^^^^^^^^^
An eBPF program receives a single argument and contains
a single eBPF main routine; the program does not contain eBPF functions.
Function calls are limited to a predefined set of kernel functions. The size
of a program is limited to 4K instructions: this ensures fast termination and
a limited number of kernel function calls. Prior to running an eBPF program,
a verifier performs static analysis to prevent loops in the code and
to ensure valid register usage and operand types.
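Loading a program for verification and execution likewise goes through
bpf(2); a hedged sketch along the lines of the kernel samples (field names
from the Linux UAPI headers, error handling omitted):

::

    // Sketch: submit eBPF bytecode to the kernel verifier via bpf(2).
    // 'insns' would come from an eBPF assembler or the LLVM BPF backend.
    #include <linux/bpf.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cstring>

    int loadProgram(const struct bpf_insn *insns, unsigned count) {
      union bpf_attr attr;
      std::memset(&attr, 0, sizeof(attr));
      attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
      attr.insns     = (unsigned long)insns;        // pointer passed as u64
      attr.insn_cnt  = count;
      attr.license   = (unsigned long)"GPL";
      return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
    }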

View File

@ -28,7 +28,7 @@ Note that some code bases (e.g. ``libc++``) have really good reasons to deviate
from the coding standards. In the case of ``libc++``, this is because the
naming and other conventions are dictated by the C++ standard. If you think
there is a specific good reason to deviate from the standards here, please bring
it up on the LLVMdev mailing list.
it up on the LLVM-dev mailing list.
There are some conventions that are not uniformly followed in the code base
(e.g. the naming convention). This is because they are relatively new, and a

View File

@ -30,7 +30,7 @@ This policy is also designed to accomplish the following objectives:
This policy is aimed at frequent contributors to LLVM. People interested in
contributing one-off patches can do so in an informal way by sending them to the
`llvm-commits mailing list
<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_ and engaging another
<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_ and engaging another
developer to see it through the process.
Developer Policies
@ -47,23 +47,23 @@ Stay Informed
-------------
Developers should stay informed by reading at least the "dev" mailing list for
the projects you are interested in, such as `llvmdev
<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ for LLVM, `cfe-dev
<http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev>`_ for Clang, or `lldb-dev
<http://lists.cs.uiuc.edu/mailman/listinfo/lldb-dev>`_ for LLDB. If you are
the projects you are interested in, such as `llvm-dev
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ for LLVM, `cfe-dev
<http://lists.llvm.org/mailman/listinfo/cfe-dev>`_ for Clang, or `lldb-dev
<http://lists.llvm.org/mailman/listinfo/lldb-dev>`_ for LLDB. If you are
doing anything more than just casual work on LLVM, it is suggested that you also
subscribe to the "commits" mailing list for the subproject you're interested in,
such as `llvm-commits
<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_, `cfe-commits
<http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits>`_, or `lldb-commits
<http://lists.cs.uiuc.edu/mailman/listinfo/lldb-commits>`_. Reading the
<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_, `cfe-commits
<http://lists.llvm.org/mailman/listinfo/cfe-commits>`_, or `lldb-commits
<http://lists.llvm.org/mailman/listinfo/lldb-commits>`_. Reading the
"commits" list and paying attention to changes being made by others is a good
way to see what other people are interested in and watching the flow of the
project as a whole.
We recommend that active developers register an email account with `LLVM
Bugzilla <http://llvm.org/bugs/>`_ and preferably subscribe to the `llvm-bugs
<http://lists.cs.uiuc.edu/mailman/listinfo/llvmbugs>`_ email list to keep track
<http://lists.llvm.org/mailman/listinfo/llvm-bugs>`_ email list to keep track
of bugs and enhancements occurring in LLVM. We really appreciate people who are
proactive at catching incoming bugs in their components and dealing with them
promptly.
@ -365,7 +365,7 @@ If you have recently been granted commit access, these policies apply:
#. You are granted *commit-after-approval* to all parts of LLVM. To get
approval, submit a `patch`_ to `llvm-commits
<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_. When approved,
<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_. When approved,
you may commit it yourself.
#. You are allowed to commit patches without approval which you think are
@ -394,8 +394,8 @@ Making a Major Change
---------------------
When a developer begins a major new project with the aim of contributing it back
to LLVM, they should inform the community with an email to the `llvmdev
<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ email list, to the extent
to LLVM, they should inform the community with an email to the `llvm-dev
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ email list, to the extent
possible. The reason for this is to:
#. keep the community informed about future changes to LLVM,
@ -608,7 +608,7 @@ LICENSE.txt files specifically indicate that they contain GPL code.
We have no plans to change the license of LLVM. If you have questions or
comments about the license, please contact the `LLVM Developer's Mailing
List <mailto:llvmdev@cs.uiuc.edu>`_.
List <mailto:llvm-dev@lists.llvm.org>`_.
Patents
-------

View File

@ -15,7 +15,7 @@ When you come to this realization, stop and think. Do you really need to extend
LLVM? Is it a new fundamental capability that LLVM does not support at its
current incarnation or can it be synthesized from already pre-existing LLVM
elements? If you are not sure, ask on the `LLVM-dev
<http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ list. The reason is that
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ list. The reason is that
extending LLVM will get involved as you need to update all the different passes
that you intend to use with your extension, and there are ``many`` LLVM analyses
and transformations, so it may be quite a bit of work.

View File

@ -174,10 +174,10 @@ Adding to this document
If you run across a case that you feel deserves to be covered here, please send
a patch to `llvm-commits
<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_ for review.
<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_ for review.
If you have questions on these items, please direct them to `llvmdev
<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_. The more relevant
If you have questions on these items, please direct them to `llvm-dev
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_. The more relevant
context you are able to give to your question, the more likely it is to be
answered.

View File

@ -714,9 +714,9 @@ used by people developing LLVM.
| | the configure script. The default list is defined |
| | as ``LLVM_ALL_TARGETS``, and can be set to include |
| | out-of-tree targets. The default value includes: |
| | ``AArch64, ARM, CppBackend, Hexagon, |
| | Mips, MSP430, NVPTX, PowerPC, AMDGPU, Sparc, |
| | SystemZ, X86, XCore``. |
| | ``AArch64, AMDGPU, ARM, BPF, CppBackend, Hexagon, |
| | Mips, MSP430, NVPTX, PowerPC, Sparc, SystemZ, |
| | X86, XCore``. |
+-------------------------+----------------------------------------------------+
| LLVM_ENABLE_DOXYGEN | Build doxygen-based documentation from the source |
| | code. This is disabled by default because it is |

View File

@ -6493,7 +6493,7 @@ Example:
%ptr = alloca i32 ; yields i32*:ptr
store i32 3, i32* %ptr ; yields void
%val = load i32* %ptr ; yields i32:val = i32 3
%val = load i32, i32* %ptr ; yields i32:val = i32 3
.. _i_fence:

View File

@ -31,6 +31,7 @@ $(PROJ_OBJ_DIR)/doxygen.cfg: doxygen.cfg.in
-e 's/@llvm_doxygen_qhp_cust_filter_name@//g' \
-e 's/@llvm_doxygen_qhp_namespace@//g' \
-e 's/@searchengine_url@//g' \
-e 's/@DOT_IMAGE_FORMAT@/png/g' \
> $@
endif

View File

@ -150,7 +150,7 @@ Status
Please let us know whether you like it and what could be improved! We're still
working on setting up a bug tracker, but you can email klimek-at-google-dot-com
and chandlerc-at-gmail-dot-com and CC the llvmdev mailing list with questions
and chandlerc-at-gmail-dot-com and CC the llvm-dev mailing list with questions
until then. We also could use help implementing improvements. This sadly is
really painful and hard because the Phabricator codebase is in PHP and not as
testable as you might like. However, we've put exactly what we're deploying up

View File

@ -254,4 +254,4 @@ Further Help
If you have any questions or need any help creating an LLVM project, the LLVM
team would be more than happy to help. You can always post your questions to
the `LLVM Developers Mailing List
<http://lists.cs.uiuc.edu/pipermail/llvmdev/>`_.
<http://lists.llvm.org/pipermail/llvm-dev/>`_.

View File

@ -5,12 +5,6 @@ LLVM 3.7 Release Notes
.. contents::
:local:
.. warning::
These are in-progress notes for the upcoming LLVM 3.7 release. You may
prefer the `LLVM 3.6 Release Notes <http://llvm.org/releases/3.6.0/docs
/ReleaseNotes.html>`_.
Introduction
============
@ -23,7 +17,7 @@ from the `LLVM releases web site <http://llvm.org/releases/>`_.
For more information about LLVM, including information about the latest
release, please check out the `main LLVM web site <http://llvm.org/>`_. If you
have questions or comments, the `LLVM Developer's Mailing List
<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ is a good place to send
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send
them.
Note that if you are reading this file from a Subversion checkout or the main
@ -48,46 +42,346 @@ Non-comprehensive list of changes in this release
collection of tips for frontend authors on how to generate IR which LLVM is
able to effectively optimize.
* The DataLayout is no longer optional. All the IR level optimizations expects
* The ``DataLayout`` is no longer optional. All the IR level optimizations expect
it to be present and the API has been changed to use a reference instead of
a pointer to make it explicit. The Module owns the datalayout and it has to
match the one attached to the TargetMachine for generating code.
* ... next change ...
In 3.6, a pass was inserted in the pipeline to make the ``DataLayout`` accessible:
``MyPassManager->add(new DataLayoutPass(MyTargetMachine->getDataLayout()));``
In 3.7, you don't need a pass, you set the ``DataLayout`` on the ``Module``:
``MyModule->setDataLayout(MyTargetMachine->createDataLayout());``
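For instance, a front end's setup code might now look like this minimal
sketch (the TargetMachine is assumed to have been created elsewhere):

::

    #include "llvm/IR/Module.h"
    #include "llvm/Target/TargetMachine.h"

    // Sketch of the 3.7 pattern: the Module owns the DataLayout, which must
    // match the TargetMachine used for code generation.
    void attachDataLayout(llvm::Module &M, llvm::TargetMachine &TM) {
      M.setDataLayout(TM.createDataLayout());
    }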
.. NOTE
If you would like to document a larger change, then you can add a
subsection about it right here. You can copy the following boilerplate
and un-indent it (the indentation causes it to be inside this comment).
The LLVM C API ``LLVMGetTargetMachineData`` is deprecated to reflect the fact
that it won't be available anymore from ``TargetMachine`` in 3.8.
Special New Feature
-------------------
* Comdats are now orthogonal to the linkage. LLVM will not create
comdats for weak linkage globals and the frontends are responsible
for explicitly adding them.
Makes programs 10x faster by doing Special New Thing.
* On ELF we now support multiple sections with the same name and
comdat. This allows for smaller object files since multiple
sections can have a simple name (`.text`, `.rodata`, etc).
Changes to the ARM Backend
--------------------------
* LLVM now lazily loads metadata in some cases. Creating archives
with IR files with debug info is now 25X faster.
During this release ...
* llvm-ar can create archives in the BSD format used by OS X.
* LLVM received a backend for the extended Berkeley Packet Filter
instruction set that can be dynamically loaded into the Linux kernel via the
`bpf(2) <http://man7.org/linux/man-pages/man2/bpf.2.html>`_ syscall.
Support for BPF has been present in the kernel for some time, but starting
from 3.18 it has been extended with such features as: 64-bit registers, 8
additional registers, conditional backwards jumps, a call
instruction, shift instructions, maps (hash table, array, etc.), 1-8 byte
load/store from stack, and more.
Up until now, users of BPF had to write bytecode by hand, or use
custom generators. This release adds a proper LLVM backend target for the BPF
bytecode architecture.
The BPF target is now available by default, and options exist in both Clang
(-target bpf) and llc (-march=bpf) to pick eBPF as a backend.
* Switch-case lowering was rewritten to avoid generating unbalanced search trees
(`PR22262 <http://llvm.org/pr22262>`_) and to exploit profile information
when available. Some lowering strategies are now disabled when optimizations
are turned off, to save compile time.
* The debug info IR class hierarchy now inherits from ``Metadata`` and has its
own bitcode records and assembly syntax
(`documented in LangRef <LangRef.html#specialized-metadata-nodes>`_). The debug
info verifier has been merged with the main verifier.
* LLVM IR and APIs are in a period of transition to aid in the removal of
pointer types (the end goal being that pointers are typeless/opaque - void*,
if you will). Some APIs and IR constructs have been modified to take
explicit types that are currently checked to match the target type of their
pre-existing pointer type operands. Further changes are still needed, but the
more you can avoid using ``PointerType::getPointeeType``, the easier the
migration will be.
* Argument-less ``TargetMachine::getSubtarget`` and
``TargetMachine::getSubtargetImpl`` have been removed from the tree. Updating
out of tree ports is as simple as implementing a non-virtual version in the
target, but implementing full ``Function`` based ``TargetSubtargetInfo``
support is recommended.
* This is expected to be the last major release of LLVM that supports being
run on Windows XP and Windows Vista. For the next major release the minimum
Windows version requirement will be Windows 7.
Changes to the MIPS Target
--------------------------
During this release ...
During this release the MIPS target has:
* Added support for MIPS32R3, MIPS32R5, MIPS64R3, MIPS64R5, and microMIPS32.
* Added support for dynamic stack realignment. This is of particular importance
to MSA on 32-bit subtargets since vectors always exceed the stack alignment on
the O32 ABI.
* Added support for compiler-rt including:
* Support for the Address and Undefined Behaviour Sanitizers for all MIPS
subtargets.
* Support for the Data Flow and Memory Sanitizers for 64-bit subtargets.
* Support for the Profiler for all MIPS subtargets.
* Added support for libcxx and libcxxabi.
* Improved inline assembly support such that memory constraints may now make use
of the appropriate address offsets available to the instructions. Also, added
support for the ``ZC`` constraint.
* Added support for 128-bit integers on 64-bit subtargets and 16-bit floating
point conversions on all subtargets.
* Added support for read-only ``.eh_frame`` sections by storing type information
indirectly.
* Added support for MCJIT on all 64-bit subtargets as well as MIPS32R6.
* Added support for fast instruction selection on MIPS32 and MIPS32R2 with PIC.
* Various bug fixes. Including the following notable fixes:
* Fixed 'jumpy' debug line info around calls where calculation of the address
of the function would inappropriately change the line number.
* Fixed missing ``__mips_isa_rev`` macro on the MIPS32R6 and MIPS64R6
subtargets.
* Fixed representation of NaN when targeting systems using traditional
encodings. Traditionally, MIPS has used NaN encodings that were compatible
with IEEE754-1985 but would later be found incompatible with IEEE754-2008.
* Fixed multiple segfaults and assertions in the disassembler when
disassembling instructions that have memory operands.
* Fixed multiple cases of suboptimal code generation involving $zero.
* Fixed code generation of 128-bit shifts on 64-bit subtargets.
* Prevented the delay slot filler from filling call delay slots with
instructions that modify or use $ra.
* Fixed some remaining N32/N64 calling convention bugs when using small
structures on big-endian subtargets.
* Fixed missing sign-extensions that are required by the N32/N64 calling
convention when generating calls to library functions with 32-bit
parameters.
* Corrected the ``int64_t`` typedef to be ``long`` for N64.
* ``-mno-odd-spreg`` is now honoured for vector insertion/extraction
operations when using -mmsa.
* Fixed vector insertion and extraction for MSA on 64-bit subtargets.
* Corrected the representation of member function pointers. This makes them
usable on microMIPS subtargets.
Changes to the PowerPC Target
-----------------------------
During this release ...
There are numerous improvements to the PowerPC target in this release:
* LLVM now supports the ISA 2.07B (POWER8) instruction set, including
direct moves between general registers and vector registers, and
built-in support for hardware transactional memory (HTM). Some missing
instructions from ISA 2.06 (POWER7) were also added.
Changes to the OCaml bindings
* Code generation for the local-dynamic and global-dynamic thread-local
storage models has been improved.
* Loops may be restructured to leverage pre-increment loads and stores.
* QPX - The vector instruction set used by the IBM Blue Gene/Q supercomputers
is now supported.
* Loads from the TOC area are now correctly treated as invariant.
* PowerPC now has support for i128 and v1i128 types. The types differ
in how they are passed in registers for the ELFv2 ABI.
* Disassembly will now print shorter mnemonic aliases when available.
* Optional register name prefixes for VSX and QPX registers are now
supported in the assembly parser.
* The back end now contains a pass to remove unnecessary vector swaps
from POWER8 little-endian code generation. Additional improvements
are planned for release 3.8.
* The undefined-behavior sanitizer (UBSan) is now supported for PowerPC.
* Many new vector programming APIs have been added to altivec.h.
Additional ones are planned for release 3.8.
* PowerPC now supports __builtin_call_with_static_chain.
* PowerPC now supports the revised -mrecip option that permits finer
control over reciprocal estimates.
* Many bugs have been identified and fixed.
Changes to the SystemZ Target
-----------------------------
During this release ...
* LLVM no longer attempts to automatically detect the current host CPU when
invoked natively.
* Support for all thread-local storage models. (Previous releases would support
only the local-exec TLS model.)
* The POPCNT instruction is now used on z196 and above.
* The RISBGN instruction is now used on zEC12 and above.
* Support for the transactional-execution facility on zEC12 and above.
* Support for the z13 processor and its vector facility.
Changes to the JIT APIs
-----------------------
* Added a new C++ JIT API called On Request Compilation, or ORC.
ORC is a new JIT API inspired by MCJIT but designed to be more testable and
easier to extend with new features. A key new feature already in tree is lazy,
function-at-a-time compilation for X86. Also included is a reimplementation of
MCJIT's API and behavior (OrcMCJITReplacement). MCJIT itself remains in tree,
and continues to be the default JIT ExecutionEngine, though new users are
encouraged to try ORC out for their projects. (A good place to start is the
new ORC tutorials under llvm/examples/kaleidoscope/orc).
Sub-project Status Update
=========================
In addition to the core LLVM 3.7 distribution of production-quality compiler
infrastructure, the LLVM project includes sub-projects that use the LLVM core
and share the same distribution license. This section provides updates on these
sub-projects.
Polly - The Polyhedral Loop Optimizer in LLVM
---------------------------------------------
`Polly <http://polly.llvm.org>`_ is a polyhedral loop optimization
infrastructure that provides data-locality optimizations to LLVM-based
compilers. When compiled as part of clang or loaded as a module into clang,
it can perform loop optimizations such as tiling, loop fusion or outer-loop
vectorization. As a generic loop optimization infrastructure it allows
developers to get a per-loop-iteration model of a loop nest on which detailed
analysis and transformations can be performed.
Changes since the last release:
* isl imported into Polly distribution
`isl <http://repo.or.cz/w/isl.git>`_, the math library Polly uses, has been
imported into the source code repository of Polly and is now distributed as part
of Polly. As this was the last external library dependency of Polly, Polly can
now be compiled right after checking out the Polly source code without the need
for any additional libraries to be pre-installed.
* Small integer optimization of isl
The MIT-licensed imath backend used in `isl <http://repo.or.cz/w/isl.git>`_ for
arbitrary-width integer computations has been optimized to use native integer
operations for the common case where the operands of a computation fit into 32
bits, and to only fall back to large arbitrary-precision integers for the
remaining cases. This optimization has greatly improved the compile-time
performance of Polly, both due to faster native operations and due to a
reduction in malloc traffic and pointer indirections. As a result, computations
that use arbitrary-precision integers heavily have been sped up by almost 6x.
Overall, the compile time of Polly on the Polybench test kernels in the LNT
suite has been reduced by 20% on average, with reductions between 9% and 43%.
* Schedule Trees
Polly now internally uses so-called "schedule trees" to model the loop
structure it optimizes. Schedule trees are an easy-to-understand tree structure
that describes a loop nest using integer constraint sets to keep track of
execution constraints. They allow the developer to use per-tree-node operations
to modify the loop tree. Programmatic analyses that work on the schedule tree
(e.g., dependence analysis) also show a visible speedup, as they can exploit
the tree structure of the schedule and need to fall back to ILP-based
optimization problems less often. Section 6 of `Polyhedral AST generation is
more than scanning polyhedra
<http://www.grosser.es/#pub-polyhedral-AST-generation>`_ gives a detailed
explanation of these schedule trees.
* Scalar and PHI node modeling - Polly as an analysis
Polly now requires almost no preprocessing to analyse LLVM-IR, which makes it
easier to use Polly as a pure analysis pass e.g. to provide more precise
dependence information to non-polyhedral transformation passes. Originally,
Polly required the input LLVM-IR to be preprocessed such that all scalar and
PHI-node dependences are translated to in-memory operations. Since this release,
Polly has full support for scalar and PHI node dependences and requires no
scalar-to-memory translation for such kinds of dependences.
* Modeling of modulo and non-affine conditions
Polly now supports modulo operations such as A[t%2][i][j], which appear
often in stencil computations, and also allows data-dependent conditional
branches as they result e.g. from ternary conditions such as A[i] > 255 ? 255 :
A[i].
* Delinearization
Polly now supports the analysis of manually linearized multi-dimensional arrays
as they result from macros such as
``#define 2DARRAY(A,i,j) (A.data[(i) * A.size + (j)])``. Similar constructs appear
in old C code written before C99, in C++ code such as boost::ublas, in LLVM IR
exported from Julia, in Matlab-generated code, and many others. Our work titled
`Optimistic Delinearization of Parametrically Sized Arrays
<http://www.grosser.es/#pub-optimistic-delinerization>`_ gives details.
* Compile time improvements
Pratik Bahtu worked on compile-time performance tuning of Polly. His work,
together with the support for schedule trees and the small integer optimization
in isl, notably reduced the compile time.
* Increased compute timeouts
As Polly's compile time has been notably improved, we were able to increase
the compile-time safeguards in Polly. As a result, the default configuration
of Polly can now analyze larger loop nests without running into compile-time
restrictions.
* Export Debug Locations via JSCoP file
Polly's JSCoP import/export format gained support for debug locations that show
the user the source-code location of detected scops.
* Improved Windows support
The compilation of Polly on Windows using CMake has been improved and several
Visual Studio build issues have been addressed.
* Many bug fixes
libunwind
---------
The unwind implementation which used to reside in `libc++abi` has been moved into
a separate repository. This implementation can still be used for `libc++abi` by
specifying `-DLIBCXXABI_USE_LLVM_UNWINDER=YES` and
`-DLIBCXXABI_LIBUNWIND_PATH=<path to libunwind source>` when configuring
`libc++abi`, which defaults to `true` when building on ARM.
The new repository can also be built standalone if just `libunwind` is desired.
External Open Source Projects Using LLVM 3.7
============================================
@ -96,7 +390,74 @@ An exciting aspect of LLVM is that it is used as an enabling technology for
a lot of other language and tools projects. This section lists some of the
projects that have already been updated to work with LLVM 3.7.
* A project
LDC - the LLVM-based D compiler
-------------------------------
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
pragmatically combines efficiency, control, and modeling power, with safety and
programmer productivity. D supports powerful concepts like Compile-Time Function
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
to concurrency and offers many classical paradigms.
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
combined with LLVM as backend to produce efficient native code. LDC targets
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on
PowerPC (32/64 bit). Ports to other architectures like ARM, AArch64 and MIPS64
are underway.
Portable Computing Language (pocl)
----------------------------------
In addition to producing an easily portable open source OpenCL
implementation, another major goal of `pocl <http://portablecl.org/>`_
is improving performance portability of OpenCL programs with
compiler optimizations, reducing the need for target-dependent manual
optimizations. An important part of pocl is a set of LLVM passes used to
statically parallelize multiple work-items with the kernel compiler, even in
the presence of work-group barriers.
TTA-based Co-design Environment (TCE)
-------------------------------------
`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing customized
exposed datapath processors based on the Transport triggered
architecture (TTA).
The toolset provides a complete co-design flow from C/C++
programs down to synthesizable VHDL/Verilog and parallel program binaries.
Processor customization points include the register files, function units,
supported operations, and the interconnection network.
TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
optimizations and also for parts of code generation. It generates
new LLVM-based code generators "on the fly" for the designed processors and
loads them into the compiler backend as runtime libraries to avoid
per-target recompilation of larger parts of the compiler chain.
BPF Compiler Collection (BCC)
-----------------------------
`BCC <https://github.com/iovisor/bcc>`_ is a Python + C framework for tracing and
networking that uses the Clang rewriter + a second pass of Clang + the BPF backend to
generate eBPF and push it into the kernel.
LLVMSharp & ClangSharp
----------------------
`LLVMSharp <http://www.llvmsharp.org>`_ and
`ClangSharp <http://www.clangsharp.org>`_ are type-safe C# bindings for
Microsoft.NET and Mono that Platform Invoke into the native libraries.
ClangSharp is self-hosted and is used to generate LLVMSharp using the
LLVM-C API.
`LLVMSharp Kaleidoscope Tutorials <http://www.llvmsharp.org/Kaleidoscope/>`_
are instructive examples of writing a compiler in C#, with certain improvements
like using the visitor pattern to generate LLVM IR.
`ClangSharp PInvoke Generator <http://www.clangsharp.org/PInvoke/>`_ is the
self-hosting mechanism for LLVM/ClangSharp and is demonstrative of using
LibClang to generate Platform Invoke (PInvoke) signatures for C APIs.
Additional Information
@ -111,4 +472,3 @@ going into the ``llvm/docs/`` directory in the LLVM tree.
If you have any questions or comments about LLVM, please feel free to contact
us via the `mailing lists <http://llvm.org/docs/#maillist>`_.

View File

@ -565,7 +565,7 @@ The existing IR Verifier pass has been extended to check most of the
local restrictions on the intrinsics mentioned in their respective
documentation. The current implementation in LLVM does not check the
key relocation invariant, but there is ongoing work on developing such
a verifier. Please ask on llvmdev if you're interested in
a verifier. Please ask on llvm-dev if you're interested in
experimenting with the current version.
.. _statepoint-utilities:
@ -696,7 +696,7 @@ If you are scheduling the RewriteStatepointsForGC pass late in the pass order,
you should probably schedule this pass immediately before it. The exception
would be if you need to preserve abstract frame information (e.g. for
deoptimization or introspection) at safepoints. In that case, ask on the
llvmdev mailing list for suggestions.
llvm-dev mailing list for suggestions.
Bugs and Enhancements
@ -707,8 +707,8 @@ tracked by performing a `bugzilla search
<http://llvm.org/bugs/buglist.cgi?cmdtype=runnamed&namedcmd=Statepoint%20Bugs&list_id=64342>`_
for [Statepoint] in the summary field. When filing new bugs, please
use this tag so that interested parties see the newly filed bug. As
with most LLVM features, design discussions take place on `llvmdev
<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_, and patches
with most LLVM features, design discussions take place on `llvm-dev
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_, and patches
should be sent to `llvm-commits
<http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits>`_ for review.
<http://lists.llvm.org/mailman/listinfo/llvm-commits>`_ for review.

View File

@ -7,7 +7,7 @@ TableGen Language Introduction
.. warning::
This document is extremely rough. If you find something lacking, please
fix it, file a documentation bug, or ask about it on llvmdev.
fix it, file a documentation bug, or ask about it on llvm-dev.
Introduction
============

View File

@ -7,7 +7,7 @@ TableGen Language Reference
.. warning::
This document is extremely rough. If you find something lacking, please
fix it, file a documentation bug, or ask about it on llvmdev.
fix it, file a documentation bug, or ask about it on llvm-dev.
Introduction
============

View File

@ -11,6 +11,7 @@
# serve to show the default.
import sys, os
from datetime import date
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@ -40,7 +41,7 @@
# General information about the project.
project = u'LLVM'
copyright = u'2003-2014, LLVM Project'
copyright = u'2003-%d, LLVM Project' % date.today().year
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the

View File

@ -2205,7 +2205,7 @@ DIRECTORY_GRAPH = YES
# The default value is: png.
# This tag requires that the tag HAVE_DOT is set to YES.
DOT_IMAGE_FORMAT = png
DOT_IMAGE_FORMAT = @DOT_IMAGE_FORMAT@
# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
# enable generation of interactive SVG images that allow zooming and panning.

View File

@ -1,11 +1,6 @@
Overview
========
.. warning::
If you are using a released version of LLVM, see `the download page
<http://llvm.org/releases/>`_ to find your documentation.
The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.
@ -425,12 +420,12 @@ Mailing Lists
If you can't find what you need in these docs, try consulting the mailing
lists.
`Developer's List (llvmdev)`__
`Developer's List (llvm-dev)`__
This list is for people who want to be included in technical discussions of
LLVM. People post to this list when they have questions about writing code
for or using the LLVM tools. It is relatively low volume.
.. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
.. __: http://lists.llvm.org/mailman/listinfo/llvm-dev
`Commits Archive (llvm-commits)`__
This list contains all commit messages that are made when LLVM developers
@ -439,26 +434,26 @@ lists.
stay on the bleeding edge of LLVM development. This list is very high
volume.
.. __: http://lists.cs.uiuc.edu/pipermail/llvm-commits/
.. __: http://lists.llvm.org/pipermail/llvm-commits/
`Bugs & Patches Archive (llvmbugs)`__
`Bugs & Patches Archive (llvm-bugs)`__
This list gets emailed every time a bug is opened and closed. It is
higher volume than the LLVMdev list.
higher volume than the LLVM-dev list.
.. __: http://lists.cs.uiuc.edu/pipermail/llvmbugs/
.. __: http://lists.llvm.org/pipermail/llvm-bugs/
`Test Results Archive (llvm-testresults)`__
A message is automatically sent to this list by every active nightly tester
when it completes. As such, this list gets email several times each day,
making it a high volume list.
.. __: http://lists.cs.uiuc.edu/pipermail/llvm-testresults/
.. __: http://lists.llvm.org/pipermail/llvm-testresults/
`LLVM Announcements List (llvm-announce)`__
This is a low volume list that provides important announcements regarding
LLVM. It gets email about once a month.
.. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce
.. __: http://lists.llvm.org/mailman/listinfo/llvm-announce
IRC
---

View File

@ -90,8 +90,8 @@ For example, try adding:
Have fun - try doing something crazy and unusual. Building a language
like everyone else always has, is much less fun than trying something a
little crazy or off the wall and seeing how it turns out. If you get
stuck or want to talk about it, feel free to email the `llvmdev mailing
list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_: it has lots
stuck or want to talk about it, feel free to email the `llvm-dev mailing
list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_: it has lots
of people who are interested in languages and are often willing to help
out.
@ -169,8 +169,8 @@ It is certainly possible to implement a safe language in LLVM, but LLVM
IR does not itself guarantee safety. The LLVM IR allows unsafe pointer
casts, use after free bugs, buffer over-runs, and a variety of other
problems. Safety needs to be implemented as a layer on top of LLVM and,
conveniently, several groups have investigated this. Ask on the `llvmdev
mailing list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ if
conveniently, several groups have investigated this. Ask on the `llvm-dev
mailing list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ if
you are interested in more details.
Language-Specific Optimizations
@ -220,7 +220,7 @@ safe to optimize that into "return 0;" because C specifies what the
In addition to simple library knowledge, it is possible to embed a
variety of other language-specific information into the LLVM IR. If you
have a specific need and run into a wall, please bring the topic up on
the llvmdev list. At the very worst, you can always treat LLVM as if it
the llvm-dev list. At the very worst, you can always treat LLVM as if it
were a "dumb code generator" and implement the high-level optimizations
you desire in your front-end, on the language-specific AST.

View File

@ -95,8 +95,8 @@ For example, try adding:
Have fun - try doing something crazy and unusual. Building a language
like everyone else always has, is much less fun than trying something a
little crazy or off the wall and seeing how it turns out. If you get
stuck or want to talk about it, feel free to email the `llvmdev mailing
list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_: it has lots
stuck or want to talk about it, feel free to email the `llvm-dev mailing
list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_: it has lots
of people who are interested in languages and are often willing to help
out.
@ -174,8 +174,8 @@ It is certainly possible to implement a safe language in LLVM, but LLVM
IR does not itself guarantee safety. The LLVM IR allows unsafe pointer
casts, use after free bugs, buffer over-runs, and a variety of other
problems. Safety needs to be implemented as a layer on top of LLVM and,
conveniently, several groups have investigated this. Ask on the `llvmdev
mailing list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ if
conveniently, several groups have investigated this. Ask on the `llvm-dev
mailing list <http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ if
you are interested in more details.
Language-Specific Optimizations
@ -225,7 +225,7 @@ safe to optimize that into "return 0;" because C specifies what the
In addition to simple library knowledge, it is possible to embed a
variety of other language-specific information into the LLVM IR. If you
have a specific need and run into a wall, please bring the topic up on
the llvmdev list. At the very worst, you can always treat LLVM as if it
the llvm-dev list. At the very worst, you can always treat LLVM as if it
were a "dumb code generator" and implement the high-level optimizations
you desire in your front-end, on the language-specific AST.

View File

@ -115,7 +115,7 @@ char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T);
LLVMDisposeMessage. */
char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T);
/** Returns the llvm::DataLayout used for this llvm:TargetMachine. */
/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */
LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T);
/** Set the target machine's ASM verbosity. */
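Given this deprecation, a caller that still needs an LLVMTargetDataRef can
derive one from the module instead; a hedged sketch, assuming the 3.7 C API
where LLVMGetDataLayout returns the module's layout string:

::

    #include "llvm-c/Core.h"
    #include "llvm-c/Target.h"

    /* Sketch: instead of LLVMGetTargetMachineData(TM), rebuild target data
     * from the Module's data layout string. The caller owns the result and
     * should release it with LLVMDisposeTargetData. */
    LLVMTargetDataRef targetDataFromModule(LLVMModuleRef M) {
      return LLVMCreateTargetData(LLVMGetDataLayout(M));
    }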

View File

@ -315,8 +315,10 @@ class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
T2>::value>::type * = nullptr) {
// Use memcpy for PODs iterated by pointers (which includes SmallVector
// iterators): std::uninitialized_copy optimizes to memmove, but we can
// use memcpy here.
memcpy(Dest, I, (E-I)*sizeof(T));
// use memcpy here. Note that I and E are iterators and thus might be
// invalid for memcpy if they are equal.
if (I != E)
memcpy(Dest, I, (E - I) * sizeof(T));
}
/// Double the size of the allocated memory, guaranteeing space for at

View File

@ -158,7 +158,8 @@ class StringMapEntry : public StringMapEntryBase {
// Copy the string information.
char *StrBuffer = const_cast<char*>(NewItem->getKeyData());
memcpy(StrBuffer, Key.data(), KeyLength);
if (KeyLength > 0)
memcpy(StrBuffer, Key.data(), KeyLength);
StrBuffer[KeyLength] = 0; // Null terminate for convenience of clients.
return NewItem;
}

View File

@ -32,11 +32,13 @@ namespace llvm {
class LiveInterval;
class LiveIntervalAnalysis;
class MachineRegisterInfo;
class TargetRegisterInfo;
class VirtRegMap;
class LiveRegMatrix : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
VirtRegMap *VRM;

View File

@ -95,8 +95,20 @@ class MachineRegisterInfo {
return MO->Contents.Reg.Next;
}
/// UsedRegUnits - This is a bit vector that is computed and set by the
/// register allocator, and must be kept up to date by passes that run after
/// register allocation (though most don't modify this). This is used
/// so that the code generator knows which callee save registers to save and
/// for other target specific uses.
/// This vector has bits set for register units that are modified in the
/// current function. It doesn't include registers clobbered by function
/// calls with register mask operands.
BitVector UsedRegUnits;
/// UsedPhysRegMask - Additional used physregs including aliases.
/// This bit vector represents all the registers clobbered by function calls.
/// It can model things that UsedRegUnits can't, such as function calls that
/// clobber ymm7 but preserve the low half in xmm7.
BitVector UsedPhysRegMask;
/// ReservedRegs - This is a bit vector of reserved registers. The target
@ -641,12 +653,55 @@ class MachineRegisterInfo {
/// ignored.
bool isPhysRegModified(unsigned PhysReg) const;
//===--------------------------------------------------------------------===//
// Physical Register Use Info
//===--------------------------------------------------------------------===//
/// isPhysRegUsed - Return true if the specified register is used in this
/// function. Also check for clobbered aliases and registers clobbered by
/// function calls with register mask operands.
///
/// This only works after register allocation.
bool isPhysRegUsed(unsigned Reg) const {
if (UsedPhysRegMask.test(Reg))
return true;
for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
Units.isValid(); ++Units)
if (UsedRegUnits.test(*Units))
return true;
return false;
}
/// Mark the specified register unit as used in this function.
/// This should only be called during and after register allocation.
void setRegUnitUsed(unsigned RegUnit) {
UsedRegUnits.set(RegUnit);
}
/// setPhysRegUsed - Mark the specified register used in this function.
/// This should only be called during and after register allocation.
void setPhysRegUsed(unsigned Reg) {
for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
Units.isValid(); ++Units)
UsedRegUnits.set(*Units);
}
/// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used.
/// This corresponds to the bit mask attached to register mask operands.
void addPhysRegsUsedFromRegMask(const uint32_t *RegMask) {
UsedPhysRegMask.setBitsNotInMask(RegMask);
}
/// setPhysRegUnused - Mark the specified register unused in this function.
/// This should only be called during and after register allocation.
void setPhysRegUnused(unsigned Reg) {
UsedPhysRegMask.reset(Reg);
for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo());
Units.isValid(); ++Units)
UsedRegUnits.reset(*Units);
}
//===--------------------------------------------------------------------===//
// Reserved Register Info
//===--------------------------------------------------------------------===//

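A rough sketch of the two-part query that isPhysRegUsed above implements: one bit per register unit written directly in the function, plus a separate mask for registers clobbered through call register masks. The sizes and register/unit numbering below are illustrative, not any real target's:

    #include <bitset>
    #include <vector>

    struct RegUsage {
      std::bitset<512> UsedUnits;    // units modified in the function body
      std::bitset<512> CallClobbers; // regs clobbered via call regmasks

      // A register counts as used if a call regmask clobbered it outright,
      // or if any of its constituent units was modified.
      bool isRegUsed(unsigned Reg, const std::vector<unsigned> &Units) const {
        if (CallClobbers.test(Reg))
          return true;
        for (unsigned U : Units)
          if (UsedUnits.test(U))
            return true;
        return false;
      }
    };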
View File

@ -125,10 +125,15 @@ class TargetMachine {
return *static_cast<const STC*>(getSubtargetImpl(F));
}
/// Deprecated in 3.7, will be removed in 3.8. Use createDataLayout() instead.
///
/// This method returns a pointer to the DataLayout for the target. It should
/// be unchanging for every subtarget.
const DataLayout *getDataLayout() const { return &DL; }
/// Create a DataLayout.
const DataLayout createDataLayout() const { return DL; }
/// \brief Reset the target options based on the function's attributes.
// FIXME: Remove TargetOptions that affect per-function code generation
// from TargetMachine.

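For callers, the deprecation above amounts to swapping a borrowed pointer for an owned copy. A hedged sketch of the migration; the Module setter shown is the usual way such a copy gets attached, but check the headers in your tree:

    #include "llvm/IR/Module.h"
    #include "llvm/Target/TargetMachine.h"

    // Attach the target's layout to a module by value; this replaces
    // borrowing a pointer via the deprecated TM.getDataLayout().
    void applyLayout(llvm::Module &M, const llvm::TargetMachine &TM) {
      M.setDataLayout(TM.createDataLayout());
    }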
View File

@ -206,14 +206,6 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
return V;
}
if (ConstantInt *Const = dyn_cast<ConstantInt>(V)) {
// If it's a constant, just convert it to an offset
// and remove the variable.
Offset += Const->getValue();
assert(Scale == 0 && "Constant values don't have a scale");
return V;
}
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
@ -261,10 +253,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
Depth + 1, AC, DT);
Scale = Scale.zext(OldWidth);
// We have to sign-extend even if Extension == EK_ZeroExt as we can't
// decompose a sign extension (i.e. zext(x - 1) != zext(x) - zext(-1)).
Offset = Offset.sext(OldWidth);
Offset = Offset.zext(OldWidth);
return Result;
}
@ -1135,43 +1124,12 @@ AliasResult BasicAliasAnalysis::aliasGEP(
}
}
// Try to distinguish something like &A[i][1] against &A[42][0].
// Grab the least significant bit set in any of the scales.
if (!GEP1VariableIndices.empty()) {
uint64_t Modulo = 0;
bool AllPositive = true;
for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
// Try to distinguish something like &A[i][1] against &A[42][0].
// Grab the least significant bit set in any of the scales. We
// don't need std::abs here (even if the scale's negative) as we'll
// be ^'ing Modulo with itself later.
for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
Modulo |= (uint64_t) GEP1VariableIndices[i].Scale;
if (AllPositive) {
// If the Value could change between cycles, then any reasoning about
// the Value this cycle may not hold in the next cycle. We'll just
// give up if we can't determine conditions that hold for every cycle:
const Value *V = GEP1VariableIndices[i].V;
bool SignKnownZero, SignKnownOne;
ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, *DL,
0, AC1, nullptr, DT);
// Zero-extension widens the variable, and so forces the sign
// bit to zero.
bool IsZExt = GEP1VariableIndices[i].Extension == EK_ZeroExt;
SignKnownZero |= IsZExt;
SignKnownOne &= !IsZExt;
// If the variable begins with a zero then we know it's
// positive, regardless of whether the value is signed or
// unsigned.
int64_t Scale = GEP1VariableIndices[i].Scale;
AllPositive =
(SignKnownZero && Scale >= 0) ||
(SignKnownOne && Scale < 0);
}
}
Modulo = Modulo ^ (Modulo & (Modulo - 1));
// We can compute the difference between the two addresses
@ -1182,12 +1140,6 @@ AliasResult BasicAliasAnalysis::aliasGEP(
V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
V1Size <= Modulo - ModOffset)
return NoAlias;
// If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
// If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
// don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t) GEP1BaseOffset)
return NoAlias;
}
// Statically, we can see that the base objects are the same, but the

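The bit trick in the hunk above, Modulo ^ (Modulo & (Modulo - 1)), isolates the least significant set bit of the OR of all scales; every combination of such indices then moves in multiples of that value. A tiny self-checking example of the arithmetic:

    #include <cassert>
    #include <cstdint>

    // x & (x - 1) clears the lowest set bit, so XOR-ing the result back
    // against x leaves exactly that bit.
    uint64_t lowestSetBit(uint64_t x) { return x ^ (x & (x - 1)); }

    int main() {
      // With index scales 8 and 12, Modulo = 8 | 12 = 12 (0b1100), whose
      // lowest set bit is 4: offsets built from them are congruent mod 4.
      assert(lowestSetBit(8 | 12) == 4);
      assert(lowestSetBit(22) == 2); // 0b10110 -> 0b00010
    }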
View File

@ -440,31 +440,40 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
}
// Scan the function bodies for explicit loads or stores.
for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;
++i)
for (inst_iterator II = inst_begin(SCC[i]->getFunction()),
E = inst_end(SCC[i]->getFunction());
II != E && FunctionEffect != ModRef; ++II)
if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) {
FunctionEffect |= Ref;
if (LI->isVolatile())
// Volatile loads may have side-effects, so mark them as writing
// memory (for example, a flag inside the processor).
FunctionEffect |= Mod;
} else if (StoreInst *SI = dyn_cast<StoreInst>(&*II)) {
FunctionEffect |= Mod;
if (SI->isVolatile())
// Treat volatile stores as reading memory somewhere.
FunctionEffect |= Ref;
} else if (isAllocationFn(&*II, TLI) || isFreeCall(&*II, TLI)) {
FunctionEffect |= ModRef;
} else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) {
// The callgraph doesn't include intrinsic calls.
Function *Callee = Intrinsic->getCalledFunction();
ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee);
FunctionEffect |= (Behaviour & ModRef);
for (auto *Node : SCC) {
if (FunctionEffect == ModRef)
break; // The mod/ref lattice saturates here.
for (Instruction &I : inst_range(Node->getFunction())) {
if (FunctionEffect == ModRef)
break; // The mod/ref lattice saturates here.
// We handle calls specially because the graph-relevant aspects are
// handled above.
if (auto CS = CallSite(&I)) {
if (isAllocationFn(&I, TLI) || isFreeCall(&I, TLI)) {
// FIXME: It is completely unclear why this is necessary and not
// handled by the above graph code.
FunctionEffect |= ModRef;
} else if (Function *Callee = CS.getCalledFunction()) {
// The callgraph doesn't include intrinsic calls.
if (Callee->isIntrinsic()) {
ModRefBehavior Behaviour =
AliasAnalysis::getModRefBehavior(Callee);
FunctionEffect |= (Behaviour & ModRef);
}
}
continue;
}
// For all non-call instructions we use the primary predicates for whether
// they read or write memory.
if (I.mayReadFromMemory())
FunctionEffect |= Ref;
if (I.mayWriteToMemory())
FunctionEffect |= Mod;
}
}
if ((FunctionEffect & Mod) == 0)
++NumReadMemFunctions;
if (FunctionEffect == 0)

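The "lattice saturates" early exits above work because the mod/ref summary only ever grows and ModRef is its top element. A minimal sketch of that accumulation pattern (names illustrative, not LLVM's):

    enum Effect { None = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

    // OR effects together; once both bits are set, nothing later in the
    // scan can change the answer, so stop early.
    Effect summarize(const Effect *Effects, int N) {
      int Acc = None;
      for (int I = 0; I < N && Acc != ModRef; ++I)
        Acc |= Effects[I];
      return static_cast<Effect>(Acc);
    }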
View File

@ -3574,18 +3574,9 @@ static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
unsigned IndexVal = IdxC->getZExtValue();
unsigned VectorWidth = Vec->getType()->getVectorNumElements();
// If this is extracting an invalid index, turn this into undef, to avoid
// crashing the code below.
if (IndexVal >= VectorWidth)
return UndefValue::get(Vec->getType()->getVectorElementType());
if (Value *Elt = findScalarElement(Vec, IndexVal))
if (auto *IdxC = dyn_cast<ConstantInt>(Idx))
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
return Elt;
}
return nullptr;
}

View File

@ -374,9 +374,10 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true))
return Tmp.getAddr();
// If we don't have an available version of this value, it must be an
// instruction.
Instruction *Inst = cast<Instruction>(InVal);
// We don't need to PHI translate values which aren't instructions.
auto *Inst = dyn_cast<Instruction>(InVal);
if (!Inst)
return nullptr;
// Handle cast of PHI translatable value.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {

View File

@ -402,8 +402,9 @@ llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
if (match(V,
llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
llvm::PatternMatch::m_Constant(Con)))) {
if (Con->getAggregateElement(EltNo)->isNullValue())
return findScalarElement(Val, EltNo);
if (Constant *Elt = Con->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
}
// Otherwise, we don't know.

View File

@ -733,14 +733,12 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
const MachineRegisterInfo &MRI = mf.getRegInfo();
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E && !anyregs; ++I)
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
if (!MRI.reg_nodbg_empty(*AI)) {
anyregs = true;
break;
}
I != E; ++I)
if (MF->getRegInfo().isPhysRegUsed(*I)) {
anyregs = true;
break;
}
if (!anyregs) return false;
// Initialize the AliasMap on the first use.

View File

@ -15,12 +15,12 @@
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@ -49,6 +49,7 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
VRM = &getAnalysis<VirtRegMap>();
@ -100,6 +101,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
<< " to " << PrintReg(PhysReg, TRI) << ':');
assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
MRI->setPhysRegUsed(PhysReg);
foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit,
const LiveRange &Range) {

View File

@ -29,6 +29,7 @@ MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF)
TracksSubRegLiveness(false) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs());
// Create the physreg use/def lists.

View File

@ -624,6 +624,10 @@ struct DataDep {
static bool getDataDeps(const MachineInstr *UseMI,
SmallVectorImpl<DataDep> &Deps,
const MachineRegisterInfo *MRI) {
// Debug values should not be included in any calculations.
if (UseMI->isDebugValue())
return false;
bool HasPhysRegs = false;
for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(),
E = UseMI->operands_end(); I != E; ++I) {

View File

@ -1026,8 +1026,12 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
MachineRegisterInfo &MRI = Fn.getRegInfo();
Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
// Make sure MRI now accounts this register as used.
MRI.setPhysRegUsed(ScratchReg);
// Because this instruction was processed by the RS before this
// register was allocated, make sure that the RS now records the
// register as being used.

View File

@ -986,6 +986,10 @@ void RAFast::AllocateBasicBlock() {
}
}
for (UsedInInstrSet::iterator
I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
MRI->setRegUnitUsed(*I);
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
UsedInInstr.clear();
@ -1046,6 +1050,10 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();
for (UsedInInstrSet::iterator
I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
MRI->setRegUnitUsed(*I);
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
Coalesced.push_back(MI);
@ -1095,6 +1103,12 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
AllocateBasicBlock();
}
// Add the clobber lists for all the instructions we skipped earlier.
for (const MCInstrDesc *Desc : SkippedInstrs)
if (const uint16_t *Defs = Desc->getImplicitDefs())
while (*Defs)
MRI->setPhysRegUsed(*Defs++);
// All machine operands and other references to virtual registers have been
// replaced. Remove the virtual registers.
MRI->clearVirtRegs();

View File

@ -1531,6 +1531,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
return false;
}
// We must also check for clobbers caused by regmasks.
for (const auto &MO : MI->operands()) {
if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
return false;
}
}
}
// We're going to remove the copy which defines a physical reserved

View File

@ -8365,12 +8365,12 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N0CFP->isExactlyValue(1.0))
return SDValue();
SmallVector<SDNode *, 4> Users;
// Find all FDIV users of the same divisor.
for (auto *U : N1->uses()) {
// Use a set because duplicates may be present in the user list.
SetVector<SDNode *> Users;
for (auto *U : N1->uses())
if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
Users.push_back(U);
}
Users.insert(U);
if (TLI.combineRepeatedFPDivisors(Users.size())) {
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

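For reference, the payoff of collecting the repeated-divisor users above: N divisions by one value become a single division plus N multiplies. A scalar sketch of the rewrite, legal only under relaxed floating-point semantics (which the DAG combine also requires):

    #include <cstddef>

    // a[i] / d  ->  a[i] * (1.0 / d): one divide total instead of n.
    void divideAllBy(double *a, std::size_t n, double d) {
      const double recip = 1.0 / d;
      for (std::size_t i = 0; i != n; ++i)
        a[i] *= recip;
    }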
View File

@ -163,6 +163,7 @@ class VirtRegRewriter : public MachineFunctionPass {
SlotIndexes *Indexes;
LiveIntervals *LIS;
VirtRegMap *VRM;
SparseSet<unsigned> PhysRegs;
void rewrite();
void addMBBLiveIns();
@ -318,15 +319,54 @@ void VirtRegRewriter::rewrite() {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;
// Here we have a SparseSet to hold which PhysRegs are actually encountered
// in the MF we are about to iterate over so that later when we call
// setPhysRegUsed, we are only doing it for physRegs that were actually found
// in the program and not for all of the possible physRegs for the given
// target architecture. If the target has a lot of physRegs, then for a small
// program there will be a significant compile time reduction here.
PhysRegs.clear();
PhysRegs.setUniverse(TRI->getNumRegs());
// A function with the uwtable attribute must guarantee that the stack
// unwinder can unwind to the previous frame. Thus, we can't apply the
// noreturn optimization if the caller function has the uwtable attribute.
bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable);
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
bool IsExitBB = MBBI->succ_empty();
for (MachineBasicBlock::instr_iterator
MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
MachineInstr *MI = MII;
++MII;
// Check if this instruction is a call to a noreturn function. If this
// is a call to a noreturn function and we don't need the stack unwinding
// functionality (i.e. this function does not have uwtable attribute and
// the callee function has the nounwind attribute), then we can ignore
// the definitions set by this instruction.
if (!HasUWTable && IsExitBB && MI->isCall()) {
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
if (!MO.isGlobal())
continue;
const Function *Func = dyn_cast<Function>(MO.getGlobal());
if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
// We need to keep correct unwind information
// even if the function will not return, since the
// runtime may need it.
!Func->hasFnAttribute(Attribute::NoUnwind))
continue;
NoReturnInsts.insert(MI);
break;
}
}
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
@ -335,6 +375,15 @@ void VirtRegRewriter::rewrite() {
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
// If we encounter a VirtReg or PhysReg then get at the PhysReg and add
// it to the physreg bitset. Later we use only the PhysRegs that were
// actually encountered in the MF to populate the MRI's used physregs.
if (MO.isReg() && MO.getReg())
PhysRegs.insert(
TargetRegisterInfo::isVirtualRegister(MO.getReg()) ?
VRM->getPhys(MO.getReg()) :
MO.getReg());
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
@ -421,5 +470,29 @@ void VirtRegRewriter::rewrite() {
}
}
}
// Tell MRI about physical registers in use.
if (NoReturnInsts.empty()) {
for (SparseSet<unsigned>::iterator
RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI)
if (!MRI->reg_nodbg_empty(*RegI))
MRI->setPhysRegUsed(*RegI);
} else {
for (SparseSet<unsigned>::iterator
I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) {
unsigned Reg = *I;
if (MRI->reg_nodbg_empty(Reg))
continue;
// Check if this register has a use that will impact the rest of the
// code. Uses in debug and noreturn instructions do not impact the
// generated code.
for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) {
if (!NoReturnInsts.count(&It)) {
MRI->setPhysRegUsed(Reg);
break;
}
}
}
}
}

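The SparseSet comment above describes a classic compile-time trade: record only the physregs that actually occur in the function, then mark just those, rather than probing every register the target defines. A rough sketch with std::unordered_set standing in for LLVM's SparseSet:

    #include <unordered_set>
    #include <vector>

    // Mark as used only the registers that appeared as operands, instead
    // of iterating the target's full register file.
    void markEncounteredRegs(const std::vector<unsigned> &OperandRegs,
                             std::vector<bool> &UsedTable) {
      std::unordered_set<unsigned> Seen(OperandRegs.begin(),
                                        OperandRegs.end());
      for (unsigned Reg : Seen)
        if (Reg < UsedTable.size())
          UsedTable[Reg] = true;
    }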
View File

@ -180,10 +180,17 @@ uint64_t ExecutionEngineState::RemoveMapping(StringRef Name) {
}
std::string ExecutionEngine::getMangledName(const GlobalValue *GV) {
assert(GV->hasName() && "Global must have name.");
MutexGuard locked(lock);
Mangler Mang;
SmallString<128> FullName;
Mang.getNameWithPrefix(FullName, GV, false);
const DataLayout &DL =
GV->getParent()->getDataLayout().isDefault()
? *getDataLayout()
: GV->getParent()->getDataLayout();
Mangler::getNameWithPrefix(FullName, GV->getName(), DL);
return FullName.str();
}

View File

@ -266,6 +266,12 @@ void MCJIT::finalizeModule(Module *M) {
RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) {
SmallString<128> FullName;
Mangler::getNameWithPrefix(FullName, Name, *TM->getDataLayout());
if (void *Addr = getPointerToGlobalIfAvailable(FullName))
return RuntimeDyld::SymbolInfo(static_cast<uint64_t>(
reinterpret_cast<uintptr_t>(Addr)),
JITSymbolFlags::Exported);
return Dyld.getSymbol(FullName);
}

View File

@ -98,7 +98,7 @@ void RTDyldMemoryManager::registerEHFrames(uint8_t *Addr,
uint64_t LoadAddr,
size_t Size) {
// On OS X, __register_frame takes a single FDE as an argument.
// See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html
// See http://lists.llvm.org/pipermail/llvm-dev/2013-April/061768.html
const char *P = (const char *)Addr;
const char *End = P + Size;
do {

View File

@ -613,6 +613,9 @@ bool StructType::isLayoutIdentical(StructType *Other) const {
if (isPacked() != Other->isPacked() ||
getNumElements() != Other->getNumElements())
return false;
if (!getNumElements())
return true;
return std::equal(element_begin(), element_end(), Other->element_begin());
}

View File

@ -57,7 +57,8 @@ void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
/// CopyStringRef - Copies contents of a StringRef into a block of memory and
/// null-terminates it.
static void CopyStringRef(char *Memory, StringRef Data) {
memcpy(Memory, Data.data(), Data.size());
if (!Data.empty())
memcpy(Memory, Data.data(), Data.size());
Memory[Data.size()] = 0; // Null terminate string.
}

View File

@ -593,6 +593,7 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C,
if (Change) {
Substs[MO.getReg()] = Reg;
MO.setReg(Reg);
MRI->setPhysRegUsed(Reg);
Changed = true;
}

View File

@ -354,6 +354,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes && NeedsRealignment) {
// Use the first callee-saved register as a scratch register.
scratchSPReg = AArch64::X9;
MF.getRegInfo().setPhysRegUsed(scratchSPReg);
}
// If we're a leaf function, try using the red zone.

View File

@ -123,6 +123,11 @@ def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"true",
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;
def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer",
"EnableHugeScratchBuffer",
"true",
"Enable scratch buffer sizes greater than 128 GB">;
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",

View File

@ -1029,6 +1029,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &SLC, SDValue &TFE) const {
SDValue Ptr, Offen, Idxen, Addr64;
// The addr64 bit was removed for Volcanic Islands.
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return false;
SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
GLC, SLC, TFE);
@ -1095,13 +1099,16 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
// (add n0, c1)
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
SDValue N1 = Addr.getOperand(1);
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
VAddr = Addr.getOperand(0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
// Offsets in vaddr must be positive.
if (CurDAG->SignBitIsZero(N0)) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
VAddr = N0;
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
}

View File

@ -73,7 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
IsaVersion(ISAVersion0_0_0),
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),

View File

@ -89,6 +89,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
bool FeatureDisable;
int LDSBankCount;
unsigned IsaVersion;
bool EnableHugeScratchBuffer;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@ -271,6 +272,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
return DevName;
}
bool enableHugeScratchBuffer() const {
return EnableHugeScratchBuffer;
}
bool dumpCode() const {
return DumpCode;
}

View File

@ -1719,7 +1719,6 @@ MachineBasicBlock *
AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
MachineBasicBlock *LoopHeader = LoopRep->getHeader();
MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
if (!LoopHeader || !LoopLatch)
return nullptr;
@ -1732,18 +1731,9 @@ AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
FuncRep->push_back(DummyExitBlk); //insert to function
SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
MachineBasicBlock::iterator I = BranchMI;
unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
llvm_unreachable("Extra register needed to handle CFG");
MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
MachineInstrBuilder MIB(*FuncRep, NewMI);
MIB.addMBB(LoopHeader);
MIB.addReg(ImmReg, false);
SHOWNEWINSTR(NewMI);
BranchMI->eraseFromParent();
LoopLatch->addSuccessor(DummyExitBlk);
return DummyExitBlk;
LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext();
Ctx.emitError("Extra register needed to handle CFG");
return nullptr;
}
void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {

View File

@ -138,3 +138,7 @@ def : ProcessorModel<"iceland", SIQuarterSpeedModel,
def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureISAVersion8_0_1]
>;
def : ProcessorModel<"fiji", SIQuarterSpeedModel,
[FeatureVolcanicIslands, FeatureISAVersion8_0_1]
>;

View File

@ -254,6 +254,12 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
return false;
}
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
// Flat instructions do not have offsets, and only have the register
// address.
return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
}
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
@ -263,8 +269,21 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// Assume that we will use FLAT for all global memory accesses
// on VI.
// FIXME: This assumption is currently wrong. On VI we still use
// MUBUF instructions for the r + i addressing mode. As currently
// implemented, the MUBUF instructions only work on buffer < 4GB.
// It may be possible to support > 4GB buffers with MUBUF instructions,
// by setting the stride value in the resource descriptor which would
// increase the size limit to (stride * 4GB). However, this is risky,
// because it has never been validated.
return isLegalFlatAddressingMode(AM);
}
// fall-through
case AMDGPUAS::PRIVATE_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions?
case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: {
// MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
// additionally can do r + r + i with addr64. 32-bit has more addressing
@ -324,11 +343,9 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return false;
}
case AMDGPUAS::FLAT_ADDRESS: {
// Flat instructions do not have offsets, and only have the register
// address.
return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1);
}
case AMDGPUAS::FLAT_ADDRESS:
return isLegalFlatAddressingMode(AM);
default:
llvm_unreachable("unhandled address space");
}
@ -812,10 +829,29 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FINode->getIndex();
return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
// A FrameIndex node represents a 32-bit offset into scratch memory. If
// the high bit of a frame index offset were to be set, this would mean
// that it represented an offset of ~2GB * 64 = ~128GB from the start of the
// scratch buffer, with 64 being the number of threads per wave.
//
// If we know the machine uses less than 128GB of scratch, then we can
// mark the high bit of the FrameIndex node as known zero,
// which is important, because it means in most situations we can
// prove that values derived from FrameIndex nodes are non-negative.
// This enables us to take advantage of more addressing modes when
// accessing scratch buffers, since for scratch reads/writes, the register
// offset must always be positive.
SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
if (Subtarget->enableHugeScratchBuffer())
return TFI;
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31)));
}
/// This transforms the control flow intrinsics to get the branch destination as
@ -2034,6 +2070,13 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
}
static bool isFrameIndexOp(SDValue Op) {
if (Op.getOpcode() == ISD::AssertZext)
Op = Op.getOperand(0);
return isa<FrameIndexSDNode>(Op);
}
/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs to these instructions are registers.
@ -2042,7 +2085,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
if (!isFrameIndexOp(Node->getOperand(i))) {
Ops.push_back(Node->getOperand(i));
continue;
}

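The 128GB figure in the comment above is just sign-bit arithmetic: a 32-bit per-lane offset with its high bit clear stays below 2^31 bytes, and scratch is allocated per wave of 64 lanes. A self-checking restatement:

    #include <cassert>
    #include <cstdint>

    int main() {
      // If a per-lane frame-index offset ever had bit 31 set, it would be
      // at least 2 GB; across a 64-lane wave the scratch buffer would
      // therefore have to exceed 2 GB * 64 = 128 GB.
      const uint64_t perLane = uint64_t(1) << 31; // 2 GB
      const uint64_t lanesPerWave = 64;
      assert(perLane * lanesPerWave == uint64_t(128) << 30); // 128 GB
    }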
View File

@ -56,6 +56,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
public:
SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI);

View File

@ -1600,12 +1600,14 @@ multiclass VOPC_m <vopc op, dag outs, dag ins, string asm, list<dag> pattern,
SIMCInstr <opName#"_e32", SISubtarget.SI> {
let Defs = !if(DefExec, [EXEC], []);
let hasSideEffects = DefExec;
let AssemblerPredicates = [isSICI];
}
def _vi : VOPC<op.VI, ins, asm, []>,
SIMCInstr <opName#"_e32", SISubtarget.VI> {
let Defs = !if(DefExec, [EXEC], []);
let hasSideEffects = DefExec;
let AssemblerPredicates = [isVI];
}
}

View File

@ -2910,9 +2910,6 @@ defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, constant_load>;
} // End Predicates = [isSICI]
class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
@ -3273,13 +3270,13 @@ def : Pat <
(f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
(f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
(V_CNDMASK_B64_PSEUDO
$x,
(V_MIN_F64
SRCMODS.NONE,
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
SRCMODS.NONE,
(V_MOV_B64_PSEUDO 0x3fefffffffffffff),
DSTCLAMP.NONE, DSTOMOD.NONE),
$x,
(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/))
>;
@ -3291,13 +3288,13 @@ def : Pat <
$x,
SRCMODS.NEG,
(V_CNDMASK_B64_PSEUDO
$x,
(V_MIN_F64
SRCMODS.NONE,
(V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE),
SRCMODS.NONE,
(V_MOV_B64_PSEUDO 0x3fefffffffffffff),
DSTCLAMP.NONE, DSTOMOD.NONE),
$x,
(V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
DSTCLAMP.NONE, DSTOMOD.NONE)
>;

View File

@ -53,6 +53,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
MRI.setPhysRegUsed(LaneVGPR);
// Add this register as live-in to all blocks to avoid machine verifier
// complaining about use of an undefined physical register.

View File

@ -91,6 +91,7 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
if (ScratchOffsetReg != AMDGPU::NoRegister) {
// Found an SGPR to use
MRI.setPhysRegUsed(ScratchOffsetReg);
BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
.addReg(ScratchOffsetPreloadReg);
} else {

View File

@ -348,7 +348,8 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
&AMDGPU::SReg_128RegClass,
&AMDGPU::VReg_256RegClass,
&AMDGPU::SReg_256RegClass,
&AMDGPU::VReg_512RegClass
&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass
};
for (const TargetRegisterClass *BaseClass : BaseClasses) {
@ -499,7 +500,7 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
if (MRI.reg_nodbg_empty(*I))
if (!MRI.isPhysRegUsed(*I))
return *I;
}
return AMDGPU::NoRegister;

View File

@ -103,4 +103,46 @@ def : Pat <
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
>;
// Patterns for global loads with no offset
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr)),
(inst $addr, 0, 0, 0)
>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, az_extloadi8_global, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_global, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, az_extloadi16_global, i32>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_global, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD, global_load, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, global_load, v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, global_load, v4i32>;
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(node vt:$data, i64:$addr),
(inst $data, $addr, 0, 0, 0)
>;
def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_global, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_global, i32>;
def : FlatStorePat <FLAT_STORE_DWORD, global_store, i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, global_store, v2i32>;
def : FlatStorePat <FLAT_STORE_DWORDX4, global_store, v4i32>;
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr, vt:$data)),
(inst $addr, $data, 0, 0)
>;
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
} // End Predicates = [isVI]

View File

@ -4583,6 +4583,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
if (CmpVT.getVectorElementType() == MVT::i64)
// 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
// but it's possible that our operands are 64-bit but our result is 32-bit.
// Bail in this case.
return SDValue();
if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");

View File

@ -118,7 +118,6 @@ namespace {
};
SpecificBumpPtrAllocator<MergeCandidate> Allocator;
SmallVector<const MergeCandidate*,4> Candidates;
SmallVector<MachineInstr*,4> MergeBaseCandidates;
void moveLiveRegsBefore(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator Before);
@ -141,7 +140,6 @@ namespace {
MachineBasicBlock::iterator &MBBI);
bool MergeBaseUpdateLoadStore(MachineInstr *MI);
bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@ -933,6 +931,11 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
CanMergeToLSMulti = false;
// LDRD/STRD do not allow SP/PC. LDM/STM either do not support it or
// have it deprecated; LDM to PC is fine but cannot happen here.
if (PReg == ARM::SP || PReg == ARM::PC)
CanMergeToLSMulti = CanMergeToLSDouble = false;
// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
@ -940,16 +943,15 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
break;
const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
if (Reg == ARM::SP || Reg == ARM::PC)
break;
// See if the current load/store may be part of a multi load/store.
unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
bool PartOfLSMulti = CanMergeToLSMulti;
if (PartOfLSMulti) {
// Cannot load from SP
if (Reg == ARM::SP)
PartOfLSMulti = false;
// Register numbers must be in ascending order.
else if (RegNum <= PRegNum)
if (RegNum <= PRegNum)
PartOfLSMulti = false;
// For VFP / NEON load/store multiples, the registers must be
// consecutive and within the limit on the number of registers per
@ -993,6 +995,76 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
} while (SIndex < EIndex);
}
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
unsigned Bytes, unsigned Limit,
ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
case ARM::tSUBi8:
case ARM::t2SUBri:
case ARM::SUBri:
CheckCPSRDef = true;
break;
case ARM::tSUBspi:
break;
}
// Make sure the offset fits in 8 bits.
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
(MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
unsigned Bytes, unsigned Limit,
ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
case ARM::tADDi8:
case ARM::t2ADDri:
case ARM::ADDri:
CheckCPSRDef = true;
break;
case ARM::tADDspi:
break;
}
// Make sure the offset fits in 8 bits.
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
(MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
return CheckCPSRDef ? !definesCPSR(MI) : true;
}
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
ARM_AM::AMSubMode Mode) {
switch (Opc) {
@ -1060,75 +1132,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
}
}
/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
ARMCC::CondCodes Pred, unsigned PredReg) {
bool CheckCPSRDef;
int Scale;
switch (MI.getOpcode()) {
case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
case ARM::t2SUBri:
case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
case ARM::t2ADDri:
case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
default: return 0;
}
unsigned MIPredReg;
if (MI.getOperand(0).getReg() != Reg ||
MI.getOperand(1).getReg() != Reg ||
getInstrPredicate(&MI, MIPredReg) != Pred ||
MIPredReg != PredReg)
return 0;
if (CheckCPSRDef && definesCPSR(&MI))
return 0;
return MI.getOperand(2).getImm() * Scale;
}
/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
Offset = 0;
MachineBasicBlock &MBB = *MBBI->getParent();
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator EndMBBI = MBB.end();
if (MBBI == BeginMBBI)
return EndMBBI;
// Skip debug values.
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
--PrevMBBI;
Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
return Offset == 0 ? EndMBBI : PrevMBBI;
}
/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
Offset = 0;
MachineBasicBlock &MBB = *MBBI->getParent();
MachineBasicBlock::iterator EndMBBI = MBB.end();
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
// Skip debug values.
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
if (NextMBBI == EndMBBI)
return EndMBBI;
Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
return Offset == 0 ? EndMBBI : NextMBBI;
}
/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
@ -1148,6 +1151,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
const MachineOperand &BaseOP = MI->getOperand(0);
unsigned Base = BaseOP.getReg();
bool BaseKill = BaseOP.isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
unsigned Opcode = MI->getOpcode();
@ -1159,24 +1163,49 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
if (MI->getOperand(i).getReg() == Base)
return false;
int Bytes = getLSMultipleTransferSize(MI);
MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator MBBI(MI);
int Offset;
MachineBasicBlock::iterator MergeInstr
= findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
bool DoMerge = false;
ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
if (Mode == ARM_AM::ia && Offset == -Bytes) {
Mode = ARM_AM::db;
} else if (Mode == ARM_AM::ib && Offset == -Bytes) {
Mode = ARM_AM::da;
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
return false;
// Try merging with the previous instruction.
MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
if (Mode == ARM_AM::ia &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
Mode = ARM_AM::db;
DoMerge = true;
} else if (Mode == ARM_AM::ib &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
Mode = ARM_AM::da;
DoMerge = true;
}
if (DoMerge)
MBB.erase(PrevMBBI);
}
MBB.erase(MergeInstr);
// Try merging with the next instruction.
MachineBasicBlock::iterator EndMBBI = MBB.end();
if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
DoMerge = true;
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
DoMerge = true;
}
if (DoMerge)
MBB.erase(NextMBBI);
}
if (!DoMerge)
return false;
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
@ -1254,6 +1283,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
unsigned Base = getLoadStoreBaseOp(*MI).getReg();
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned Opcode = MI->getOpcode();
DebugLoc DL = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
@ -1265,6 +1295,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
bool isLd = isLoadSingle(Opcode);
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (MI->getOperand(0).getReg() == Base)
@ -1272,31 +1303,55 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
int Bytes = getLSMultipleTransferSize(MI);
bool DoMerge = false;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
unsigned NewOpc = 0;
// AM2 - 12 bits, thumb2 - 8 bits.
unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// Try merging with the previous instruction.
MachineBasicBlock &MBB = *MI->getParent();
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
MachineBasicBlock::iterator MBBI(MI);
int Offset;
MachineBasicBlock::iterator MergeInstr
= findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
unsigned NewOpc;
if (!isAM5 && Offset == Bytes) {
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
} else if (Offset == -Bytes) {
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (Offset == Bytes) {
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
} else if (!isAM5 && Offset == -Bytes) {
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
} else
return false;
if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
} else if (!isAM5 &&
isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
DoMerge = true;
}
if (DoMerge) {
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
MBB.erase(PrevMBBI);
}
}
MBB.erase(MergeInstr);
ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
// Try merging with the next instruction.
MachineBasicBlock::iterator EndMBBI = MBB.end();
if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
} else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
DoMerge = true;
}
if (DoMerge) {
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
MBB.erase(NextMBBI);
}
}
if (!DoMerge)
return false;
bool isLd = isLoadSingle(Opcode);
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
@ -1313,16 +1368,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
if (isAM2) {
// LDR_PRE, LDR_POST
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
.addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
}
} else {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
.addReg(Base, RegState::Define)
@ -1334,12 +1391,13 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
// the vestigial zero-reg offset register. When that's fixed, this clause
// can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
.addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
} else {
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
@ -1351,66 +1409,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
return true;
}
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
"Must have t2STRDi8 or t2LDRDi8");
if (MI.getOperand(3).getImm() != 0)
return false;
// Behaviour for writeback is undefined if base register is the same as one
// of the others.
const MachineOperand &BaseOp = MI.getOperand(2);
unsigned Base = BaseOp.getReg();
const MachineOperand &Reg0Op = MI.getOperand(0);
const MachineOperand &Reg1Op = MI.getOperand(1);
if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
return false;
unsigned PredReg;
ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
MachineBasicBlock::iterator MBBI(MI);
MachineBasicBlock &MBB = *MI.getParent();
int Offset;
MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
PredReg, Offset);
unsigned NewOpc;
if (Offset == 8 || Offset == -8) {
NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
} else {
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
if (Offset == 8 || Offset == -8) {
NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
} else
return false;
}
MBB.erase(MergeInstr);
DebugLoc DL = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
MIB.addOperand(Reg0Op).addOperand(Reg1Op)
.addReg(BaseOp.getReg(), RegState::Define);
} else {
assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
MIB.addReg(BaseOp.getReg(), RegState::Define)
.addOperand(Reg0Op).addOperand(Reg1Op);
}
MIB.addReg(BaseOp.getReg(), RegState::Kill)
.addImm(Offset).addImm(Pred).addReg(PredReg);
assert(TII->get(Opcode).getNumOperands() == 6 &&
TII->get(NewOpc).getNumOperands() == 7 &&
"Unexpected number of operands in Opcode specification.");
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
MIB.addOperand(MO);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MBB.erase(MBBI);
return true;
}
/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr *MI) {
@ -1618,7 +1616,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
ARMCC::CondCodes CurrPred = ARMCC::AL;
unsigned Position = 0;
assert(Candidates.size() == 0);
assert(MergeBaseCandidates.size() == 0);
LiveRegsValid = false;
for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
@ -1697,15 +1694,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MBBI = I;
--Position;
// Fallthrough to look into existing chain.
} else if (MBBI->isDebugValue()) {
} else if (MBBI->isDebugValue())
continue;
} else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
MBBI->getOpcode() == ARM::t2STRDi8) {
// ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
// remember them because we may still be able to merge add/sub into them.
MergeBaseCandidates.push_back(MBBI);
}
// If we are here then the chain is broken; Extract candidates for a merge.
if (MemOps.size() > 0) {
@ -1736,9 +1726,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (Merged) {
Changed = true;
unsigned Opcode = Merged->getOpcode();
if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
MergeBaseUpdateLSDouble(*Merged);
else
if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
MergeBaseUpdateLSMultiple(Merged);
} else {
for (MachineInstr *MI : Candidate->Instrs) {
@ -1753,10 +1741,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
Candidates.clear();
// Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
for (MachineInstr *MI : MergeBaseCandidates)
MergeBaseUpdateLSDouble(*MI);
MergeBaseCandidates.clear();
return Changed;
}

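The isMatchingIncrement/isMatchingDecrement helpers restored above implement a simple pattern: look next to a load/store for "Base += K" or "Base -= K" under the same predicate, so the update can be folded into a pre- or post-indexed instruction. A stripped-down sketch of the matching step (made-up opcode enum, no predicate or CPSR handling):

    #include <cstddef>
    #include <vector>

    enum Opcode { ADDri, SUBri, OTHER };
    struct Instr { Opcode Op; unsigned Dst, Src; int Imm; };

    // Return +K / -K if the instruction just before index I adjusts Base
    // in place by K, or 0 if there is nothing to fold.
    int incDecBefore(const std::vector<Instr> &Block, std::size_t I,
                     unsigned Base) {
      if (I == 0)
        return 0;
      const Instr &P = Block[I - 1];
      if (P.Dst != Base || P.Src != Base)
        return 0;
      if (P.Op == ADDri) return P.Imm;
      if (P.Op == SUBri) return -P.Imm;
      return 0;
    }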
View File

@ -566,7 +566,7 @@ Robert Muth started working on an alternate jump table implementation that
does not put the tables in-line in the text. This is more like the llvm
default jump table implementation. This might be useful sometime. Several
revisions of patches are on the mailing list, beginning at:
http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html
http://lists.llvm.org/pipermail/llvm-dev/2009-June/022763.html
//===---------------------------------------------------------------------===//

View File

@ -57,7 +57,7 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Some things to try that should be better:
// * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
// * 'movs $dst, $src' if cpsr isn't live
// See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html
// See: http://lists.llvm.org/pipermail/llvm-dev/2014-August/075998.html
// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))

View File

@ -864,13 +864,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
// Check for an unused caller-saved register.
for ( ; *CallerSavedRegs; ++CallerSavedRegs) {
MCPhysReg FreeReg = *CallerSavedRegs;
if (!MRI.reg_nodbg_empty(FreeReg))
if (MRI.isPhysRegUsed(FreeReg))
continue;
// Check aliased register usage.
bool IsCurrentRegUsed = false;
for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI)
if (!MRI.reg_nodbg_empty(*AI)) {
if (MRI.isPhysRegUsed(*AI)) {
IsCurrentRegUsed = true;
break;
}

View File

@ -500,14 +500,6 @@ def : MipsPat<(trunc (assertzext GPR64:$src)),
def : MipsPat<(i32 (trunc GPR64:$src)),
(SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;
// Bypass trunc nodes for bitwise ops.
def : MipsPat<(i32 (trunc (and GPR64:$lhs, GPR64:$rhs))),
(EXTRACT_SUBREG (AND64 GPR64:$lhs, GPR64:$rhs), sub_32)>;
def : MipsPat<(i32 (trunc (or GPR64:$lhs, GPR64:$rhs))),
(EXTRACT_SUBREG (OR64 GPR64:$lhs, GPR64:$rhs), sub_32)>;
def : MipsPat<(i32 (trunc (xor GPR64:$lhs, GPR64:$rhs))),
(EXTRACT_SUBREG (XOR64 GPR64:$lhs, GPR64:$rhs), sub_32)>;
// variable shift instructions patterns
def : MipsPat<(shl GPR64:$rt, (i32 (trunc GPR64:$rs))),
(DSLLV GPR64:$rt, (EXTRACT_SUBREG GPR64:$rs, sub_32))>;

View File

@ -267,6 +267,9 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
}
unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
if (!TargetSupported)
return 0;
assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i32 &&
"Alloca should always return a pointer.");
@ -290,12 +293,7 @@ unsigned MipsFastISel::materializeInt(const Constant *C, MVT VT) {
return 0;
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
const ConstantInt *CI = cast<ConstantInt>(C);
int64_t Imm;
if ((VT != MVT::i1) && CI->isNegative())
Imm = CI->getSExtValue();
else
Imm = CI->getZExtValue();
return materialize32BitInt(Imm, RC);
return materialize32BitInt(CI->getZExtValue(), RC);
}
unsigned MipsFastISel::materialize32BitInt(int64_t Imm,
@ -382,6 +380,9 @@ unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) {
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
if (!TargetSupported)
return 0;
EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
@ -981,6 +982,13 @@ bool MipsFastISel::selectSelect(const Instruction *I) {
if (!Src1Reg || !Src2Reg || !CondReg)
return false;
unsigned ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
if (!ZExtCondReg)
return false;
if (!emitIntExt(MVT::i1, CondReg, MVT::i32, ZExtCondReg, true))
return false;
unsigned ResultReg = createResultReg(RC);
unsigned TempReg = createResultReg(RC);
@ -989,7 +997,7 @@ bool MipsFastISel::selectSelect(const Instruction *I) {
emitInst(TargetOpcode::COPY, TempReg).addReg(Src2Reg);
emitInst(CondMovOpc, ResultReg)
.addReg(Src1Reg).addReg(CondReg).addReg(TempReg);
.addReg(Src1Reg).addReg(ZExtCondReg).addReg(TempReg);
updateValueMap(I, ResultReg);
return true;
}
@ -1232,12 +1240,19 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
}
bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!TargetSupported)
return false;
CallingConv::ID CC = CLI.CallConv;
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
const Value *Callee = CLI.Callee;
MCSymbol *Symbol = CLI.Symbol;
// Do not handle FastCC.
if (CC == CallingConv::Fast)
return false;
// Allow SelectionDAG isel to handle tail calls.
if (IsTailCall)
return false;
@ -1312,6 +1327,9 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
}
bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!TargetSupported)
return false;
switch (II->getIntrinsicID()) {
default:
return false;
@ -1415,6 +1433,11 @@ bool MipsFastISel::selectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
CallingConv::ID CC = F.getCallingConv();
// Do not handle FastCC.
if (CC == CallingConv::Fast)
return false;
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

View File

@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
@ -53,11 +54,6 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
cl::desc("MIPS: Don't trap on integer division by zero."),
cl::init(false));
cl::opt<bool>
EnableMipsFastISel("mips-fast-isel", cl::Hidden,
cl::desc("Allow mips-fast-isel to be used"),
cl::init(false));
static const MCPhysReg Mips64DPRegs[8] = {
Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@ -461,7 +457,7 @@ const MipsTargetLowering *MipsTargetLowering::create(const MipsTargetMachine &TM
FastISel *
MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
if (!EnableMipsFastISel)
if (!funcInfo.MF->getTarget().Options.EnableFastISel)
return TargetLowering::createFastISel(funcInfo, libInfo);
return Mips::createFastISel(funcInfo, libInfo);
}

View File

@ -12,6 +12,7 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@ -22,10 +23,12 @@ typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
class PPCDisassembler : public MCDisassembler {
bool IsLittleEndian;
public:
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
~PPCDisassembler() override {}
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
bool IsLittleEndian)
: MCDisassembler(STI, Ctx), IsLittleEndian(IsLittleEndian) {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@ -37,7 +40,13 @@ class PPCDisassembler : public MCDisassembler {
static MCDisassembler *createPPCDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new PPCDisassembler(STI, Ctx);
return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/false);
}
static MCDisassembler *createPPCLEDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
return new PPCDisassembler(STI, Ctx, /*IsLittleEndian=*/true);
}
extern "C" void LLVMInitializePowerPCDisassembler() {
@ -47,7 +56,7 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
TargetRegistry::RegisterMCDisassembler(ThePPC64Target,
createPPCDisassembler);
TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget,
createPPCDisassembler);
createPPCLEDisassembler);
}
// FIXME: These can be generated by TableGen from the existing register
@ -383,9 +392,9 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
// The instruction is big-endian encoded.
uint32_t Inst =
(Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 8) | (Bytes[3] << 0);
// Read the instruction in the proper endianness.
uint32_t Inst = IsLittleEndian ? support::endian::read32le(Bytes.data())
: support::endian::read32be(Bytes.data());
if (STI.getFeatureBits()[PPC::FeatureQPX]) {
DecodeStatus result =
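
The real helpers here are support::endian::read32le and read32be from Support/Endian.h, included above. A standalone sketch of the byte swapping they perform (plain C++; the bytes are just an example word):

#include <cstdint>
#include <cstdio>

static uint32_t read32be(const uint8_t *B) {
  return (uint32_t(B[0]) << 24) | (uint32_t(B[1]) << 16) |
         (uint32_t(B[2]) << 8) | uint32_t(B[3]);
}
static uint32_t read32le(const uint8_t *B) {
  return (uint32_t(B[3]) << 24) | (uint32_t(B[2]) << 16) |
         (uint32_t(B[1]) << 8) | uint32_t(B[0]);
}

int main() {
  const uint8_t Bytes[4] = {0x7C, 0x08, 0x02, 0xA6};
  // The same four bytes yield different instruction words per byte order.
  printf("BE: 0x%08X  LE: 0x%08X\n", read32be(Bytes), read32le(Bytes));
}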

View File

@ -363,71 +363,85 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
SM.recordPatchPoint(MI);
PatchPointOpers Opers(&MI);
int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
unsigned EncodedBytes = 0;
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 0;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));
const MachineOperand &CalleeMO =
Opers.getMetaOper(PatchPointOpers::TargetPos);
// Save the current TOC pointer before the remote call.
int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
if (CalleeMO.isImm()) {
int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
EncodedBytes = 0;
// Materialize the jump address:
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8)
.addReg(ScratchReg)
.addImm((CallTarget >> 32) & 0xFFFF));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(32).addImm(16));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm((CallTarget >> 16) & 0xFFFF));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8)
.addReg(ScratchReg)
.addReg(ScratchReg)
.addImm(CallTarget & 0xFFFF));
// Save the current TOC pointer before the remote call.
int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
// If we're on ELFv1, then we need to load the actual function pointer from
// the function descriptor.
if (!Subtarget->isELFv2ABI()) {
// Load the new TOC pointer and the function address, but not r11
// (needing this is rare, and loading it here would prevent passing it
// via a 'nest' parameter).

// If we're on ELFv1, then we need to load the actual function pointer
// from the function descriptor.
if (!Subtarget->isELFv2ABI()) {
// Load the new TOC pointer and the function address, but not r11
// (needing this is rare, and loading it here would prevent passing it
// via a 'nest' parameter).
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(8)
.addReg(ScratchReg));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(ScratchReg)
.addImm(0)
.addReg(ScratchReg));
++EncodedBytes;
}
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8)
.addReg(ScratchReg));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
++EncodedBytes;
// Restore the TOC pointer after the call.
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(8)
.addReg(ScratchReg));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(ScratchReg)
.addImm(0)
.addReg(ScratchReg));
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
}
} else if (CalleeMO.isGlobal()) {
const GlobalValue *GValue = CalleeMO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, OutContext);
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8));
++EncodedBytes;
// Restore the TOC pointer after the call.
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
.addReg(PPC::X1));
++EncodedBytes;
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP)
.addExpr(SymVar));
EncodedBytes += 2;
}
// Each instruction is 4 bytes.
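
The materialization sequence assembles the 48-bit call target from three 16-bit pieces. A standalone recomputation of what the li/rldic/oris/ori sequence leaves in the scratch register (plain C++; the address is a made-up example):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t CallTarget = 0x123456789ABCULL; // high 16 bits must be zero
  assert((CallTarget & 0xFFFFFFFFFFFFULL) == CallTarget);
  uint64_t Scratch = (CallTarget >> 32) & 0xFFFF; // LI8: bits 47:32
  Scratch <<= 32;                                 // RLDIC 32,16: shift into place
  Scratch |= ((CallTarget >> 16) & 0xFFFF) << 16; // ORIS8: bits 31:16
  Scratch |= CallTarget & 0xFFFF;                 // ORI8: bits 15:0
  assert(Scratch == CallTarget);
}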

View File

@ -306,10 +306,9 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
DebugLoc dl = MI->getDebugLoc();
const MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UsedRegMask = 0;
for (unsigned i = 0; i != 32; ++i)
if (MRI.isPhysRegModified(VRRegNo[i]))
if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
UsedRegMask |= 1 << (31-i);
// Live in and live out values already must be in the mask, so don't bother
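
The mask layout assumed by the shift is that vector register i occupies bit 31 - i, putting v0 in the most significant position. A quick standalone check (plain C++; which registers are "used" is made up):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t UsedRegMask = 0;
  for (unsigned i = 0; i != 32; ++i)
    if (i == 0 || i == 31)         // pretend only v0 and v31 are used
      UsedRegMask |= 1u << (31 - i);
  assert(UsedRegMask == 0x80000001u); // v0 -> bit 31, v31 -> bit 0
}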

View File

@ -2305,14 +2305,15 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (Swap)
std::swap(LHS, RHS);
EVT ResVT = VecVT.changeVectorElementTypeToInteger();
if (Negate) {
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
PPC::VNOR,
VecVT, VCmp, VCmp);
ResVT, VCmp, VCmp);
}
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
return CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
}
if (PPCSubTarget->useCRBits())
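
The fix selects the compare into its integer-element companion type rather than the floating-point operand type. A minimal sketch of that type relationship, assuming it is built against LLVM headers of this vintage:

#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;

int main() {
  EVT VecVT = MVT::v4f32;
  // A vector FP compare produces a mask with same-width integer elements.
  EVT ResVT = VecVT.changeVectorElementTypeToInteger();
  assert(ResVT == EVT(MVT::v4i32));
}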

View File

@ -580,6 +580,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
@ -1416,7 +1417,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
} else
return -1;
if (ShuffleKind == 2 && isLE)
if (isLE)
ShiftAmt = 16 - ShiftAmt;
return ShiftAmt;
@ -1429,6 +1430,11 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 &&
(EltSize == 1 || EltSize == 2 || EltSize == 4));
// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.
if (N->getMaskElt(0) % EltSize != 0)
return false;
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
unsigned ElementBase = N->getMaskElt(0);
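
A standalone illustration of the new guard (plain C++; the mask index is a made-up example): with 4-byte elements, a first byte index that is not a multiple of 4 pulls bytes from two different elements, so the mask can never be an element splat.

#include <cassert>

int main() {
  unsigned EltSize = 4;
  unsigned FirstIdx = 6; // bytes 6..9 straddle 32-bit elements 1 and 2
  assert(FirstIdx % EltSize != 0);
  assert(FirstIdx / EltSize != (FirstIdx + EltSize - 1) / EltSize);
}
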
@ -7011,17 +7017,20 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// t = vsplti c, result = vsldoi t, t, 1
if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
}
}
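
On little-endian subtargets the amount flips to 16 - Amt because vsldoi of a value with itself is a byte rotation, and the reversed lane order needs the inverse rotation. A standalone model of the rotation algebra (plain C++, not the ISel code):

#include <array>
#include <cassert>
#include <cstdint>

using V16 = std::array<uint8_t, 16>;

// Models vsldoi t,t,Amt as a rotate of the 16 bytes by Amt.
static V16 rotl(const V16 &A, unsigned Amt) {
  V16 R{};
  for (unsigned i = 0; i < 16; ++i)
    R[i] = A[(i + Amt) % 16];
  return R;
}

int main() {
  V16 T{};
  for (unsigned i = 0; i < 16; ++i)
    T[i] = uint8_t(i);
  // Rotating by Amt and then by 16 - Amt round-trips: the two amounts
  // are inverse rotations of each other.
  assert(rotl(rotl(T, 3), 13) == T);
}
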
@ -9957,6 +9966,9 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
if (Src.getValueType() == MVT::f32) {
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
DCI.AddToWorklist(Src.getNode());
} else if (Src.getValueType() != MVT::f64) {
// Make sure that we don't pick up a ppc_fp128 source value.
return SDValue();
}
unsigned FCTOp =

View File

@ -106,7 +106,7 @@ for 1,2,4,8 bytes.
//===---------------------------------------------------------------------===//
It would be nice to revert this patch:
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20060213/031986.html
And teach the dag combiner enough to simplify the code expanded before
legalize. It seems plausible that this knowledge would let it simplify other

View File

@ -190,11 +190,11 @@ static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
{
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
if (!MRI->reg_nodbg_empty(reg))
if (MRI->isPhysRegUsed(reg))
return false;
for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
if (!MRI->reg_nodbg_empty(reg))
if (MRI->isPhysRegUsed(reg))
return false;
return true;
@ -206,10 +206,10 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
return !(MFI->hasCalls() // has calls
|| !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed
|| !MRI.reg_nodbg_empty(SP::O6) // %SP is used
|| hasFP(MF)); // need %FP
return !(MFI->hasCalls() // has calls
|| MRI.isPhysRegUsed(SP::L0) // Too many registers needed
|| MRI.isPhysRegUsed(SP::O6) // %SP is used
|| hasFP(MF)); // need %FP
}
void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
@ -218,13 +218,16 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
// Remap %i[0-7] to %o[0-7].
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
if (MRI.reg_nodbg_empty(reg))
if (!MRI.isPhysRegUsed(reg))
continue;
unsigned mapped_reg = (reg - SP::I0 + SP::O0);
assert(MRI.reg_nodbg_empty(mapped_reg));
assert(!MRI.isPhysRegUsed(mapped_reg));
// Replace I register with O register.
MRI.replaceRegWith(reg, mapped_reg);
// Mark the reg unused.
MRI.setPhysRegUnused(reg);
}
// Rewrite MBB's Live-ins.

View File

@ -53,10 +53,6 @@ def RetCC_SystemZ : CallingConv<[
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
// ABI-compliant code returns long double by reference, but that conversion
// is left to higher-level code. Perhaps we could add an f128 definition
// here for code that doesn't care about the ABI?
]>;
//===----------------------------------------------------------------------===//

View File

@ -1175,6 +1175,20 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
return Chain;
}
bool SystemZTargetLowering::
CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
// Detect unsupported vector return types.
if (Subtarget.hasVector())
VerifyVectorTypes(Outs);
SmallVector<CCValAssign, 16> RetLocs;
CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
}
SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,

View File

@ -423,6 +423,10 @@ class SystemZTargetLowering : public TargetLowering {
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,

View File

@ -681,6 +681,9 @@ class X86AsmParser : public MCTargetAsmParser {
std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
void AddDefaultSrcDestOperands(
OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
std::unique_ptr<X86Operand> ParseOperand();
std::unique_ptr<X86Operand> ParseATTOperand();
std::unique_ptr<X86Operand> ParseIntelOperand();
@ -1014,6 +1017,19 @@ std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
Loc, Loc, 0);
}
void X86AsmParser::AddDefaultSrcDestOperands(
OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
if (isParsingIntelSyntax()) {
Operands.push_back(std::move(Dst));
Operands.push_back(std::move(Src));
}
else {
Operands.push_back(std::move(Src));
Operands.push_back(std::move(Dst));
}
}
std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
if (isParsingIntelSyntax())
return ParseIntelOperand();
@ -2228,26 +2244,18 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Name.startswith("ins") && Operands.size() == 1 &&
(Name == "insb" || Name == "insw" || Name == "insl" ||
Name == "insd" )) {
if (isParsingIntelSyntax()) {
Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
Operands.push_back(DefaultMemDIOperand(NameLoc));
} else {
Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
Operands.push_back(DefaultMemDIOperand(NameLoc));
}
AddDefaultSrcDestOperands(Operands,
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
DefaultMemDIOperand(NameLoc));
}
// Append default arguments to "outs[bwld]"
if (Name.startswith("outs") && Operands.size() == 1 &&
(Name == "outsb" || Name == "outsw" || Name == "outsl" ||
Name == "outsd" )) {
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemSIOperand(NameLoc));
Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
} else {
Operands.push_back(DefaultMemSIOperand(NameLoc));
Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
}
AddDefaultSrcDestOperands(Operands,
DefaultMemSIOperand(NameLoc),
X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
}
// Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
@ -2279,13 +2287,9 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
(Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
if (Operands.size() == 1) {
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemSIOperand(NameLoc));
Operands.push_back(DefaultMemDIOperand(NameLoc));
} else {
Operands.push_back(DefaultMemDIOperand(NameLoc));
Operands.push_back(DefaultMemSIOperand(NameLoc));
}
AddDefaultSrcDestOperands(Operands,
DefaultMemDIOperand(NameLoc),
DefaultMemSIOperand(NameLoc));
} else if (Operands.size() == 3) {
X86Operand &Op = (X86Operand &)*Operands[1];
X86Operand &Op2 = (X86Operand &)*Operands[2];
@ -2305,13 +2309,9 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Operands.size() == 1) {
if (Name == "movsd")
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemDIOperand(NameLoc));
Operands.push_back(DefaultMemSIOperand(NameLoc));
} else {
Operands.push_back(DefaultMemSIOperand(NameLoc));
Operands.push_back(DefaultMemDIOperand(NameLoc));
}
AddDefaultSrcDestOperands(Operands,
DefaultMemSIOperand(NameLoc),
DefaultMemDIOperand(NameLoc));
} else if (Operands.size() == 3) {
X86Operand &Op = (X86Operand &)*Operands[1];
X86Operand &Op2 = (X86Operand &)*Operands[2];

View File

@ -301,9 +301,8 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
bool FPIsUsed = false;
static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0; i <= 6; ++i)
if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
FPIsUsed = true;
break;
}

View File

@ -1682,6 +1682,8 @@ void X86FrameLowering::adjustForSegmentedStacks(
.addImm(StackSize);
BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
.addImm(X86FI->getArgumentStackSize());
MF.getRegInfo().setPhysRegUsed(Reg10);
MF.getRegInfo().setPhysRegUsed(Reg11);
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());

View File

@ -12640,24 +12640,29 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
if (User->getOpcode() == ISD::FNEG)
return Op;
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
// Assume scalar op for initialization; update for vector if needed.
// Note that there are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
// Using a 16-byte mask allows folding the load of the mask with
// the logic op, so it can save (~4 bytes) on code size.
MVT EltVT = VT;
unsigned NumElts = VT == MVT::f64 ? 2 : 4;
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
MVT LogicVT;
MVT EltVT;
unsigned NumElts;
if (VT.isVector()) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
} else {
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
// Using a 16-byte mask allows folding the load of the mask with
// the logic op, so it can save (~4 bytes) on code size.
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
EltVT = VT;
NumElts = (VT == MVT::f64) ? 2 : 4;
}
unsigned EltBits = EltVT.getSizeInBits();
@ -12670,26 +12675,25 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
SDValue Mask = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, Alignment);
if (VT.isVector()) {
// For a vector, cast operands to a vector type, perform the logic op,
// and cast the result back to the original value type.
MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
SDValue MaskCasted = DAG.getBitcast(VecVT, Mask);
SDValue Operand = IsFNABS ? DAG.getBitcast(VecVT, Op0.getOperand(0))
: DAG.getBitcast(VecVT, Op0);
unsigned BitOp = IsFABS ? ISD::AND : IsFNABS ? ISD::OR : ISD::XOR;
return DAG.getBitcast(VT,
DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
}
// If not vector, then scalar.
unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
unsigned LogicOp =
IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
return DAG.getNode(BitOp, dl, VT, Operand, Mask);
if (VT.isVector())
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
// For the scalar case extend to a 128-bit vector, perform the logic op,
// and extract the scalar result back out.
Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand);
SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,
DAG.getIntPtrConstant(0, dl));
}
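
Whether performed as a 16-byte vector op or extracted back to a scalar, the lowering rests on plain sign-bit algebra: fabs ANDs the sign bit away, fneg XORs it, and fnabs ORs it in. A standalone check of those identities (plain C++):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t bits(double D) { uint64_t U; std::memcpy(&U, &D, 8); return U; }
static double fromBits(uint64_t U) { double D; std::memcpy(&D, &U, 8); return D; }

int main() {
  const uint64_t SignMask = 1ULL << 63;
  double X = -3.5;
  assert(fromBits(bits(X) & ~SignMask) == 3.5);    // fabs: clear the sign bit
  assert(fromBits(bits(X) ^ SignMask) == 3.5);     // fneg: flip the sign bit
  assert(fromBits(bits(3.5) | SignMask) == -3.5);  // fnabs: set the sign bit
}
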
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
@ -12729,10 +12733,16 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
Constant *C = ConstantVector::get(CV);
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
// Perform all logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE. This allows load folding of the
// constants into the logic instructions.
MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1);
// Next, clear the sign bit from the first operand (magnitude).
// If it's a constant, we can clear it here.
@ -12740,7 +12750,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
APFloat APF = Op0CN->getValueAPF();
// If the magnitude is a positive zero, the sign bit alone is enough.
if (APF.isPosZero())
return SignBit;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
DAG.getIntPtrConstant(0, dl));
APF.clearSign();
CV[0] = ConstantFP::get(*Context, APF);
} else {
@ -12750,15 +12761,18 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
}
C = ConstantVector::get(CV);
CPIdx = DAG.getConstantPool(C, PtrVT, 16);
SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
SDValue Val = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
// If the magnitude operand wasn't a constant, we need to AND out the sign.
if (!isa<ConstantFPSDNode>(Op0))
Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Val);
if (!isa<ConstantFPSDNode>(Op0)) {
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val);
}
// OR the magnitude value with the sign bit.
return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
DAG.getIntPtrConstant(0, dl));
}
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {

View File

@ -956,18 +956,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
{ X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
// FIXME: We should not be folding Fs* scalar loads into vector
// instructions because the vector instructions require vector-sized
// loads. Lowering should create vector-sized instructions (the Fv*
// variants below) to allow load folding.
{ X86::FsANDNPDrr, X86::FsANDNPDrm, TB_ALIGN_16 },
{ X86::FsANDNPSrr, X86::FsANDNPSrm, TB_ALIGN_16 },
{ X86::FsANDPDrr, X86::FsANDPDrm, TB_ALIGN_16 },
{ X86::FsANDPSrr, X86::FsANDPSrm, TB_ALIGN_16 },
{ X86::FsORPDrr, X86::FsORPDrm, TB_ALIGN_16 },
{ X86::FsORPSrr, X86::FsORPSrm, TB_ALIGN_16 },
{ X86::FsXORPDrr, X86::FsXORPDrm, TB_ALIGN_16 },
{ X86::FsXORPSrr, X86::FsXORPSrm, TB_ALIGN_16 },
// Do not fold Fs* scalar logical op loads because there are no scalar
// load variants for these instructions. When folded, the load is required
// to be 128 bits wide, so the load size would not match.
{ X86::FvANDNPDrr, X86::FvANDNPDrm, TB_ALIGN_16 },
{ X86::FvANDNPSrr, X86::FvANDNPSrm, TB_ALIGN_16 },

View File

@ -2919,6 +2919,14 @@ multiclass sse12_fp_packed_vector_logical_alias<
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR128, v2f64, f128mem, loadv2f64, SSEPackedDouble, itins, 0>,
PD, VEX_4V;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR256, v8f32, f256mem, loadv8f32, SSEPackedSingle, itins, 0>,
PS, VEX_4V, VEX_L;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR256, v4f64, f256mem, loadv4f64, SSEPackedDouble, itins, 0>,
PD, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {

View File

@ -93,7 +93,8 @@ static Value *getFCmpValue(bool isordered, unsigned code,
case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break;
case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break;
case 7:
if (!isordered) return ConstantInt::getTrue(LHS->getContext());
if (!isordered)
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
Pred = FCmpInst::FCMP_ORD; break;
}
return Builder->CreateFCmp(Pred, LHS, RHS);
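
The point of the fix is that an "always true" result must match the compare's result type, which is a vector of i1 when the operands are vectors; the old getTrue(Context) was always plain i1. A minimal sketch, assuming LLVM headers of this vintage are available:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *ScalarTy = Type::getFloatTy(Ctx);
  Type *VecTy = VectorType::get(ScalarTy, 4);
  // i1 for a scalar compare, <4 x i1> for a vector compare:
  Type *VecRes = CmpInst::makeCmpResultType(VecTy);
  // "All true" in the correct shape (a splat for the vector case).
  Constant *TrueVal = ConstantInt::get(VecRes, 1);
  (void)TrueVal;
}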

View File

@ -2112,9 +2112,8 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
Value *RHS, Instruction &OrigI,
Value *&Result, Constant *&Overflow) {
assert((!OrigI.isCommutative() ||
!(isa<Constant>(LHS) && !isa<Constant>(RHS))) &&
"call with a constant RHS if possible!");
if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
std::swap(LHS, RHS);
auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
Result = OpResult;

View File

@ -658,7 +658,7 @@ bool EarlyCSE::run() {
// gains over vector when the container becomes very large due to the
// specific access patterns. For more information see the mailing list
// discussion on this:
// http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
// http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
std::deque<StackNode *> nodesToProcess;
bool Changed = false;

View File

@ -1847,10 +1847,17 @@ static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
if (NewITy->getBitWidth() >= OldITy->getBitWidth())
return true;
// For integer types, we can't handle any bit-width differences. This would
// break both vector conversions with extension and introduce endianness
// issues when used in conjunction with loads and stores.
if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
assert(cast<IntegerType>(OldTy)->getBitWidth() !=
cast<IntegerType>(NewTy)->getBitWidth() &&
"We can't have the same bitwidth for different int types");
return false;
}
if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@ -1885,10 +1892,8 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
if (OldTy == NewTy)
return V;
if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
if (NewITy->getBitWidth() > OldITy->getBitWidth())
return IRB.CreateZExt(V, NewITy);
assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
"Integer types must be the exact same to convert.");
// See if we need inttoptr for this type pair. A cast involving both scalars
// and vectors requires an additional bitcast.
@ -2134,6 +2139,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
// We can't handle loads that extend past the allocated memory.
if (DL.getTypeStoreSize(LI->getType()) > Size)
return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
@ -2152,6 +2160,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
// We can't handle stores that extend past the allocated memory.
if (DL.getTypeStoreSize(ValueTy) > Size)
return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
@ -2585,6 +2596,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
@ -2592,13 +2604,27 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
} else if (IntTy && LI.getType()->isIntegerTy()) {
V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
canConvertValue(DL, NewAllocaTy, LI.getType())) {
NewEndOffset == NewAllocaEndOffset &&
(canConvertValue(DL, NewAllocaTy, TargetTy) ||
(IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
TargetTy->isIntegerTy()))) {
LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
LI.isVolatile(), LI.getName());
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
V = NewLI;
// If this is an integer load past the end of the slice (which means the
// bytes outside the slice are undef or this load is dead) just forcibly
// fix the integer size with correct handling of endianness.
if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
if (AITy->getBitWidth() < TITy->getBitWidth()) {
V = IRB.CreateZExt(V, TITy, "load.ext");
if (DL.isBigEndian())
V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
"endian_shift");
}
} else {
Type *LTy = TargetTy->getPointerTo();
LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
@ -2718,10 +2744,25 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
if (IntTy && V->getType()->isIntegerTy())
return rewriteIntegerStore(V, SI);
const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
StoreInst *NewSI;
if (NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset &&
canConvertValue(DL, V->getType(), NewAllocaTy)) {
(canConvertValue(DL, V->getType(), NewAllocaTy) ||
(IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
V->getType()->isIntegerTy()))) {
// If this is an integer store past the end of the slice (and thus the bytes
// past that point are irrelevant or this is unreachable), truncate the
// value prior to storing.
if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
if (VITy->getBitWidth() > AITy->getBitWidth()) {
if (DL.isBigEndian())
V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
"endian_shift");
V = IRB.CreateTrunc(V, AITy, "load.trunc");
}
V = convertValue(DL, IRB, V, NewAllocaTy);
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
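
The endian_shift exists because on a big-endian target the defined bytes of a narrow slice sit in the high end of the widened integer, while on little-endian a plain zext or trunc already lines up. A standalone illustration (plain C++; the values are made up):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t Slice = 0xBEEF;                            // the two defined bytes
  uint32_t BigEndianWidened = uint32_t(Slice) << 16;  // shl by 32 - 16
  assert(BigEndianWidened == 0xBEEF0000u);            // bytes stay low-addressed
  uint32_t LittleEndianWidened = Slice;               // plain zext suffices
  assert(LittleEndianWidened == 0x0000BEEFu);
}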

View File

@ -227,10 +227,16 @@ Value *Scatterer::operator[](unsigned I) {
if (!Idx)
break;
unsigned J = Idx->getZExtValue();
CV[J] = Insert->getOperand(1);
V = Insert->getOperand(0);
if (I == J)
if (I == J) {
CV[J] = Insert->getOperand(1);
return CV[J];
} else if (!CV[J]) {
// Only cache the first entry we find for each index we're not actively
// searching for. This prevents us from going too far up the chain and
// caching incorrect entries.
CV[J] = Insert->getOperand(1);
}
}
CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
V->getName() + ".i" + Twine(I));
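
In other words, walking the insertelement chain from the newest instruction downward, only the first value seen for a lane is live; caching anything found deeper would record an overwritten value. A plain C++ model (the chain contents are made up):

#include <cassert>
#include <utility>
#include <vector>

int main() {
  // Chain recorded newest-first: lane 0 written twice, newest value is 7.
  std::vector<std::pair<unsigned, int>> Chain = {{0, 7}, {0, 3}, {1, 5}};
  int CV[4];
  bool Seen[4] = {};
  for (const auto &E : Chain)
    if (!Seen[E.first]) { // cache only the first (newest) entry per lane
      CV[E.first] = E.second;
      Seen[E.first] = true;
    }
  assert(CV[0] == 7 && CV[1] == 5); // the stale 3 is never cached
}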

View File

@ -228,3 +228,51 @@ define i32 @test12(i32 %x, i32 %y, i8* %p) nounwind {
; CHECK-LABEL: @test12(
; CHECK: ret i32 %r
}
@P = internal global i32 715827882, align 4
@Q = internal global i32 715827883, align 4
@.str = private unnamed_addr constant [7 x i8] c"%u %u\0A\00", align 1
; Make sure we recognize that u[0] and u[Global + Cst] may alias
; when the addition has wrapping semantics.
; PR24468.
; CHECK-LABEL: @test13(
; Make sure the stores appear before the related loads.
; CHECK: store i8 42,
; CHECK: store i8 99,
; Find the loads and make sure they are used in the arguments to the printf.
; CHECK: [[T0ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds [3 x i8], [3 x i8]* %t, i32 0, i32 0
; CHECK: [[T0:%[a-zA-Z0-9_]+]] = load i8, i8* [[T0ADDR]], align 1
; CHECK: [[T0ARG:%[a-zA-Z0-9_]+]] = zext i8 [[T0]] to i32
; CHECK: [[U0ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds [3 x i8], [3 x i8]* %u, i32 0, i32 0
; CHECK: [[U0:%[a-zA-Z0-9_]+]] = load i8, i8* [[U0ADDR]], align 1
; CHECK: [[U0ARG:%[a-zA-Z0-9_]+]] = zext i8 [[U0]] to i32
; CHECK: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0), i32 [[T0ARG]], i32 [[U0ARG]])
; CHECK: ret
define void @test13() {
entry:
%t = alloca [3 x i8], align 1
%u = alloca [3 x i8], align 1
%tmp = load i32, i32* @P, align 4
%tmp1 = mul i32 %tmp, 3
%mul = add i32 %tmp1, -2147483646
%idxprom = zext i32 %mul to i64
%arrayidx = getelementptr inbounds [3 x i8], [3 x i8]* %t, i64 0, i64 %idxprom
store i8 42, i8* %arrayidx, align 1
%tmp2 = load i32, i32* @Q, align 4
%tmp3 = mul i32 %tmp2, 3
%mul2 = add i32 %tmp3, 2147483647
%idxprom3 = zext i32 %mul2 to i64
%arrayidx4 = getelementptr inbounds [3 x i8], [3 x i8]* %u, i64 0, i64 %idxprom3
store i8 99, i8* %arrayidx4, align 1
%arrayidx5 = getelementptr inbounds [3 x i8], [3 x i8]* %t, i64 0, i64 0
%tmp4 = load i8, i8* %arrayidx5, align 1
%conv = zext i8 %tmp4 to i32
%arrayidx6 = getelementptr inbounds [3 x i8], [3 x i8]* %u, i64 0, i64 0
%tmp5 = load i8, i8* %arrayidx6, align 1
%conv7 = zext i8 %tmp5 to i32
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i64 0, i64 0), i32 %conv, i32 %conv7)
ret void
}
declare i32 @printf(i8*, ...)
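
For reference, both wrapped index computations in this test come out to zero, so the stores really do write t[0] and u[0]. A standalone arithmetic check (plain C++, using the test's constants):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t P = 715827882, Q = 715827883;
  uint32_t IdxT = P * 3 + uint32_t(-2147483646); // wraps around to 0
  uint32_t IdxU = Q * 3 + 2147483647u;           // also wraps around to 0
  assert(IdxT == 0 && IdxU == 0);
}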

View File

@ -39,7 +39,6 @@ return:
; CHECK-LABEL: pr18068
; CHECK: MayAlias: i32* %0, i32* %arrayidx5
; CHECK: NoAlias: i32* %arrayidx13, i32* %arrayidx5
define i32 @pr18068(i32* %jj7, i32* %j) {
entry:

Some files were not shown because too many files have changed in this diff.