Vendor import of llvm release_39 branch r278877:
https://llvm.org/svn/llvm-project/llvm/branches/release_39@278877
commit a7fe922b98
parent c3aee98e72
@@ -293,6 +293,7 @@ endif()
 option(LLVM_ENABLE_CXX1Y "Compile with C++1y enabled." OFF)
 option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF)
 option(LLVM_ENABLE_LIBCXXABI "Use libc++abi when using libc++." OFF)
+option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF)
 option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
@@ -61,8 +61,6 @@ licenses, and/or restrictions:

 Program             Directory
 -------             ---------
-Autoconf            llvm/autoconf
-                    llvm/projects/ModuleMaker/autoconf
 Google Test         llvm/utils/unittest/googletest
 OpenBSD regex       llvm/lib/Support/{reg*, COPYRIGHT.regex}
 pyyaml tests        llvm/test/YAMLParser/{*.data, LICENSE.TXT}
@@ -144,6 +144,12 @@ function(add_flag_or_print_warning flag name)
   endif()
 endfunction()

+if(LLVM_ENABLE_LLD)
+  check_cxx_compiler_flag("-fuse-ld=lld" CXX_SUPPORTS_LLD)
+  append_if(CXX_SUPPORTS_LLD "-fuse-ld=lld"
+    CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
+endif()
+
 if( LLVM_ENABLE_PIC )
   if( XCODE )
     # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't
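As a rough, hedged sketch of how the new option would be consumed at configure
time (the build directory layout and generator below are assumptions, not part
of this import):

.. code-block:: console

  $ cmake -G Ninja -DLLVM_ENABLE_LLD=ON ../llvm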
@@ -436,7 +436,7 @@ For example, consider this simple LLVM example:
 The X86 instruction selector might produce this machine code for the ``div`` and
 ``ret``:

-.. code-block:: llvm
+.. code-block:: text

 ;; Start of div
 %EAX = mov %reg1024 ;; Copy X (in reg1024) into EAX

@@ -453,7 +453,7 @@ By the end of code generation, the register allocator would coalesce the
 registers and delete the resultant identity moves producing the following
 code:

-.. code-block:: llvm
+.. code-block:: text

 ;; X is in EAX, Y is in ECX
 mov %EAX, %EDX

@@ -965,7 +965,7 @@ target code. For example, consider the following LLVM fragment:

 This LLVM code corresponds to a SelectionDAG that looks basically like this:

-.. code-block:: llvm
+.. code-block:: text

 (fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z)
@@ -144,7 +144,7 @@ exists anywhere in the file.
 The FileCheck -check-prefix option
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-The FileCheck :option:`-check-prefix` option allows multiple test
+The FileCheck `-check-prefix` option allows multiple test
 configurations to be driven from one `.ll` file. This is useful in many
 circumstances, for example, testing different architectural variants with
 :program:`llc`. Here's a simple example:
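A minimal sketch in the spirit of the example this hunk refers to (the march
values, prefixes and checked mnemonics are assumptions, not taken from this
diff):

.. code-block:: llvm

  ; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
  ; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
  define i32 @f(i32 %a, i32 %b) {
    %r = add i32 %a, %b
    ret i32 %r
  }
  ; X32: retl
  ; X64: retq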
@@ -303,7 +303,7 @@ be aware that the definition rule can match `after` its use.

 So, for instance, the code below will pass:

-.. code-block:: llvm
+.. code-block:: text

 ; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0]
 ; CHECK-DAG: vmov.32 [[REG2]][1]

@@ -312,7 +312,7 @@ So, for instance, the code below will pass:

 While this other code, will not:

-.. code-block:: llvm
+.. code-block:: text

 ; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0]
 ; CHECK-DAG: vmov.32 [[REG2]][1]

@@ -473,7 +473,7 @@ To match newline characters in regular expressions the character class

 matches output of the form (from llvm-dwarfdump):

-.. code-block:: llvm
+.. code-block:: text

 DW_AT_location [DW_FORM_sec_offset] (0x00000233)
 DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000c9] = "intd")
@@ -68,11 +68,11 @@ OPTIONS

 .. option:: -B (default)

-Use BSD output format. Alias for :option:`--format=bsd`.
+Use BSD output format. Alias for `--format=bsd`.

 .. option:: -P

-Use POSIX.2 output format. Alias for :option:`--format=posix`.
+Use POSIX.2 output format. Alias for `--format=posix`.

 .. option:: --debug-syms, -a
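For illustration only (the archive name is a placeholder), the two aliases
amount to:

.. code-block:: console

  $ llvm-nm --format=bsd libfoo.a     # same as -B, the default
  $ llvm-nm --format=posix libfoo.a   # same as -P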
@@ -12,16 +12,16 @@ DESCRIPTION
 The :program:`opt` command is the modular LLVM optimizer and analyzer. It
 takes LLVM source files as input, runs the specified optimizations or analyses
 on it, and then outputs the optimized file or the analysis results. The
-function of :program:`opt` depends on whether the :option:`-analyze` option is
+function of :program:`opt` depends on whether the `-analyze` option is
 given.

-When :option:`-analyze` is specified, :program:`opt` performs various analyses
+When `-analyze` is specified, :program:`opt` performs various analyses
 of the input source. It will usually print the results on standard output, but
 in a few cases, it will print output to standard error or generate a file with
 the analysis output, which is usually done when the output is meant for another
 program.

-While :option:`-analyze` is *not* given, :program:`opt` attempts to produce an
+While `-analyze` is *not* given, :program:`opt` attempts to produce an
 optimized output file. The optimizations available via :program:`opt` depend
 upon what libraries were linked into it as well as any additional libraries
 that have been loaded with the :option:`-load` option. Use the :option:`-help`
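A hedged sketch of the two modes described above (the pass selection and file
names are arbitrary examples, not taken from this diff):

.. code-block:: console

  $ opt -S -instcombine input.ll -o output.ll   # produce an optimized module
  $ opt -analyze -domtree input.ll              # print an analysis result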
@@ -68,19 +68,19 @@ OPTIONS

 .. option:: -disable-opt

-This option is only meaningful when :option:`-std-link-opts` is given. It
+This option is only meaningful when `-std-link-opts` is given. It
 disables most passes.

 .. option:: -strip-debug

 This option causes opt to strip debug information from the module before
-applying other optimizations. It is essentially the same as :option:`-strip`
+applying other optimizations. It is essentially the same as `-strip`
 but it ensures that stripping of debug information is done first.

 .. option:: -verify-each

 This option causes opt to add a verify pass after every pass otherwise
-specified on the command line (including :option:`-verify`). This is useful
+specified on the command line (including `-verify`). This is useful
 for cases where it is suspected that a pass is creating an invalid module but
 it is not clear which pass is doing it.
@@ -406,7 +406,7 @@ outlined. After the handler is outlined, this intrinsic is simply removed.
 ``llvm.eh.exceptionpointer``
 ----------------------------

-.. code-block:: llvm
+.. code-block:: text

 i8 addrspace(N)* @llvm.eh.padparam.pNi8(token %catchpad)

@@ -427,7 +427,7 @@ backend. Uses of them are generated by the backend's
 ``llvm.eh.sjlj.setjmp``
 ~~~~~~~~~~~~~~~~~~~~~~~

-.. code-block:: llvm
+.. code-block:: text

 i32 @llvm.eh.sjlj.setjmp(i8* %setjmp_buf)

@@ -664,7 +664,7 @@ all of the new IR instructions:
 return 0;
 }

-.. code-block:: llvm
+.. code-block:: text

 define i32 @f() nounwind personality i32 (...)* @__CxxFrameHandler3 {
 entry:

@@ -741,7 +741,7 @@ C++ code:
 }
 }

-.. code-block:: llvm
+.. code-block:: text

 define void @f() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
 entry:
@@ -43,7 +43,7 @@ The following additional relocation types are supported:
 corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or
 ``IMAGE_REL_AMD64_ADDR32NB`` (64-bit).

-.. code-block:: gas
+.. code-block:: text

 .text
 fun:

@@ -204,7 +204,7 @@ IR features is specified by the selected :ref:`GC strategy description
 Specifying GC code generation: ``gc "..."``
 -------------------------------------------

-.. code-block:: llvm
+.. code-block:: text

 define <returntype> @name(...) gc "name" { ... }

@@ -105,7 +105,7 @@ memory, or a global variable.

 To make this clear, let's consider a more obtuse example:

-.. code-block:: llvm
+.. code-block:: text

 %MyVar = uninitialized global i32
 ...
@@ -142,7 +142,7 @@ Quick answer: there are no superfluous indices.
 This question arises most often when the GEP instruction is applied to a global
 variable which is always a pointer type. For example, consider this:

-.. code-block:: llvm
+.. code-block:: text

 %MyStruct = uninitialized global { float*, i32 }
 ...

@@ -178,7 +178,7 @@ The GetElementPtr instruction dereferences nothing. That is, it doesn't access
 memory in any way. That's what the Load and Store instructions are for. GEP is
 only involved in the computation of addresses. For example, consider this:

-.. code-block:: llvm
+.. code-block:: text

 %MyVar = uninitialized global { [40 x i32 ]* }
 ...

@@ -195,7 +195,7 @@ illegal.
 In order to access the 18th integer in the array, you would need to do the
 following:

-.. code-block:: llvm
+.. code-block:: text

 %idx = getelementptr { [40 x i32]* }, { [40 x i32]* }* %, i64 0, i32 0
 %arr = load [40 x i32]** %idx

@@ -204,7 +204,7 @@ following:
 In this case, we have to load the pointer in the structure with a load
 instruction before we can index into the array. If the example was changed to:

-.. code-block:: llvm
+.. code-block:: text

 %MyVar = uninitialized global { [40 x i32 ] }
 ...
@@ -30,7 +30,7 @@ instructions with each other. These tables are emitted in the
 ``XXXInstrInfo.inc`` file along with the functions to query them. Following
 is the definition of ``InstrMapping`` class definied in Target.td file:

-.. code-block:: llvm
+.. code-block:: text

 class InstrMapping {
 // Used to reduce search space only to the instructions using this

@@ -69,7 +69,7 @@ non-predicated form by assigning appropriate values to the ``InstrMapping``
 fields. For this relationship, non-predicated instructions are treated as key
 instruction since they are the one used to query the interface function.

-.. code-block:: llvm
+.. code-block:: text

 def getPredOpcode : InstrMapping {
 // Choose a FilterClass that is used as a base class for all the

@@ -116,7 +116,7 @@ to include relevant information in its definition. For example, consider
 following to be the current definitions of ADD, ADD_pt (true) and ADD_pf (false)
 instructions:

-.. code-block:: llvm
+.. code-block:: text

 def ADD : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
 "$dst = add($a, $b)",

@@ -137,7 +137,7 @@ In this step, we modify these instructions to include the information
 required by the relationship model, <tt>getPredOpcode</tt>, so that they can
 be related.

-.. code-block:: llvm
+.. code-block:: text

 def ADD : PredRel, ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
 "$dst = add($a, $b)",

@@ -41,7 +41,7 @@ that passes two default-constructed ``Foo`` objects to ``g`` in the
 g(Foo(), Foo());
 }

-.. code-block:: llvm
+.. code-block:: text

 %struct.Foo = type { i32, i32 }
 declare void @Foo_ctor(%struct.Foo* %this)
docs/LangRef.rst
@@ -839,7 +839,7 @@ Note that the Mach-O platform doesn't support COMDATs and ELF only supports
 Here is an example of a COMDAT group where a function will only be selected if
 the COMDAT key's section is the largest:

-.. code-block:: llvm
+.. code-block:: text

 $foo = comdat largest
 @foo = global i32 2, comdat($foo)

@@ -851,7 +851,7 @@ the COMDAT key's section is the largest:
 As a syntactic sugar the ``$name`` can be omitted if the name is the same as
 the global name:

-.. code-block:: llvm
+.. code-block:: text

 $foo = comdat any
 @foo = global i32 2, comdat

@@ -875,7 +875,7 @@ if a collision occurs in the symbol table.
 The combined use of COMDATS and section attributes may yield surprising results.
 For example:

-.. code-block:: llvm
+.. code-block:: text

 $foo = comdat any
 $bar = comdat any

@@ -1205,7 +1205,7 @@ makes the format of the prologue data highly target dependent.
 A trivial example of valid prologue data for the x86 architecture is ``i8 144``,
 which encodes the ``nop`` instruction:

-.. code-block:: llvm
+.. code-block:: text

 define void @f() prologue i8 144 { ... }

@@ -1213,7 +1213,7 @@ Generally prologue data can be formed by encoding a relative branch instruction
 which skips the metadata, as in this example of valid prologue data for the
 x86_64 architecture, where the first two bytes encode ``jmp .+10``:

-.. code-block:: llvm
+.. code-block:: text

 %0 = type <{ i8, i8, i8* }>

@@ -2237,7 +2237,7 @@ source file name to the local function name.

 The syntax for the source file name is simply:

-.. code-block:: llvm
+.. code-block:: text

 source_filename = "/path/to/source.c"
@@ -2847,7 +2847,7 @@ cleared low bit. However, in the ``%C`` example, the optimizer is
 allowed to assume that the '``undef``' operand could be the same as
 ``%Y``, allowing the whole '``select``' to be eliminated.

-.. code-block:: llvm
+.. code-block:: text

 %A = xor undef, undef

@@ -2899,7 +2899,7 @@ does not execute at all. This allows us to delete the divide and all
 code after it. Because the undefined operation "can't happen", the
 optimizer can assume that it occurs in dead code.

-.. code-block:: llvm
+.. code-block:: text

 a: store undef -> %X
 b: store %X -> undef

@@ -3884,7 +3884,7 @@ their operand. For example:

 Metadata nodes that aren't uniqued use the ``distinct`` keyword. For example:

-.. code-block:: llvm
+.. code-block:: text

 !0 = distinct !{!"test\00", i32 10}

@@ -3949,7 +3949,7 @@ fields are tuples containing the debug info to be emitted along with the compile
 unit, regardless of code optimizations (some nodes are only emitted if there are
 references to them from instructions).

-.. code-block:: llvm
+.. code-block:: text

 !0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang",
 isOptimized: true, flags: "-O2", runtimeVersion: 2,

@@ -3985,7 +3985,7 @@ DIBasicType
 ``DIBasicType`` nodes represent primitive types, such as ``int``, ``bool`` and
 ``float``. ``tag:`` defaults to ``DW_TAG_base_type``.

-.. code-block:: llvm
+.. code-block:: text

 !0 = !DIBasicType(name: "unsigned char", size: 8, align: 8,
 encoding: DW_ATE_unsigned_char)

@@ -3994,7 +3994,7 @@ DIBasicType
 The ``encoding:`` describes the details of the type. Usually it's one of the
 following:

-.. code-block:: llvm
+.. code-block:: text

 DW_ATE_address = 1
 DW_ATE_boolean = 2

@@ -4014,7 +4014,7 @@ refers to a tuple; the first operand is the return type, while the rest are the
 types of the formal arguments in order. If the first operand is ``null``, that
 represents a function with no return value (such as ``void foo() {}`` in C++).

-.. code-block:: llvm
+.. code-block:: text

 !0 = !BasicType(name: "int", size: 32, align: 32, DW_ATE_signed)
 !1 = !BasicType(name: "char", size: 8, align: 8, DW_ATE_signed_char)
@@ -4028,7 +4028,7 @@ DIDerivedType
 ``DIDerivedType`` nodes represent types derived from other types, such as
 qualified types.

-.. code-block:: llvm
+.. code-block:: text

 !0 = !DIBasicType(name: "unsigned char", size: 8, align: 8,
 encoding: DW_ATE_unsigned_char)

@@ -4037,7 +4037,7 @@ qualified types.

 The following ``tag:`` values are valid:

-.. code-block:: llvm
+.. code-block:: text

 DW_TAG_member = 13
 DW_TAG_pointer_type = 15

@@ -4089,7 +4089,7 @@ does not have ``flags: DIFlagFwdDecl`` set. LLVM tools that link modules
 together will unique such definitions at parse time via the ``identifier:``
 field, even if the nodes are ``distinct``.

-.. code-block:: llvm
+.. code-block:: text

 !0 = !DIEnumerator(name: "SixKind", value: 7)
 !1 = !DIEnumerator(name: "SevenKind", value: 7)

@@ -4100,7 +4100,7 @@ field, even if the nodes are ``distinct``.

 The following ``tag:`` values are valid:

-.. code-block:: llvm
+.. code-block:: text

 DW_TAG_array_type = 1
 DW_TAG_class_type = 2

@@ -4219,7 +4219,7 @@ type with an ODR ``identifier:`` and that does not set ``flags: DIFwdDecl``,
 then the subprogram declaration is uniqued based only on its ``linkageName:``
 and ``scope:``.

-.. code-block:: llvm
+.. code-block:: text

 define void @_Z3foov() !dbg !0 {
 ...

@@ -4244,7 +4244,7 @@ DILexicalBlock
 two lexical blocks at same depth. They are valid targets for ``scope:``
 fields.

-.. code-block:: llvm
+.. code-block:: text

 !0 = distinct !DILexicalBlock(scope: !1, file: !2, line: 7, column: 35)
@@ -4290,7 +4290,7 @@ the ``arg:`` field is set to non-zero, then this variable is a subprogram
 parameter, and it will be included in the ``variables:`` field of its
 :ref:`DISubprogram`.

-.. code-block:: llvm
+.. code-block:: text

 !0 = !DILocalVariable(name: "this", arg: 1, scope: !3, file: !2, line: 7,
 type: !3, flags: DIFlagArtificial)

@@ -4313,7 +4313,7 @@ The current supported vocabulary is limited:
 - ``DW_OP_bit_piece, 16, 8`` specifies the offset and size (``16`` and ``8``
 here, respectively) of the variable piece from the working expression.

-.. code-block:: llvm
+.. code-block:: text

 !0 = !DIExpression(DW_OP_deref)
 !1 = !DIExpression(DW_OP_plus, 3)

@@ -4336,7 +4336,7 @@ DIImportedEntity
 ``DIImportedEntity`` nodes represent entities (such as modules) imported into a
 compile unit.

-.. code-block:: llvm
+.. code-block:: text

 !2 = !DIImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0,
 entity: !1, line: 7)

@@ -4349,7 +4349,7 @@ The ``name:`` field is the macro identifier, followed by macro parameters when
 defining a function-like macro, and the ``value`` field is the token-string
 used to expand the macro identifier.

-.. code-block:: llvm
+.. code-block:: text

 !2 = !DIMacro(macinfo: DW_MACINFO_define, line: 7, name: "foo(x)",
 value: "((x) + 1)")

@@ -4362,7 +4362,7 @@ DIMacroFile
 The ``nodes:`` field is a list of ``DIMacro`` and ``DIMacroFile`` nodes that
 appear in the included source file.

-.. code-block:: llvm
+.. code-block:: text

 !2 = !DIMacroFile(macinfo: DW_MACINFO_start_file, line: 7, file: !2,
 nodes: !3)

@@ -5660,7 +5660,7 @@ block. Therefore, it must be the only non-phi instruction in the block.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 dispatch1:
 %cs1 = catchswitch within none [label %handler0, label %handler1] unwind to caller
@@ -5711,7 +5711,7 @@ the ``catchret``'s behavior is undefined.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 catchret from %catch label %continue

@@ -5761,7 +5761,7 @@ It transfers control to ``continue`` or unwinds out of the function.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 cleanupret from %cleanup unwind to caller
 cleanupret from %cleanup unwind label %continue

@@ -5851,7 +5851,7 @@ unsigned and/or signed overflow, respectively, occurs.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = add i32 4, %var ; yields i32:result = 4 + %var

@@ -5890,7 +5890,7 @@ optimizations:
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = fadd float 4.0, %var ; yields float:result = 4.0 + %var

@@ -5942,7 +5942,7 @@ unsigned and/or signed overflow, respectively, occurs.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = sub i32 4, %var ; yields i32:result = 4 - %var
 <result> = sub i32 0, %val ; yields i32:result = -%var

@@ -5985,7 +5985,7 @@ unsafe floating point optimizations:
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = fsub float 4.0, %var ; yields float:result = 4.0 - %var
 <result> = fsub float -0.0, %val ; yields float:result = -%var
@@ -6039,7 +6039,7 @@ unsigned and/or signed overflow, respectively, occurs.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = mul i32 4, %var ; yields i32:result = 4 * %var

@@ -6078,7 +6078,7 @@ unsafe floating point optimizations:
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = fmul float 4.0, %var ; yields float:result = 4.0 * %var

@@ -6122,7 +6122,7 @@ such, "((a udiv exact b) mul b) == a").
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = udiv i32 4, %var ; yields i32:result = 4 / %var

@@ -6168,7 +6168,7 @@ a :ref:`poison value <poisonvalues>` if the result would be rounded.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = sdiv i32 4, %var ; yields i32:result = 4 / %var

@@ -6207,7 +6207,7 @@ unsafe floating point optimizations:
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = fdiv float 4.0, %var ; yields float:result = 4.0 / %var

@@ -6249,7 +6249,7 @@ Taking the remainder of a division by zero leads to undefined behavior.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = urem i32 4, %var ; yields i32:result = 4 % %var
@@ -6304,7 +6304,7 @@ result of the division and the remainder.)
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = srem i32 4, %var ; yields i32:result = 4 % %var

@@ -6344,7 +6344,7 @@ to enable otherwise unsafe floating point optimizations:
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = frem float 4.0, %var ; yields float:result = 4.0 % %var

@@ -6406,7 +6406,7 @@ nsw/nuw bits in (mul %op1, (shl 1, %op2)).
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = shl i32 4, %var ; yields i32: 4 << %var
 <result> = shl i32 4, 2 ; yields i32: 16

@@ -6455,7 +6455,7 @@ non-zero.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = lshr i32 4, 1 ; yields i32:result = 2
 <result> = lshr i32 4, 2 ; yields i32:result = 1

@@ -6506,7 +6506,7 @@ non-zero.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = ashr i32 4, 1 ; yields i32:result = 2
 <result> = ashr i32 4, 2 ; yields i32:result = 1

@@ -6558,7 +6558,7 @@ The truth table used for the '``and``' instruction is:
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = and i32 4, %var ; yields i32:result = 4 & %var
 <result> = and i32 15, 40 ; yields i32:result = 8
@@ -6657,7 +6657,7 @@ The truth table used for the '``xor``' instruction is:
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = xor i32 4, %var ; yields i32:result = 4 ^ %var
 <result> = xor i32 15, 40 ; yields i32:result = 39

@@ -6710,7 +6710,7 @@ exceeds the length of ``val``, the results are undefined.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = extractelement <4 x i32> %vec, i32 0 ; yields i32

@@ -6752,7 +6752,7 @@ undefined.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = insertelement <4 x i32> %vec, i32 1, i32 0 ; yields <4 x i32>

@@ -6800,7 +6800,7 @@ only one vector.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
 <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; yields <4 x i32>

@@ -6859,7 +6859,7 @@ the index operands.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = extractvalue {i32, float} %agg, 0 ; yields i32

@@ -8126,7 +8126,7 @@ or :ref:`ptrtoint <i_ptrtoint>` instructions first.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 %X = bitcast i8 255 to i8 ; yields i8 :-1
 %Y = bitcast i32* %x to sint* ; yields sint*:%x
@@ -8265,7 +8265,7 @@ as the values being compared. Otherwise, the result is an ``i1``.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = icmp eq i32 4, 5 ; yields: result=false
 <result> = icmp ne float* %X, %X ; yields: result=false

@@ -8379,7 +8379,7 @@ assumptions to be made about the values of input arguments; namely
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 <result> = fcmp oeq float 4.0, 5.0 ; yields: result=false
 <result> = fcmp one float 4.0, 5.0 ; yields: result=true

@@ -8815,7 +8815,7 @@ that does not carry an appropriate :ref:`"funclet" bundle <ob_funclet>`.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 dispatch:
 %cs = catchswitch within none [label %handler0] unwind to caller

@@ -8885,7 +8885,7 @@ that does not carry an appropriate :ref:`"funclet" bundle <ob_funclet>`.
 Example:
 """"""""

-.. code-block:: llvm
+.. code-block:: text

 %tok = cleanuppad within %cs []
@@ -12481,19 +12481,19 @@ optimistic assumptions made during compilation. The semantics of
 ``@llvm.experimental.deoptimize`` -- its body is defined to be
 equivalent to:

-.. code-block:: llvm
+.. code-block:: text

-define void @llvm.experimental.guard(i1 %pred, <args...>) {
-%realPred = and i1 %pred, undef
-br i1 %realPred, label %continue, label %leave [, !make.implicit !{}]
+define void @llvm.experimental.guard(i1 %pred, <args...>) {
+%realPred = and i1 %pred, undef
+br i1 %realPred, label %continue, label %leave [, !make.implicit !{}]

-leave:
-call void @llvm.experimental.deoptimize(<args...>) [ "deopt"() ]
-ret void
+leave:
+call void @llvm.experimental.deoptimize(<args...>) [ "deopt"() ]
+ret void

-continue:
-ret void
-}
+continue:
+ret void
+}

 with the optional ``[, !make.implicit !{}]`` present if and only if it
@@ -111,7 +111,6 @@ Here is an example of a YAML document that contains an LLVM module:

.. code-block:: llvm

--- |
define i32 @inc(i32* %x) {
entry:
%0 = load i32, i32* %x
@@ -119,7 +118,6 @@ Here is an example of a YAML document that contains an LLVM module:
store i32 %1, i32* %x
ret i32 %1
}
...

.. _YAML block literal string: http://www.yaml.org/spec/1.2/spec.html#id2795688
@@ -129,7 +127,7 @@ Machine Functions
 The remaining YAML documents contain the machine functions. This is an example
 of such YAML document:

-.. code-block:: llvm
+.. code-block:: text

 ---
 name: inc

@@ -172,7 +170,7 @@ A machine basic block is defined in a single block definition source construct
 that contains the block's ID.
 The example below defines two blocks that have an ID of zero and one:

-.. code-block:: llvm
+.. code-block:: text

 bb.0:
 <instructions>

@@ -182,7 +180,7 @@ The example below defines two blocks that have an ID of zero and one:
 A machine basic block can also have a name. It should be specified after the ID
 in the block's definition:

-.. code-block:: llvm
+.. code-block:: text

 bb.0.entry: ; This block's name is "entry"
 <instructions>

@@ -196,7 +194,7 @@ Block References
 The machine basic blocks are identified by their ID numbers. Individual
 blocks are referenced using the following syntax:

-.. code-block:: llvm
+.. code-block:: text

 %bb.<id>[.<name>]

@@ -213,7 +211,7 @@ Successors
 The machine basic block's successors have to be specified before any of the
 instructions:

-.. code-block:: llvm
+.. code-block:: text

 bb.0.entry:
 successors: %bb.1.then, %bb.2.else

@@ -227,7 +225,7 @@ The branch weights can be specified in brackets after the successor blocks.
 The example below defines a block that has two successors with branch weights
 of 32 and 16:

-.. code-block:: llvm
+.. code-block:: text

 bb.0.entry:
 successors: %bb.1.then(32), %bb.2.else(16)

@@ -240,7 +238,7 @@ Live In Registers
 The machine basic block's live in registers have to be specified before any of
 the instructions:

-.. code-block:: llvm
+.. code-block:: text

 bb.0.entry:
 liveins: %edi, %esi
@@ -255,7 +253,7 @@ Miscellaneous Attributes
 The attributes ``IsAddressTaken``, ``IsLandingPad`` and ``Alignment`` can be
 specified in brackets after the block's definition:

-.. code-block:: llvm
+.. code-block:: text

 bb.0.entry (address-taken):
 <instructions>

@@ -278,7 +276,7 @@ The instruction's name is usually specified before the operands. The example
 below shows an instance of the X86 ``RETQ`` instruction with a single machine
 operand:

-.. code-block:: llvm
+.. code-block:: text

 RETQ %eax

@@ -287,7 +285,7 @@ operands, the instruction's name has to be specified after them. The example
 below shows an instance of the AArch64 ``LDPXpost`` instruction with three
 defined register operands:

-.. code-block:: llvm
+.. code-block:: text

 %sp, %fp, %lr = LDPXpost %sp, 2

@@ -303,7 +301,7 @@ Instruction Flags

 The flag ``frame-setup`` can be specified before the instruction's name:

-.. code-block:: llvm
+.. code-block:: text

 %fp = frame-setup ADDXri %sp, 0, 0
@@ -321,13 +319,13 @@ but they can also be used in a number of other places, like the
 The physical registers are identified by their name. They use the following
 syntax:

-.. code-block:: llvm
+.. code-block:: text

 %<name>

 The example below shows three X86 physical registers:

-.. code-block:: llvm
+.. code-block:: text

 %eax
 %r15

@@ -336,13 +334,13 @@ The example below shows three X86 physical registers:
 The virtual registers are identified by their ID number. They use the following
 syntax:

-.. code-block:: llvm
+.. code-block:: text

 %<id>

 Example:

-.. code-block:: llvm
+.. code-block:: text

 %0

@@ -366,7 +364,7 @@ The immediate machine operands are untyped, 64-bit signed integers. The
 example below shows an instance of the X86 ``MOV32ri`` instruction that has an
 immediate machine operand ``-42``:

-.. code-block:: llvm
+.. code-block:: text

 %eax = MOV32ri -42
@@ -384,14 +382,14 @@ machine operands. The register operands can also have optional
 and a reference to the tied register operand.
 The full syntax of a register operand is shown below:

-.. code-block:: llvm
+.. code-block:: text

 [<flags>] <register> [ :<subregister-idx-name> ] [ (tied-def <tied-op>) ]

 This example shows an instance of the X86 ``XOR32rr`` instruction that has
 5 register operands with different register flags:

-.. code-block:: llvm
+.. code-block:: text

 dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al

@@ -446,7 +444,7 @@ the subregister indices. The example below shows an instance of the ``COPY``
 pseudo instruction that uses the X86 ``sub_8bit`` subregister index to copy 8
 lower bits from the 32-bit virtual register 0 to the 8-bit virtual register 1:

-.. code-block:: llvm
+.. code-block:: text

 %1 = COPY %0:sub_8bit

@@ -461,7 +459,7 @@ The global value machine operands reference the global values from the
 The example below shows an instance of the X86 ``MOV64rm`` instruction that has
 a global value operand named ``G``:

-.. code-block:: llvm
+.. code-block:: text

 %rax = MOV64rm %rip, 1, _, @G, _
@@ -70,7 +70,7 @@ clients.
 For example, a possible annotation of an ARM load of a stack-relative location
 might be annotated as:

-.. code-block:: nasm
+.. code-block:: text

 ldr <reg gpr:r0>, <mem regoffset:[<reg gpr:sp>, <imm:#4>]>

@@ -394,7 +394,7 @@ and in right function "*FR*". And every part of *left* place is equal to the
 corresponding part of *right* place, and (!) both parts use *Value* instances,
 for example:

-.. code-block:: llvm
+.. code-block:: text

 instr0 i32 %LV ; left side, function FL
 instr0 i32 %RV ; right side, function FR

@@ -409,13 +409,13 @@ in "*FL*" and "*FR*".

 Consider small example here:

-.. code-block:: llvm
+.. code-block:: text

 define void %f(i32 %pf0, i32 %pf1) {
 instr0 i32 %pf0 instr1 i32 %pf1 instr2 i32 123
 }

-.. code-block:: llvm
+.. code-block:: text

 define void %g(i32 %pg0, i32 %pg1) {
 instr0 i32 %pg0 instr1 i32 %pg0 instr2 i32 123
@@ -37,7 +37,7 @@ code. By default, the back-end will emit device functions. Metadata is used to
 declare a function as a kernel function. This metadata is attached to the
 ``nvvm.annotations`` named metadata object, and has the following format:

-.. code-block:: llvm
+.. code-block:: text

 !0 = !{<function-ref>, metadata !"kernel", i32 1}
@@ -40,7 +40,10 @@ Non-comprehensive list of changes in this release

 * There is no longer a "global context" available in LLVM, except for the C API.

-* .. note about autoconf build having been removed.
+* The autoconf build system has been removed in favor of CMake. LLVM 3.9
+requires CMake 3.4.3 or later to build. For information about using CMake
+please see the documentation on :doc:`CMake`. For information about the CMake
+language there is also a :doc:`CMakePrimer` document available.

 * .. note about C API functions LLVMParseBitcode,
 LLVMParseBitcodeInContext, LLVMGetBitcodeModuleInContext and
@@ -69,11 +72,13 @@ Non-comprehensive list of changes in this release
 need to be updated to replace the argument node and remove any dead nodes in
 cases where they currently return an ``SDNode *`` from this interface.

-* Introduction of ThinLTO: [FIXME: needs to be documented more extensively in
-/docs/ ; ping Mehdi/Teresa before the release if not done]

 * Raised the minimum required CMake version to 3.4.3.

+* Added the MemorySSA analysis, which hopes to replace MemoryDependenceAnalysis.
+It should provide higher-quality results than MemDep, and be algorithmically
+faster than MemDep. Currently, GVNHoist (which is off by default) makes use of
+MemorySSA.

 .. NOTE
 For small 1-3 sentence descriptions, just add an entry at the end of
 this list. If your description won't fit comfortably in one bullet
@@ -93,6 +98,32 @@ Non-comprehensive list of changes in this release

 Makes programs 10x faster by doing Special New Thing.

+GCC ABI Tag
+-----------
+
+Recently, many of the Linux distributions (ex. `Fedora <http://developerblog.redhat.com/2015/02/10/gcc-5-in-fedora/>`_,
+`Debian <https://wiki.debian.org/GCC5>`_, `Ubuntu <https://wiki.ubuntu.com/GCC5>`_)
+have moved on to use the new `GCC ABI <https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Attributes.html>`_
+to work around `C++11 incompatibilities in libstdc++ <https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html>`_.
+This caused `incompatibility problems <https://gcc.gnu.org/ml/gcc-patches/2015-04/msg00153.html>`_
+with other compilers (ex. Clang), which needed to be fixed, but due to the
+experimental nature of GCC's own implementation, it took a long time for it to
+land in LLVM (`here <https://reviews.llvm.org/D18035>`_ and
+`here <https://reviews.llvm.org/D17567>`_), not in time for the 3.8 release.
+
+Those patches are now present in the 3.9.0 release and should be working on the
+majority of cases, as they have been tested thoroughly. However, some bugs were
+`filled in GCC <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71712>`_ and have not
+yet been fixed, so there may be corner cases not covered by either GCC or Clang.
+Bug fixes to those problems should be reported in Bugzilla (either LLVM or GCC),
+and patches to LLVM's trunk are very likely to be back-ported to future 3.9.x
+releases (depends on how destructive it is).
+
+Unfortunately, these patches won't be back-ported to 3.8.x or earlier, so we
+strongly recommend people to use 3.9.x when GCC ABI cases are at stake.
+
+For a more in-depth view of the issue, check our `Bugzilla entry <https://llvm.org/bugs/show_bug.cgi?id=23529>`_.

 Changes to the LLVM IR
 ----------------------
@@ -110,16 +141,98 @@ link-time may be differently optimized than the one what was visible
 during optimization, and may have arbitrarily different observable
 behavior. See `PR26774 <http://llvm.org/PR26774>`_ for more details.

-Changes to the ARM Backend
+Support for ThinLTO
+-------------------
+
+LLVM now supports ThinLTO compilation, which can be invoked by compiling
+and linking with -flto=thin. The gold linker plugin, as well as linkers
+that use the new ThinLTO API in libLTO (like ld64), will transparently
+execute the ThinLTO backends in parallel threads.
+For more information on ThinLTO and the LLVM implementation, see the
+`ThinLTO blog post <http://blog.llvm.org/2016/06/thinlto-scalable-and-incremental-lto.html>`_.
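As a rough illustration of the workflow described above (a hedged sketch; the
file names are placeholders and the exact driver invocation depends on the
platform and linker in use):

.. code-block:: console

  $ clang -flto=thin -O2 -c a.c b.c
  $ clang -flto=thin -fuse-ld=gold -O2 a.o b.o -o app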
+Changes to the ARM Targets
 --------------------------

-During this release ...
+**During this release the AArch64 backend has:**
+
+* Gained support for Qualcomm's Kryo and Broadcom's Vulcan CPUs, including
+scheduling models.
+* Landed a scheduling model for Samsung's Exynos M1.
+* Seen a lot of work on GlobalISel.
+* Learned a few more useful combines (fadd and fmul into fmadd, adjustments to the
+stack pointer for callee-save stack memory and local stack memory etc).
+* Gained support for the Swift calling convention.
+* Switched to using SubtargetFeatures rather than testing for specific CPUs and
+to using TableGen for handling system instruction operands.
+* Like ARM, AArch64 is now using the TargetParser, so no more StringSwitches
+matching CPU, FPU or feature names will be accepted in normal code.
+* Clang can now self-host itself using LLD on AArch64.
+* Gained a big batch of tests from Halide.
+
+Furthermore, LLDB now supports AArch64 compact unwind tables, as used on iOS,
+tvos and watchos.
+
+**During this release the ARM target has:**
+
+* ARMv8.2-A can now be targeted directly via Clang flags.
+* Adding preliminary support for Cortex-R8.
+* LLDB can now parse EABI attributes for an ELF input.
+* Initial ARM/Thumb support was added to LLD.
+* The ExecutionEngine now supports COFF/ARM.
+* Swift calling convention was ported to ARM.
+* A large number of codegen fixes around ARMv8, DSP, correct sub-target support,
+relocations, EABI, EHABI, Windows on ARM, atomics..
+* Improved assembler support for Linux/Android/Chromium sub-projects.
+* Initial support for MUSL (libc) on ARM.
+* Support for Thumb1 targets in libunwind.
+* Gained a big batch of tests from Halide.

 Changes to the MIPS Target
 --------------------------

-During this release ...
+**During this release the MIPS target has:**
+
+* Enabled the Integrated Assembler by default for all ``mips-*`` and
+``mipsel-*`` triples.
+* Significantly improved the Integrated Assembler support for the n64 ABI.
+* Added the Clang frontend ``-mcompact-branches={never,optimal,always}`` option
+that controls how LLVM generates compact branches for MIPS targets.
+* Improved performance and code size for stack pointer adjustments in functions
+with large frames.
+* Implemented many instructions from the microMIPS32R6 ISA and added CodeGen
+support for most of them.
+* Added support for the triple used by Debian Stretch for little endian
+MIPS64, ie. ``mips64el-linux-gnuabi64``.
+* Removed EABI which was neither tested nor properly supported.
+* Gained the ability to self-host on MIPS32R6.
+* Gained the ability to self-host on MIPS64R2 and MIPS64R6 when using the n64
+ABI.
+* Added support for the ``LA`` macro in PIC mode for o32.
+* Added support for safestack in compiler-rt.
+* Added support for the MIPS n64 ABI in LLD.
+* Added LLD support for TLS relocations for both o32 and n64 MIPS ABIs.
+
+**The MIPS target has also fixed various bugs including the following notable
+fixes:**
+
+* Delay slots are no longer filled multiple times when either ``-save-temps``
+or ``-via-file-asm`` are used.
+* Updated n32 and n64 to follow the standard ELF conventions for label prefixes
+(``.L``), whereas o32 still uses its own (``$``).
+* Properly sign-extend values to GPR width for instructions that expect 32-bit
+values on 64-bit ISAs.
+* Several fixes for the delay-slot filler pass, including correct
+forbidden-slot hazard handling.
+* Fixed several errors caught by the machine verifier when turned on for MIPS.
+* Fixed broken predicate for ``SELECT`` patterns in MIPS64.
+* Fixed wrong truncation of memory address for ``LL``/``SC`` seqeuences in
+MIPS64.
+* Fixed the o32, n32 and n64 handling of ``.cprestore`` directives when inside
+a ``.set noat`` region by the Integrated Assembler.
+* Fixed the ordering of ``HI``/``LO`` pairs in the relocation table.
+* Fixed the generated ELF ``EFlags`` when Octeon is the target.

 Changes to the PowerPC Target
@@ -140,9 +253,16 @@ Changes to the X86 Target
 extensions using ``-march=knl``. The switch enables the ISA extensions
 AVX-512{F, CD, ER, PF}.

 * LLVM will now prefer ``PUSH`` instructions rather than ``%esp``-relative
 ``MOV`` instructions for function calls at all optimization levels greater
 than ``-O0``. Previously this transformation only occurred at ``-Os``.

+Changes to the AMDGPU Target
+-----------------------------
+
+* Added backend support for OpenGL shader image, buffer storage, atomic
+counter, and compute shader extensions (supported since Mesa 12)
+
+* Mesa 11.0.x is no longer supported
@@ -167,6 +287,21 @@ projects that have already been updated to work with LLVM 3.9.

 * A project

+LDC - the LLVM-based D compiler
+-------------------------------
+
+`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
+pragmatically combines efficiency, control, and modeling power, with safety and
+programmer productivity. D supports powerful concepts like Compile-Time Function
+Execution (CTFE) and Template Meta-Programming, provides an innovative approach
+to concurrency and offers many classical paradigms.
+
+`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
+combined with LLVM as backend to produce efficient native code. LDC targets
+x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
+and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
+are underway.

 Additional Information
 ======================
@@ -33,7 +33,7 @@ current stack limit (minus the amount of space needed to allocate a new block) -
 this slot's offset is again dictated by ``libgcc``. The generated
 assembly looks like this on x86-64:

-.. code-block:: nasm
+.. code-block:: text

 leaq -8(%rsp), %r10
 cmpq %fs:112, %r10
@@ -230,7 +230,7 @@ following C fragment, for example:

 Compiled to LLVM, this function would be represented like this:

-.. code-block:: llvm
+.. code-block:: text

 ; Function Attrs: nounwind ssp uwtable
 define void @foo() #0 !dbg !4 {

@@ -303,7 +303,7 @@ The first intrinsic ``%llvm.dbg.declare`` encodes debugging information for the
 variable ``X``. The metadata ``!dbg !14`` attached to the intrinsic provides
 scope information for the variable ``X``.

-.. code-block:: llvm
+.. code-block:: text

 !14 = !DILocation(line: 2, column: 9, scope: !4)
 !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5,

@@ -327,7 +327,7 @@ The third intrinsic ``%llvm.dbg.declare`` encodes debugging information for
 variable ``Z``. The metadata ``!dbg !19`` attached to the intrinsic provides
 scope information for the variable ``Z``.

-.. code-block:: llvm
+.. code-block:: text

 !18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5)
 !19 = !DILocation(line: 5, column: 11, scope: !18)

@@ -390,7 +390,7 @@ Given an integer global variable declared as follows:

 a C/C++ front-end would generate the following descriptors:

-.. code-block:: llvm
+.. code-block:: text

 ;;
 ;; Define the global itself.

@@ -456,7 +456,7 @@ Given a function declared as follows:

 a C/C++ front-end would generate the following descriptors:

-.. code-block:: llvm
+.. code-block:: text

 ;;
 ;; Define the anchor for subprograms.
@@ -138,7 +138,7 @@ SSA value ``%obj.relocated`` which represents the potentially changed value of
 ``%obj`` after the safepoint and update any following uses appropriately. The
 resulting relocation sequence is:

-.. code-block:: llvm
+.. code-block:: text

 define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
 gc "statepoint-example" {

@@ -237,7 +237,7 @@ afterwards.
 If we extend our previous example to include a pointless derived pointer,
 we get:

-.. code-block:: llvm
+.. code-block:: text

 define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
 gc "statepoint-example" {

@@ -283,7 +283,7 @@ Let's assume a hypothetical GC--somewhat unimaginatively named "hypothetical-gc"
 --that requires that a TLS variable must be written to before and after a call
 to unmanaged code. The resulting relocation sequence is:

-.. code-block:: llvm
+.. code-block:: text

 @flag = thread_local global i32 0, align 4

@@ -662,7 +662,7 @@ distinguish between GC references and non-GC references in IR it is given.

 As an example, given this code:

-.. code-block:: llvm
+.. code-block:: text

 define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
 gc "statepoint-example" {

@@ -672,7 +672,7 @@ As an example, given this code:

 The pass would produce this IR:

-.. code-block:: llvm
+.. code-block:: text

 define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
 gc "statepoint-example" {

@@ -737,7 +737,7 @@ As an example, given input IR of the following:

 This pass would produce the following IR:

-.. code-block:: llvm
+.. code-block:: text

 define void @test() gc "statepoint-example" {
 %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
@@ -232,7 +232,7 @@ the record ends with a semicolon.

 Here is a simple TableGen file:

-.. code-block:: llvm
+.. code-block:: text

 class C { bit V = 1; }
 def X : C;

@@ -276,7 +276,7 @@ derived class or definition wants to override. Let expressions consist of the
 value. For example, a new class could be added to the example above, redefining
 the ``V`` field for all of its subclasses:

-.. code-block:: llvm
+.. code-block:: text

 class D : C { let V = 0; }
 def Z : D;

@@ -295,7 +295,7 @@ concrete classes. Parameterized TableGen classes specify a list of variable
 bindings (which may optionally have defaults) that are bound when used. Here is
 a simple example:

-.. code-block:: llvm
+.. code-block:: text

 class FPFormat<bits<3> val> {
 bits<3> Value = val;

@@ -316,7 +316,7 @@ integer.
 The more esoteric forms of `TableGen expressions`_ are useful in conjunction
 with template arguments. As an example:

-.. code-block:: llvm
+.. code-block:: text

 class ModRefVal<bits<2> val> {
 bits<2> Value = val;

@@ -346,7 +346,7 @@ be used to decouple the interface provided to the user of the class from the
 actual internal data representation expected by the class. In this case,
 running ``llvm-tblgen`` on the example prints the following definitions:

-.. code-block:: llvm
+.. code-block:: text

 def bork { // Value
 bit isMod = 1;
@@ -379,7 +379,7 @@ commonality exists, then in a separate place indicate what all the ops are.

 Here is an example TableGen fragment that shows this idea:

-.. code-block:: llvm
+.. code-block:: text

 def ops;
 def GPR;

@@ -405,7 +405,7 @@ inherit from multiple multiclasses, instantiating definitions from each
 multiclass. Using a multiclass this way is exactly equivalent to instantiating
 the classes multiple times yourself, e.g. by writing:

-.. code-block:: llvm
+.. code-block:: text

 def ops;
 def GPR;

@@ -432,7 +432,7 @@ the classes multiple times yourself, e.g. by writing:
 A ``defm`` can also be used inside a multiclass providing several levels of
 multiclass instantiations.

-.. code-block:: llvm
+.. code-block:: text

 class Instruction<bits<4> opc, string Name> {
 bits<4> opcode = opc;

@@ -473,7 +473,7 @@ multiclass instantiations.
 the class list must start after the last multiclass, and there must be at least
 one multiclass before them.

-.. code-block:: llvm
+.. code-block:: text

 class XD { bits<4> Prefix = 11; }
 class XS { bits<4> Prefix = 12; }

@@ -516,7 +516,7 @@ specified file in place of the include directive. The filename should be
 specified as a double quoted string immediately after the '``include``' keyword.
 Example:

-.. code-block:: llvm
+.. code-block:: text

 include "foo.td"
@@ -532,7 +532,7 @@ commonality from the records.
 File-scope "let" expressions take a comma-separated list of bindings to apply,
 and one or more records to bind the values in. Here are some examples:

-.. code-block:: llvm
+.. code-block:: text

 let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in
 def RET : I<0xC3, RawFrm, (outs), (ins), "ret", [(X86retflag 0)]>;

@@ -559,7 +559,7 @@ ways to factor out commonality from the records, specially if using several
 levels of multiclass instantiations. This also avoids the need of using "let"
 expressions within subsequent records inside a multiclass.

-.. code-block:: llvm
+.. code-block:: text

 multiclass basic_r<bits<4> opc> {
 let Predicates = [HasSSE2] in {

@@ -587,7 +587,7 @@ TableGen supports the '``foreach``' block, which textually replicates the loop
 body, substituting iterator values for iterator references in the body.
 Example:

-.. code-block:: llvm
+.. code-block:: text

 foreach i = [0, 1, 2, 3] in {
 def R#i : Register<...>;

@@ -598,7 +598,7 @@ This will create objects ``R0``, ``R1``, ``R2`` and ``R3``. ``foreach`` blocks
 may be nested. If there is only one item in the body the braces may be
 elided:

-.. code-block:: llvm
+.. code-block:: text

 foreach i = [0, 1, 2, 3] in
 def R#i : Register<...>;
@@ -90,7 +90,7 @@ of the classes, then all of the definitions. This is a good way to see what the
 various definitions expand to fully. Running this on the ``X86.td`` file prints
 this (at the time of this writing):

-.. code-block:: llvm
+.. code-block:: text

 ...
 def ADD32rr { // Instruction X86Inst I

@@ -155,7 +155,7 @@ by the code generator, and specifying it all manually would be unmaintainable,
 prone to bugs, and tiring to do in the first place. Because we are using
 TableGen, all of the information was derived from the following definition:

-.. code-block:: llvm
+.. code-block:: text

 let Defs = [EFLAGS],
 isCommutable = 1, // X = ADD Y,Z --> X = ADD Z,Y

@@ -201,7 +201,7 @@ TableGen.
 **TableGen definitions** are the concrete form of 'records'. These generally do
 not have any undefined values, and are marked with the '``def``' keyword.

-.. code-block:: llvm
+.. code-block:: text

 def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
 "Enable ARMv8 FP">;

@@ -220,7 +220,7 @@ floating point instructions in the X86 backend). TableGen keeps track of all of
 the classes that are used to build up a definition, so the backend can find all
 definitions of a particular class, such as "Instruction".

-.. code-block:: llvm
+.. code-block:: text

 class ProcNoItin<string Name, list<SubtargetFeature> Features>
 : Processor<Name, NoItineraries, Features>;

@@ -235,7 +235,7 @@ If a multiclass inherits from another multiclass, the definitions in the
 sub-multiclass become part of the current multiclass, as if they were declared
 in the current multiclass.

-.. code-block:: llvm
+.. code-block:: text

 multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
 dag address, ValueType sty> {
@ -345,7 +345,7 @@ to define an object for each register. The specified string ``n`` becomes the
``Name`` of the register. The basic ``Register`` object does not have any
subregisters and does not specify any aliases.

.. code-block:: llvm
.. code-block:: text

class Register<string n> {
string Namespace = "";

@ -361,7 +361,7 @@ subregisters and does not specify any aliases.
For example, in the ``X86RegisterInfo.td`` file, there are register definitions
that utilize the ``Register`` class, such as:

.. code-block:: llvm
.. code-block:: text

def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>;

@ -414,7 +414,7 @@ classes. In ``Target.td``, the ``Register`` class is the base for the
``RegisterWithSubRegs`` class that is used to define registers that need to
specify subregisters in the ``SubRegs`` list, as shown here:

.. code-block:: llvm
.. code-block:: text

class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
let SubRegs = subregs;

@ -427,7 +427,7 @@ feature common to these subclasses. Note the use of "``let``" expressions to
override values that are initially defined in a superclass (such as ``SubRegs``
field in the ``Rd`` class).

.. code-block:: llvm
.. code-block:: text

class SparcReg<string n> : Register<n> {
field bits<5> Num;

@ -452,7 +452,7 @@ field in the ``Rd`` class).
In the ``SparcRegisterInfo.td`` file, there are register definitions that
utilize these subclasses of ``Register``, such as:

.. code-block:: llvm
.. code-block:: text

def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;

@ -478,7 +478,7 @@ default allocation order of the registers. A target description file
``XXXRegisterInfo.td`` that uses ``Target.td`` can construct register classes
using the following class:

.. code-block:: llvm
.. code-block:: text

class RegisterClass<string namespace,
list<ValueType> regTypes, int alignment, dag regList> {

@ -532,7 +532,7 @@ defines a group of 32 single-precision floating-point registers (``F0`` to
``F31``); ``DFPRegs`` defines a group of 16 double-precision registers
(``D0-D15``).

.. code-block:: llvm
.. code-block:: text

// F0, F1, F2, ..., F31
def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;

@ -703,7 +703,7 @@ which describes one instruction. An instruction descriptor defines:
The Instruction class (defined in ``Target.td``) is mostly used as a base for
more complex instruction classes.

.. code-block:: llvm
.. code-block:: text

class Instruction {
string Namespace = "";

@ -760,7 +760,7 @@ specific operation value for ``LD``/Load Word. The third parameter is the
output destination, which is a register operand and defined in the ``Register``
target description file (``IntRegs``).

.. code-block:: llvm
.. code-block:: text

def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
"ld [$addr], $dst",

@ -769,7 +769,7 @@ target description file (``IntRegs``).
The fourth parameter is the input source, which uses the address operand
``MEMrr`` that is defined earlier in ``SparcInstrInfo.td``:

.. code-block:: llvm
.. code-block:: text

def MEMrr : Operand<i32> {
let PrintMethod = "printMemOperand";

@ -788,7 +788,7 @@ immediate value operands. For example, to perform a Load Integer instruction
for a Word from an immediate operand to a register, the following instruction
class is defined:

.. code-block:: llvm
.. code-block:: text

def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
"ld [$addr], $dst",

@ -801,7 +801,7 @@ creation of templates to define several instruction classes at once (using the
pattern ``F3_12`` is defined to create 2 instruction classes each time
``F3_12`` is invoked:

.. code-block:: llvm
.. code-block:: text

multiclass F3_12 <string OpcStr, bits<6> Op3Val, SDNode OpNode> {
def rr : F3_1 <2, Op3Val,

@ -818,7 +818,7 @@ So when the ``defm`` directive is used for the ``XOR`` and ``ADD``
instructions, as seen below, it creates four instruction objects: ``XORrr``,
``XORri``, ``ADDrr``, and ``ADDri``.

.. code-block:: llvm
.. code-block:: text

defm XOR : F3_12<"xor", 0b000011, xor>;
defm ADD : F3_12<"add", 0b000000, add>;

@ -830,7 +830,7 @@ For example, the 10\ :sup:`th` bit represents the "greater than" condition for
integers, and the 22\ :sup:`nd` bit represents the "greater than" condition for
floats.

.. code-block:: llvm
.. code-block:: text

def ICC_NE : ICC_VAL< 9>; // Not Equal
def ICC_E : ICC_VAL< 1>; // Equal

@ -855,7 +855,7 @@ order they are defined. Fields are bound when they are assigned a value. For
example, the Sparc target defines the ``XNORrr`` instruction as a ``F3_1``
format instruction having three operands.

.. code-block:: llvm
.. code-block:: text

def XNORrr : F3_1<2, 0b000111,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),

@ -865,7 +865,7 @@ format instruction having three operands.
The instruction templates in ``SparcInstrFormats.td`` show the base class for
``F3_1`` is ``InstSP``.

.. code-block:: llvm
.. code-block:: text

class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
field bits<32> Inst;

@ -880,7 +880,7 @@ The instruction templates in ``SparcInstrFormats.td`` show the base class for

``InstSP`` leaves the ``op`` field unbound.

.. code-block:: llvm
.. code-block:: text

class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSP<outs, ins, asmstr, pattern> {

@ -897,7 +897,7 @@ The instruction templates in ``SparcInstrFormats.td`` show the base class for
fields. ``F3`` format instructions will bind the operands ``rd``, ``op3``, and
``rs1`` fields.

.. code-block:: llvm
.. code-block:: text

class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {

@ -925,7 +925,7 @@ TableGen definition will add all of its operands to an enumeration in the
llvm::XXX:OpName namespace and also add an entry for it into the OperandMap
table, which can be queried using getNamedOperandIdx()

.. code-block:: llvm
.. code-block:: text

int DstIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::dst); // => 0
int BIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::b); // => 1

@ -972,7 +972,7 @@ For example, the X86 backend defines ``brtarget`` and ``brtarget8``, both
instances of the TableGen ``Operand`` class, which represent branch target
operands:

.. code-block:: llvm
.. code-block:: text

def brtarget : Operand<OtherVT>;
def brtarget8 : Operand<OtherVT>;

@ -1222,14 +1222,14 @@ definitions in ``XXXInstrInfo.td``. For example, in ``SparcInstrInfo.td``,
this entry defines a register store operation, and the last parameter describes
a pattern with the store DAG operator.

.. code-block:: llvm
.. code-block:: text

def STrr : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
"st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>;

``ADDRrr`` is a memory mode that is also defined in ``SparcInstrInfo.td``:

.. code-block:: llvm
.. code-block:: text

def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;

@ -1240,7 +1240,7 @@ defined in an implementation of the Instructor Selector (such as
In ``lib/Target/TargetSelectionDAG.td``, the DAG operator for store is defined
below:

.. code-block:: llvm
.. code-block:: text

def store : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{

@ -1458,7 +1458,7 @@ if the current argument is of type ``f32`` or ``f64``), then the action is
performed. In this case, the ``CCAssignToReg`` action assigns the argument
value to the first available register: either ``R0`` or ``R1``.

.. code-block:: llvm
.. code-block:: text

CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>>

@ -1469,7 +1469,7 @@ which registers are used for specified scalar return types. A single-precision
float is returned to register ``F0``, and a double-precision float goes to
register ``D0``. A 32-bit integer is returned in register ``I0`` or ``I1``.

.. code-block:: llvm
.. code-block:: text

def RetCC_Sparc32 : CallingConv<[
CCIfType<[i32], CCAssignToReg<[I0, I1]>>,

@ -1484,7 +1484,7 @@ the size of the slot, and the second parameter, also 4, indicates the stack
alignment along 4-byte units. (Special cases: if size is zero, then the ABI
size is used; if alignment is zero, then the ABI alignment is used.)

.. code-block:: llvm
.. code-block:: text

def CC_Sparc32 : CallingConv<[
// All arguments get passed in integer registers if there is space.

@ -1499,7 +1499,7 @@ the following example (in ``X86CallingConv.td``), the definition of
assigned to the register ``ST0`` or ``ST1``, the ``RetCC_X86Common`` is
invoked.

.. code-block:: llvm
.. code-block:: text

def RetCC_X86_32_C : CallingConv<[
CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>,

@ -1514,7 +1514,7 @@ then a specified action is invoked. In the following example (in
``RetCC_X86_32_Fast`` is invoked. If the ``SSECall`` calling convention is in
use, then ``RetCC_X86_32_SSE`` is invoked.

.. code-block:: llvm
.. code-block:: text

def RetCC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,

@ -1682,7 +1682,7 @@ feature, the value of the attribute, and a description of the feature. (The
fifth parameter is a list of features whose presence is implied, and its
default value is an empty array.)

.. code-block:: llvm
.. code-block:: text

class SubtargetFeature<string n, string a, string v, string d,
list<SubtargetFeature> i = []> {

@ -1696,7 +1696,7 @@ default value is an empty array.)
In the ``Sparc.td`` file, the ``SubtargetFeature`` is used to define the
following features.

.. code-block:: llvm
.. code-block:: text

def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true",
"Enable SPARC-V9 instructions">;

@ -1710,7 +1710,7 @@ Elsewhere in ``Sparc.td``, the ``Proc`` class is defined and then is used to
define particular SPARC processor subtypes that may have the previously
described features.

.. code-block:: llvm
.. code-block:: text

class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;

@ -747,7 +747,7 @@ template parameter is the name of the pass that is to be used on the command
line to specify that the pass should be added to a program (for example, with
:program:`opt` or :program:`bugpoint`). The first argument is the name of the
pass, which is to be used for the :option:`-help` output of programs, as well
as for debug output generated by the :option:`--debug-pass` option.
as for debug output generated by the `--debug-pass` option.

If you want your pass to be easily dumpable, you should implement the virtual
print method:

@ -1,11 +1,6 @@
Overview
========

.. warning::

If you are using a released version of LLVM, see `the download page
<http://llvm.org/releases/>`_ to find your documentation.

The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.

@ -2014,6 +2014,9 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);

void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef A);
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx);
void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs);
LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
unsigned KindID);

@ -2600,6 +2603,9 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,

void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef A);
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C, LLVMAttributeIndex Idx);
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs);
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID);

@ -27,19 +27,24 @@ template<class GraphType>
struct GraphTraits {
// Elements to provide:

// NOTICE: We are in a transition from migration interfaces that require
// NodeType *, to NodeRef. NodeRef is required to be cheap to copy, but does
// not have to be a raw pointer. In the transition, user should define
// NodeType, and NodeRef = NodeType *.
//
// typedef NodeType - Type of Node in the graph
// typedef NodeRef - NodeType *
// typedef ChildIteratorType - Type used to iterate over children in graph

// static NodeType *getEntryNode(const GraphType &)
// static NodeRef getEntryNode(const GraphType &)
// Return the entry node of the graph

// static ChildIteratorType child_begin(NodeType *)
// static ChildIteratorType child_end (NodeType *)
// static ChildIteratorType child_begin(NodeRef)
// static ChildIteratorType child_end (NodeRef)
// Return iterators that point to the beginning and ending of the child
// node list for the specified node.
//

// typedef ...iterator nodes_iterator;
// static nodes_iterator nodes_begin(GraphType *G)
// static nodes_iterator nodes_end (GraphType *G)

@ -57,7 +62,7 @@ struct GraphTraits {
// your argument to XXX_begin(...) is unknown or needs to have the proper .h
// file #include'd.
//
typedef typename GraphType::UnknownGraphTypeError NodeType;
typedef typename GraphType::UnknownGraphTypeError NodeRef;
};

@ -37,23 +37,22 @@ namespace llvm {
|
||||
/// build up a vector of nodes in a particular SCC. Note that it is a forward
|
||||
/// iterator and thus you cannot backtrack or re-visit nodes.
|
||||
template <class GraphT, class GT = GraphTraits<GraphT>>
|
||||
class scc_iterator
|
||||
: public iterator_facade_base<
|
||||
scc_iterator<GraphT, GT>, std::forward_iterator_tag,
|
||||
const std::vector<typename GT::NodeType *>, ptrdiff_t> {
|
||||
typedef typename GT::NodeType NodeType;
|
||||
class scc_iterator : public iterator_facade_base<
|
||||
scc_iterator<GraphT, GT>, std::forward_iterator_tag,
|
||||
const std::vector<typename GT::NodeRef>, ptrdiff_t> {
|
||||
typedef typename GT::NodeRef NodeRef;
|
||||
typedef typename GT::ChildIteratorType ChildItTy;
|
||||
typedef std::vector<NodeType *> SccTy;
|
||||
typedef std::vector<NodeRef> SccTy;
|
||||
typedef typename scc_iterator::reference reference;
|
||||
|
||||
/// Element of VisitStack during DFS.
|
||||
struct StackElement {
|
||||
NodeType *Node; ///< The current node pointer.
|
||||
NodeRef Node; ///< The current node pointer.
|
||||
ChildItTy NextChild; ///< The next child, modified inplace during DFS.
|
||||
unsigned MinVisited; ///< Minimum uplink value of all children of Node.
|
||||
|
||||
StackElement(NodeType *Node, const ChildItTy &Child, unsigned Min)
|
||||
: Node(Node), NextChild(Child), MinVisited(Min) {}
|
||||
StackElement(NodeRef Node, const ChildItTy &Child, unsigned Min)
|
||||
: Node(Node), NextChild(Child), MinVisited(Min) {}
|
||||
|
||||
bool operator==(const StackElement &Other) const {
|
||||
return Node == Other.Node &&
|
||||
@ -67,10 +66,10 @@ class scc_iterator
|
||||
///
|
||||
/// nodeVisitNumbers are per-node visit numbers, also used as DFS flags.
|
||||
unsigned visitNum;
|
||||
DenseMap<NodeType *, unsigned> nodeVisitNumbers;
|
||||
DenseMap<NodeRef, unsigned> nodeVisitNumbers;
|
||||
|
||||
/// Stack holding nodes of the SCC.
|
||||
std::vector<NodeType *> SCCNodeStack;
|
||||
std::vector<NodeRef> SCCNodeStack;
|
||||
|
||||
/// The current SCC, retrieved using operator*().
|
||||
SccTy CurrentSCC;
|
||||
@ -80,7 +79,7 @@ class scc_iterator
|
||||
std::vector<StackElement> VisitStack;
|
||||
|
||||
/// A single "visit" within the non-recursive DFS traversal.
|
||||
void DFSVisitOne(NodeType *N);
|
||||
void DFSVisitOne(NodeRef N);
|
||||
|
||||
/// The stack-based DFS traversal; defined below.
|
||||
void DFSVisitChildren();
|
||||
@ -88,7 +87,7 @@ class scc_iterator
|
||||
/// Compute the next SCC using the DFS traversal.
|
||||
void GetNextSCC();
|
||||
|
||||
scc_iterator(NodeType *entryN) : visitNum(0) {
|
||||
scc_iterator(NodeRef entryN) : visitNum(0) {
|
||||
DFSVisitOne(entryN);
|
||||
GetNextSCC();
|
||||
}
|
||||
@ -131,7 +130,7 @@ public:
|
||||
|
||||
/// This informs the \c scc_iterator that the specified \c Old node
|
||||
/// has been deleted, and \c New is to be used in its place.
|
||||
void ReplaceNode(NodeType *Old, NodeType *New) {
|
||||
void ReplaceNode(NodeRef Old, NodeRef New) {
|
||||
assert(nodeVisitNumbers.count(Old) && "Old not in scc_iterator?");
|
||||
nodeVisitNumbers[New] = nodeVisitNumbers[Old];
|
||||
nodeVisitNumbers.erase(Old);
|
||||
@ -139,7 +138,7 @@ public:
|
||||
};
|
||||
|
||||
template <class GraphT, class GT>
|
||||
void scc_iterator<GraphT, GT>::DFSVisitOne(NodeType *N) {
|
||||
void scc_iterator<GraphT, GT>::DFSVisitOne(NodeRef N) {
|
||||
++visitNum;
|
||||
nodeVisitNumbers[N] = visitNum;
|
||||
SCCNodeStack.push_back(N);
|
||||
@ -155,8 +154,8 @@ void scc_iterator<GraphT, GT>::DFSVisitChildren() {
|
||||
assert(!VisitStack.empty());
|
||||
while (VisitStack.back().NextChild != GT::child_end(VisitStack.back().Node)) {
|
||||
// TOS has at least one more child so continue DFS
|
||||
NodeType *childN = *VisitStack.back().NextChild++;
|
||||
typename DenseMap<NodeType *, unsigned>::iterator Visited =
|
||||
NodeRef childN = *VisitStack.back().NextChild++;
|
||||
typename DenseMap<NodeRef, unsigned>::iterator Visited =
|
||||
nodeVisitNumbers.find(childN);
|
||||
if (Visited == nodeVisitNumbers.end()) {
|
||||
// this node has never been seen.
|
||||
@ -176,7 +175,7 @@ template <class GraphT, class GT> void scc_iterator<GraphT, GT>::GetNextSCC() {
|
||||
DFSVisitChildren();
|
||||
|
||||
// Pop the leaf on top of the VisitStack.
|
||||
NodeType *visitingN = VisitStack.back().Node;
|
||||
NodeRef visitingN = VisitStack.back().Node;
|
||||
unsigned minVisitNum = VisitStack.back().MinVisited;
|
||||
assert(VisitStack.back().NextChild == GT::child_end(visitingN));
|
||||
VisitStack.pop_back();
|
||||
@ -212,7 +211,7 @@ bool scc_iterator<GraphT, GT>::hasLoop() const {
|
||||
assert(!CurrentSCC.empty() && "Dereferencing END SCC iterator!");
|
||||
if (CurrentSCC.size() > 1)
|
||||
return true;
|
||||
NodeType *N = CurrentSCC.front();
|
||||
NodeRef N = CurrentSCC.front();
|
||||
for (ChildItTy CI = GT::child_begin(N), CE = GT::child_end(N); CI != CE;
|
||||
++CI)
|
||||
if (*CI == N)
|
||||
|
@ -26,10 +26,18 @@
|
||||
#include <memory>
|
||||
#include <utility> // for std::pair
|
||||
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/iterator.h"
|
||||
#include "llvm/ADT/iterator_range.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
|
||||
namespace llvm {
|
||||
namespace detail {
|
||||
|
||||
template <typename RangeT>
|
||||
using IterOfRange = decltype(std::begin(std::declval<RangeT>()));
|
||||
|
||||
} // End detail namespace
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Extra additions to <functional>
|
||||
@ -235,6 +243,90 @@ auto reverse(
|
||||
llvm::make_reverse_iterator(std::begin(C)));
|
||||
}
|
||||
|
||||
/// An iterator adaptor that filters the elements of given inner iterators.
|
||||
///
|
||||
/// The predicate parameter should be a callable object that accepts the wrapped
|
||||
/// iterator's reference type and returns a bool. When incrementing or
|
||||
/// decrementing the iterator, it will call the predicate on each element and
|
||||
/// skip any where it returns false.
|
||||
///
|
||||
/// \code
|
||||
/// int A[] = { 1, 2, 3, 4 };
|
||||
/// auto R = make_filter_range(A, [](int N) { return N % 2 == 1; });
|
||||
/// // R contains { 1, 3 }.
|
||||
/// \endcode
|
||||
template <typename WrappedIteratorT, typename PredicateT>
|
||||
class filter_iterator
|
||||
: public iterator_adaptor_base<
|
||||
filter_iterator<WrappedIteratorT, PredicateT>, WrappedIteratorT,
|
||||
typename std::common_type<
|
||||
std::forward_iterator_tag,
|
||||
typename std::iterator_traits<
|
||||
WrappedIteratorT>::iterator_category>::type> {
|
||||
using BaseT = iterator_adaptor_base<
|
||||
filter_iterator<WrappedIteratorT, PredicateT>, WrappedIteratorT,
|
||||
typename std::common_type<
|
||||
std::forward_iterator_tag,
|
||||
typename std::iterator_traits<WrappedIteratorT>::iterator_category>::
|
||||
type>;
|
||||
|
||||
struct PayloadType {
|
||||
WrappedIteratorT End;
|
||||
PredicateT Pred;
|
||||
};
|
||||
|
||||
Optional<PayloadType> Payload;
|
||||
|
||||
void findNextValid() {
|
||||
assert(Payload && "Payload should be engaged when findNextValid is called");
|
||||
while (this->I != Payload->End && !Payload->Pred(*this->I))
|
||||
BaseT::operator++();
|
||||
}
|
||||
|
||||
// Construct the begin iterator. The begin iterator requires to know where end
|
||||
// is, so that it can properly stop when it hits end.
|
||||
filter_iterator(WrappedIteratorT Begin, WrappedIteratorT End, PredicateT Pred)
|
||||
: BaseT(std::move(Begin)),
|
||||
Payload(PayloadType{std::move(End), std::move(Pred)}) {
|
||||
findNextValid();
|
||||
}
|
||||
|
||||
// Construct the end iterator. It's not incrementable, so Payload doesn't
|
||||
// have to be engaged.
|
||||
filter_iterator(WrappedIteratorT End) : BaseT(End) {}
|
||||
|
||||
public:
|
||||
using BaseT::operator++;
|
||||
|
||||
filter_iterator &operator++() {
|
||||
BaseT::operator++();
|
||||
findNextValid();
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename RT, typename PT>
|
||||
friend iterator_range<filter_iterator<detail::IterOfRange<RT>, PT>>
|
||||
make_filter_range(RT &&, PT);
|
||||
};
|
||||
|
||||
/// Convenience function that takes a range of elements and a predicate,
|
||||
/// and return a new filter_iterator range.
|
||||
///
|
||||
/// FIXME: Currently if RangeT && is a rvalue reference to a temporary, the
|
||||
/// lifetime of that temporary is not kept by the returned range object, and the
|
||||
/// temporary is going to be dropped on the floor after the make_iterator_range
|
||||
/// full expression that contains this function call.
|
||||
template <typename RangeT, typename PredicateT>
|
||||
iterator_range<filter_iterator<detail::IterOfRange<RangeT>, PredicateT>>
|
||||
make_filter_range(RangeT &&Range, PredicateT Pred) {
|
||||
using FilterIteratorT =
|
||||
filter_iterator<detail::IterOfRange<RangeT>, PredicateT>;
|
||||
return make_range(FilterIteratorT(std::begin(std::forward<RangeT>(Range)),
|
||||
std::end(std::forward<RangeT>(Range)),
|
||||
std::move(Pred)),
|
||||
FilterIteratorT(std::end(std::forward<RangeT>(Range))));
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Extra additions to <utility>
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -174,6 +174,7 @@ public:
|
||||
UnknownEnvironment,
|
||||
|
||||
GNU,
|
||||
GNUABI64,
|
||||
GNUEABI,
|
||||
GNUEABIHF,
|
||||
GNUX32,
|
||||
@ -476,8 +477,9 @@ public:
|
||||
|
||||
bool isGNUEnvironment() const {
|
||||
EnvironmentType Env = getEnvironment();
|
||||
return Env == Triple::GNU || Env == Triple::GNUEABI ||
|
||||
Env == Triple::GNUEABIHF || Env == Triple::GNUX32;
|
||||
return Env == Triple::GNU || Env == Triple::GNUABI64 ||
|
||||
Env == Triple::GNUEABI || Env == Triple::GNUEABIHF ||
|
||||
Env == Triple::GNUX32;
|
||||
}
|
||||
|
||||
/// Checks if the environment could be MSVC.
|
||||
|
@ -155,7 +155,14 @@ template <
|
||||
typename T = typename std::iterator_traits<WrappedIteratorT>::value_type,
|
||||
typename DifferenceTypeT =
|
||||
typename std::iterator_traits<WrappedIteratorT>::difference_type,
|
||||
typename PointerT = T *, typename ReferenceT = T &,
|
||||
typename PointerT = typename std::conditional<
|
||||
std::is_same<T, typename std::iterator_traits<
|
||||
WrappedIteratorT>::value_type>::value,
|
||||
typename std::iterator_traits<WrappedIteratorT>::pointer, T *>::type,
|
||||
typename ReferenceT = typename std::conditional<
|
||||
std::is_same<T, typename std::iterator_traits<
|
||||
WrappedIteratorT>::value_type>::value,
|
||||
typename std::iterator_traits<WrappedIteratorT>::reference, T &>::type,
|
||||
// Don't provide these, they are mostly to act as aliases below.
|
||||
typename WrappedTraitsT = std::iterator_traits<WrappedIteratorT>>
|
||||
class iterator_adaptor_base
|
||||
@ -168,15 +175,7 @@ protected:
|
||||
|
||||
iterator_adaptor_base() = default;
|
||||
|
||||
template <typename U>
|
||||
explicit iterator_adaptor_base(
|
||||
U &&u,
|
||||
typename std::enable_if<
|
||||
!std::is_base_of<typename std::remove_cv<
|
||||
typename std::remove_reference<U>::type>::type,
|
||||
DerivedT>::value,
|
||||
int>::type = 0)
|
||||
: I(std::forward<U &&>(u)) {}
|
||||
explicit iterator_adaptor_base(WrappedIteratorT u) : I(std::move(u)) {}
|
||||
|
||||
const WrappedIteratorT &wrapped() const { return I; }
|
||||
|
||||
|
@ -410,6 +410,7 @@ public:
|
||||
// traversals.
|
||||
template <> struct GraphTraits<CallGraphNode *> {
|
||||
typedef CallGraphNode NodeType;
|
||||
typedef CallGraphNode *NodeRef;
|
||||
|
||||
typedef CallGraphNode::CallRecord CGNPairTy;
|
||||
typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode *>
|
||||
@ -431,6 +432,7 @@ template <> struct GraphTraits<CallGraphNode *> {
|
||||
|
||||
template <> struct GraphTraits<const CallGraphNode *> {
|
||||
typedef const CallGraphNode NodeType;
|
||||
typedef const CallGraphNode *NodeRef;
|
||||
|
||||
typedef CallGraphNode::CallRecord CGNPairTy;
|
||||
typedef std::pointer_to_unary_function<CGNPairTy, const CallGraphNode *>
|
||||
|
@ -196,6 +196,13 @@ namespace llvm {
|
||||
/// block.
|
||||
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);
|
||||
|
||||
/// \brief Insert code to directly compute the specified SCEV expression
|
||||
/// into the program. The inserted code is inserted into the SCEVExpander's
|
||||
/// current insertion point. If a type is specified, the result will be
|
||||
/// expanded to have that type, with a cast if necessary.
|
||||
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
|
||||
|
||||
|
||||
/// \brief Generates a code sequence that evaluates this predicate.
|
||||
/// The inserted instructions will be at position \p Loc.
|
||||
/// The result will be of type i1 and will have a value of 0 when the
|
||||
@ -253,6 +260,15 @@ namespace llvm {
|
||||
|
||||
void enableLSRMode() { LSRMode = true; }
|
||||
|
||||
/// \brief Set the current insertion point. This is useful if multiple calls
|
||||
/// to expandCodeFor() are going to be made with the same insert point and
|
||||
/// the insert point may be moved during one of the expansions (e.g. if the
|
||||
/// insert point is not a block terminator).
|
||||
void setInsertPoint(Instruction *IP) {
|
||||
assert(IP);
|
||||
Builder.SetInsertPoint(IP);
|
||||
}
|
||||
|
||||
/// \brief Clear the current insertion point. This is useful if the
|
||||
/// instruction that had been serving as the insertion point may have been
|
||||
/// deleted.
|
||||
@ -313,12 +329,6 @@ namespace llvm {
|
||||
|
||||
Value *expand(const SCEV *S);
|
||||
|
||||
/// \brief Insert code to directly compute the specified SCEV expression
|
||||
/// into the program. The inserted code is inserted into the SCEVExpander's
|
||||
/// current insertion point. If a type is specified, the result will be
|
||||
/// expanded to have that type, with a cast if necessary.
|
||||
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
|
||||
|
||||
/// \brief Determine the most "relevant" loop for the given SCEV.
|
||||
const Loop *getRelevantLoop(const SCEV *);
|
||||
|
||||
|
@ -740,6 +740,7 @@ struct MBB2NumberFunctor :
|
||||
|
||||
template <> struct GraphTraits<MachineBasicBlock *> {
|
||||
typedef MachineBasicBlock NodeType;
|
||||
typedef MachineBasicBlock *NodeRef;
|
||||
typedef MachineBasicBlock::succ_iterator ChildIteratorType;
|
||||
|
||||
static NodeType *getEntryNode(MachineBasicBlock *BB) { return BB; }
|
||||
@ -753,6 +754,7 @@ template <> struct GraphTraits<MachineBasicBlock *> {
|
||||
|
||||
template <> struct GraphTraits<const MachineBasicBlock *> {
|
||||
typedef const MachineBasicBlock NodeType;
|
||||
typedef const MachineBasicBlock *NodeRef;
|
||||
typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
|
||||
|
||||
static NodeType *getEntryNode(const MachineBasicBlock *BB) { return BB; }
|
||||
@ -772,6 +774,7 @@ template <> struct GraphTraits<const MachineBasicBlock *> {
|
||||
//
|
||||
template <> struct GraphTraits<Inverse<MachineBasicBlock*> > {
|
||||
typedef MachineBasicBlock NodeType;
|
||||
typedef MachineBasicBlock *NodeRef;
|
||||
typedef MachineBasicBlock::pred_iterator ChildIteratorType;
|
||||
static NodeType *getEntryNode(Inverse<MachineBasicBlock *> G) {
|
||||
return G.Graph;
|
||||
@ -786,6 +789,7 @@ template <> struct GraphTraits<Inverse<MachineBasicBlock*> > {
|
||||
|
||||
template <> struct GraphTraits<Inverse<const MachineBasicBlock*> > {
|
||||
typedef const MachineBasicBlock NodeType;
|
||||
typedef const MachineBasicBlock *NodeRef;
|
||||
typedef MachineBasicBlock::const_pred_iterator ChildIteratorType;
|
||||
static NodeType *getEntryNode(Inverse<const MachineBasicBlock*> G) {
|
||||
return G.Graph;
|
||||
|
@ -210,6 +210,7 @@ public:
|
||||
private:
|
||||
friend class AttrBuilder;
|
||||
friend class AttributeSetImpl;
|
||||
friend class AttributeSetNode;
|
||||
template <typename Ty> friend struct DenseMapInfo;
|
||||
|
||||
/// \brief The attributes that we are managing. This can be null to represent
|
||||
|
@ -155,6 +155,7 @@ struct isPodLike<TerminatorInst::SuccIterator<T, U>> {
|
||||
|
||||
template <> struct GraphTraits<BasicBlock*> {
|
||||
typedef BasicBlock NodeType;
|
||||
typedef BasicBlock *NodeRef;
|
||||
typedef succ_iterator ChildIteratorType;
|
||||
|
||||
static NodeType *getEntryNode(BasicBlock *BB) { return BB; }
|
||||
@ -168,6 +169,7 @@ template <> struct GraphTraits<BasicBlock*> {
|
||||
|
||||
template <> struct GraphTraits<const BasicBlock*> {
|
||||
typedef const BasicBlock NodeType;
|
||||
typedef const BasicBlock *NodeRef;
|
||||
typedef succ_const_iterator ChildIteratorType;
|
||||
|
||||
static NodeType *getEntryNode(const BasicBlock *BB) { return BB; }
|
||||
@ -187,6 +189,7 @@ template <> struct GraphTraits<const BasicBlock*> {
|
||||
//
|
||||
template <> struct GraphTraits<Inverse<BasicBlock*> > {
|
||||
typedef BasicBlock NodeType;
|
||||
typedef BasicBlock *NodeRef;
|
||||
typedef pred_iterator ChildIteratorType;
|
||||
static NodeType *getEntryNode(Inverse<BasicBlock *> G) { return G.Graph; }
|
||||
static inline ChildIteratorType child_begin(NodeType *N) {
|
||||
@ -199,6 +202,7 @@ template <> struct GraphTraits<Inverse<BasicBlock*> > {
|
||||
|
||||
template <> struct GraphTraits<Inverse<const BasicBlock*> > {
|
||||
typedef const BasicBlock NodeType;
|
||||
typedef const BasicBlock *NodeRef;
|
||||
typedef const_pred_iterator ChildIteratorType;
|
||||
static NodeType *getEntryNode(Inverse<const BasicBlock*> G) {
|
||||
return G.Graph;
|
||||
|
@ -479,6 +479,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
|
||||
@ -1512,8 +1514,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector bit test
|
||||
|
@ -2349,6 +2349,10 @@ public:
|
||||
/// from getBooleanContents().
|
||||
bool isConstFalseVal(const SDNode *N) const;
|
||||
|
||||
/// Return a constant of type VT that contains a true value that respects
|
||||
/// getBooleanContents()
|
||||
SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const;
|
||||
|
||||
/// Return if \p N is a True value when extended to \p VT.
|
||||
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const;
|
||||
|
||||
|
@ -623,6 +623,7 @@ template <> struct GraphTraits<IrreducibleGraph> {
|
||||
typedef bfi_detail::IrreducibleGraph GraphT;
|
||||
|
||||
typedef const GraphT::IrrNode NodeType;
|
||||
typedef const GraphT::IrrNode *NodeRef;
|
||||
typedef GraphT::IrrNode::iterator ChildIteratorType;
|
||||
|
||||
static const NodeType *getEntryNode(const GraphT &G) {
|
||||
|
@ -1424,8 +1424,8 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V,
|
||||
/// integer type Ty is used to select how many bits are available for the
|
||||
/// result. Returns null if the conversion cannot be performed, otherwise
|
||||
/// returns the Constant value resulting from the conversion.
|
||||
Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero,
|
||||
Type *Ty) {
|
||||
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
|
||||
Type *Ty) {
|
||||
// All of these conversion intrinsics form an integer of at most 64bits.
|
||||
unsigned ResultWidth = Ty->getIntegerBitWidth();
|
||||
assert(ResultWidth <= 64 &&
|
||||
@ -1438,7 +1438,8 @@ Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero,
|
||||
APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
|
||||
/*isSigned=*/true, mode,
|
||||
&isExact);
|
||||
if (status != APFloat::opOK && status != APFloat::opInexact)
|
||||
if (status != APFloat::opOK &&
|
||||
(!roundTowardZero || status != APFloat::opInexact))
|
||||
return nullptr;
|
||||
return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
|
||||
}
|
||||
@ -1676,17 +1677,17 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
|
||||
case Intrinsic::x86_sse2_cvtsd2si:
|
||||
case Intrinsic::x86_sse2_cvtsd2si64:
|
||||
if (ConstantFP *FPOp =
|
||||
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
|
||||
return ConstantFoldConvertToInt(FPOp->getValueAPF(),
|
||||
/*roundTowardZero=*/false, Ty);
|
||||
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
|
||||
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
|
||||
/*roundTowardZero=*/false, Ty);
|
||||
case Intrinsic::x86_sse_cvttss2si:
|
||||
case Intrinsic::x86_sse_cvttss2si64:
|
||||
case Intrinsic::x86_sse2_cvttsd2si:
|
||||
case Intrinsic::x86_sse2_cvttsd2si64:
|
||||
if (ConstantFP *FPOp =
|
||||
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
|
||||
return ConstantFoldConvertToInt(FPOp->getValueAPF(),
|
||||
/*roundTowardZero=*/true, Ty);
|
||||
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
|
||||
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
|
||||
/*roundTowardZero=*/true, Ty);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3400,7 +3400,10 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
|
||||
return TrueVal;
|
||||
|
||||
if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) {
|
||||
unsigned BitWidth = Q.DL.getTypeSizeInBits(TrueVal->getType());
|
||||
// FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
|
||||
// decomposeBitTestICmp() might help.
|
||||
unsigned BitWidth =
|
||||
Q.DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
|
||||
ICmpInst::Predicate Pred = ICI->getPredicate();
|
||||
Value *CmpLHS = ICI->getOperand(0);
|
||||
Value *CmpRHS = ICI->getOperand(1);
|
||||
@ -4274,7 +4277,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
|
||||
|
||||
// Gracefully handle edge cases where the instruction is not wired into any
|
||||
// parent block.
|
||||
if (I->getParent())
|
||||
if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
|
||||
!I->mayHaveSideEffects())
|
||||
I->eraseFromParent();
|
||||
} else {
|
||||
Worklist.insert(I);
|
||||
@ -4302,7 +4306,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
|
||||
|
||||
// Gracefully handle edge cases where the instruction is not wired into any
|
||||
// parent block.
|
||||
if (I->getParent())
|
||||
if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
|
||||
!I->mayHaveSideEffects())
|
||||
I->eraseFromParent();
|
||||
}
|
||||
return Simplified;
|
||||
|
@ -115,13 +115,19 @@ bool UnrolledInstAnalyzer::visitLoad(LoadInst &I) {
|
||||
// We might have a vector load from an array. FIXME: for now we just bail
|
||||
// out in this case, but we should be able to resolve and simplify such
|
||||
// loads.
|
||||
if(CDS->getElementType() != I.getType())
|
||||
if (CDS->getElementType() != I.getType())
|
||||
return false;
|
||||
|
||||
int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
|
||||
if (SimplifiedAddrOp->getValue().getActiveBits() >= 64)
|
||||
unsigned ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
|
||||
if (SimplifiedAddrOp->getValue().getActiveBits() > 64)
|
||||
return false;
|
||||
int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize;
|
||||
int64_t SimplifiedAddrOpV = SimplifiedAddrOp->getSExtValue();
|
||||
if (SimplifiedAddrOpV < 0) {
|
||||
// FIXME: For now we conservatively ignore out of bound accesses, but
|
||||
// we're allowed to perform the optimization in this case.
|
||||
return false;
|
||||
}
|
||||
uint64_t Index = static_cast<uint64_t>(SimplifiedAddrOpV) / ElemSize;
|
||||
if (Index >= CDS->getNumElements()) {
|
||||
// FIXME: For now we conservatively ignore out of bound accesses, but
|
||||
// we're allowed to perform the optimization in this case.
|
||||
|
@ -1610,8 +1610,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
|
||||
|
||||
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
|
||||
Instruction *IP) {
|
||||
assert(IP);
|
||||
Builder.SetInsertPoint(IP);
|
||||
setInsertPoint(IP);
|
||||
return expandCodeFor(SH, Ty);
|
||||
}
|
||||
|
||||
|
@ -214,10 +214,7 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
|
||||
}
|
||||
|
||||
TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
|
||||
// It's possible to ask for the FuncId of a function which doesn't have a
|
||||
// subprogram: inlining a function with debug info into a function with none.
|
||||
if (!SP)
|
||||
return TypeIndex::None();
|
||||
assert(SP);
|
||||
|
||||
// Check if we've already translated this subprogram.
|
||||
auto I = TypeIndices.find({SP, nullptr});
|
||||
@ -621,11 +618,12 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
|
||||
|
||||
std::string FuncName;
|
||||
auto *SP = GV->getSubprogram();
|
||||
assert(SP);
|
||||
setCurrentSubprogram(SP);
|
||||
|
||||
// If we have a display name, build the fully qualified name by walking the
|
||||
// chain of scopes.
|
||||
if (SP != nullptr && !SP->getDisplayName().empty())
|
||||
if (!SP->getDisplayName().empty())
|
||||
FuncName =
|
||||
getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName());
|
||||
|
||||
@ -864,7 +862,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
|
||||
void CodeViewDebug::beginFunction(const MachineFunction *MF) {
|
||||
assert(!CurFn && "Can't process two functions at once!");
|
||||
|
||||
if (!Asm || !MMI->hasDebugInfo())
|
||||
if (!Asm || !MMI->hasDebugInfo() || !MF->getFunction()->getSubprogram())
|
||||
return;
|
||||
|
||||
DebugHandlerBase::beginFunction(MF);
|
||||
@ -1939,7 +1937,8 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
|
||||
DebugHandlerBase::beginInstruction(MI);
|
||||
|
||||
// Ignore DBG_VALUE locations and function prologue.
|
||||
if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
|
||||
if (!Asm || !CurFn || MI->isDebugValue() ||
|
||||
MI->getFlag(MachineInstr::FrameSetup))
|
||||
return;
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
if (DL == PrevInstLoc || !DL)
|
||||
|
@ -996,6 +996,24 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
|
||||
MachineBasicBlock *IBB = &*I;
|
||||
MachineBasicBlock *PredBB = &*std::prev(I);
|
||||
MergePotentials.clear();
|
||||
MachineLoop *ML;
|
||||
|
||||
// Bail if merging after placement and IBB is the loop header because
|
||||
// -- If merging predecessors that belong to the same loop as IBB, the
|
||||
// common tail of merged predecessors may become the loop top if block
|
||||
// placement is called again and the predecessors may branch to this common
|
||||
// tail and require more branches. This can be relaxed if
|
||||
// MachineBlockPlacement::findBestLoopTop is more flexible.
|
||||
// --If merging predecessors that do not belong to the same loop as IBB, the
|
||||
// loop info of IBB's loop and the other loops may be affected. Calling the
|
||||
// block placement again may make big change to the layout and eliminate the
|
||||
// reason to do tail merging here.
|
||||
if (AfterBlockPlacement && MLI) {
|
||||
ML = MLI->getLoopFor(IBB);
|
||||
if (ML && IBB == ML->getHeader())
|
||||
continue;
|
||||
}
|
||||
|
||||
for (MachineBasicBlock *PBB : I->predecessors()) {
|
||||
if (MergePotentials.size() == TailMergeThreshold)
|
||||
break;
|
||||
@ -1015,16 +1033,12 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
|
||||
if (PBB->hasEHPadSuccessor())
|
||||
continue;
|
||||
|
||||
// Bail out if the loop header (IBB) is not the top of the loop chain
|
||||
// after the block placement. Otherwise, the common tail of IBB's
|
||||
// predecessors may become the loop top if block placement is called again
|
||||
// and the predecessors may branch to this common tail.
|
||||
// FIXME: Relaxed this check if the algorithm of finding loop top is
|
||||
// changed in MBP.
|
||||
// After block placement, only consider predecessors that belong to the
|
||||
// same loop as IBB. The reason is the same as above when skipping loop
|
||||
// header.
|
||||
if (AfterBlockPlacement && MLI)
|
||||
if (MachineLoop *ML = MLI->getLoopFor(IBB))
|
||||
if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB))
|
||||
continue;
|
||||
if (ML != MLI->getLoopFor(PBB))
|
||||
continue;
|
||||
|
||||
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
|
||||
SmallVector<MachineOperand, 4> Cond;
|
||||
|
@ -530,7 +530,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
|
||||
unsigned Align =
|
||||
std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
|
||||
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
|
||||
Align, SSC.getLiveRange(StackGuardSlot));
|
||||
Align, SSC.getFullLiveRange());
|
||||
}
|
||||
|
||||
for (Argument *Arg : ByValArguments) {
|
||||
|
@ -25,7 +25,9 @@ static cl::opt<bool> ClColoring("safe-stack-coloring",
|
||||
cl::Hidden, cl::init(true));
|
||||
|
||||
const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
|
||||
return LiveRanges[AllocaNumbering[AI]];
|
||||
const auto IT = AllocaNumbering.find(AI);
|
||||
assert(IT != AllocaNumbering.end());
|
||||
return LiveRanges[IT->second];
|
||||
}
|
||||
|
||||
bool StackColoring::readMarker(Instruction *I, bool *IsStart) {
|
||||
|
@ -100,7 +100,8 @@ void StackLayout::layoutObject(StackObject &Obj) {
|
||||
}
|
||||
|
||||
// Split starting and ending regions if necessary.
|
||||
for (StackRegion &R : Regions) {
|
||||
for (unsigned i = 0; i < Regions.size(); ++i) {
|
||||
StackRegion &R = Regions[i];
|
||||
if (Start > R.Start && Start < R.End) {
|
||||
StackRegion R0 = R;
|
||||
R.Start = R0.End = Start;
|
||||
|
@ -6198,13 +6198,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
|
||||
}
|
||||
}
|
||||
|
||||
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
|
||||
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
|
||||
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
|
||||
// Here, T can be 1 or -1, depending on the type of the setcc and
|
||||
// getBooleanContents().
|
||||
unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits();
|
||||
|
||||
SDLoc DL(N);
|
||||
SDValue NegOne =
|
||||
DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
|
||||
// To determine the "true" side of the select, we need to know the high bit
|
||||
// of the value returned by the setcc if it evaluates to true.
|
||||
// If the type of the setcc is i1, then the true case of the select is just
|
||||
// sext(i1 1), that is, -1.
|
||||
// If the type of the setcc is larger (say, i8) then the value of the high
|
||||
// bit depends on getBooleanContents(). So, ask TLI for a real "true" value
|
||||
// of the appropriate width.
|
||||
SDValue ExtTrueVal =
|
||||
(SetCCWidth == 1)
|
||||
? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
|
||||
DL, VT)
|
||||
: TLI.getConstTrueVal(DAG, VT, DL);
|
||||
|
||||
if (SDValue SCC = SimplifySelectCC(
|
||||
DL, N0.getOperand(0), N0.getOperand(1), NegOne,
|
||||
DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
|
||||
DAG.getConstant(0, DL, VT),
|
||||
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
|
||||
return SCC;
|
||||
@ -6215,10 +6229,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
|
||||
TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
|
||||
SDLoc DL(N);
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
|
||||
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
|
||||
N0.getOperand(0), N0.getOperand(1), CC);
|
||||
return DAG.getSelect(DL, VT, SetCC,
|
||||
NegOne, DAG.getConstant(0, DL, VT));
|
||||
SDValue SetCC =
|
||||
DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
|
||||
return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
|
||||
DAG.getConstant(0, DL, VT));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6639,19 +6639,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
|
||||
SDNode *FromNode = From.getNode();
|
||||
SDNode *ToNode = To.getNode();
|
||||
ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
|
||||
SmallVector<SDDbgValue *, 2> ClonedDVs;
|
||||
for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
|
||||
I != E; ++I) {
|
||||
SDDbgValue *Dbg = *I;
|
||||
// Only add Dbgvalues attached to same ResNo.
|
||||
if (Dbg->getKind() == SDDbgValue::SDNODE &&
|
||||
Dbg->getResNo() == From.getResNo()) {
|
||||
Dbg->getSDNode() == From.getNode() &&
|
||||
Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
|
||||
assert(FromNode != ToNode &&
|
||||
"Should not transfer Debug Values intranode");
|
||||
SDDbgValue *Clone =
|
||||
getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
|
||||
To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
|
||||
Dbg->getDebugLoc(), Dbg->getOrder());
|
||||
AddDbgValue(Clone, ToNode, false);
|
||||
ClonedDVs.push_back(Clone);
|
||||
Dbg->setIsInvalidated();
|
||||
}
|
||||
}
|
||||
for (SDDbgValue *I : ClonedDVs)
|
||||
AddDbgValue(I, ToNode, false);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -1234,6 +1234,16 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
|
||||
llvm_unreachable("Invalid boolean contents");
|
||||
}
|
||||
|
||||
SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT,
|
||||
const SDLoc &DL) const {
|
||||
unsigned ElementWidth = VT.getScalarSizeInBits();
|
||||
APInt TrueInt =
|
||||
getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent
|
||||
? APInt(ElementWidth, 1)
|
||||
: APInt::getAllOnesValue(ElementWidth);
|
||||
return DAG.getConstant(TrueInt, DL, VT);
|
||||
}
|
||||
|
||||
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
|
||||
if (!N)
|
||||
return false;
|
||||
|
@ -29,7 +29,7 @@
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
|
||||
@ -539,6 +539,16 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
|
||||
return TRI->regsOverlap(RegA, RegB);
|
||||
}
|
||||
|
||||
// Returns true if Reg is equal or aliased to at least one register in Set.
|
||||
static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
for (unsigned R : Set)
|
||||
if (TRI->regsOverlap(R, Reg))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return true if it's potentially profitable to commute the two-address
|
||||
/// instruction that's being processed.
|
||||
bool
|
||||
@ -864,9 +874,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
|
||||
// FIXME: Needs more sophisticated heuristics.
|
||||
return false;
|
||||
|
||||
SmallSet<unsigned, 2> Uses;
|
||||
SmallSet<unsigned, 2> Kills;
|
||||
SmallSet<unsigned, 2> Defs;
|
||||
SmallVector<unsigned, 2> Uses;
|
||||
SmallVector<unsigned, 2> Kills;
|
||||
SmallVector<unsigned, 2> Defs;
|
||||
for (const MachineOperand &MO : MI->operands()) {
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
@ -874,12 +884,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
|
||||
if (!MOReg)
|
||||
continue;
|
||||
if (MO.isDef())
|
||||
Defs.insert(MOReg);
|
||||
Defs.push_back(MOReg);
|
||||
else {
|
||||
Uses.insert(MOReg);
|
||||
Uses.push_back(MOReg);
|
||||
if (MOReg != Reg && (MO.isKill() ||
|
||||
(LIS && isPlainlyKilled(MI, MOReg, LIS))))
|
||||
Kills.insert(MOReg);
|
||||
Kills.push_back(MOReg);
|
||||
}
|
||||
}
|
||||
|
||||
@ -888,8 +898,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
|
||||
MachineBasicBlock::iterator AfterMI = std::next(Begin);
|
||||
|
||||
MachineBasicBlock::iterator End = AfterMI;
|
||||
while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
|
||||
Defs.insert(End->getOperand(0).getReg());
|
||||
while (End->isCopy() &&
|
||||
regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
|
||||
Defs.push_back(End->getOperand(0).getReg());
|
||||
++End;
|
||||
}
|
||||
|
||||
@ -915,21 +926,21 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
|
||||
if (!MOReg)
|
||||
continue;
|
||||
if (MO.isDef()) {
|
||||
if (Uses.count(MOReg))
|
||||
if (regOverlapsSet(Uses, MOReg, TRI))
|
||||
// Physical register use would be clobbered.
|
||||
return false;
|
||||
if (!MO.isDead() && Defs.count(MOReg))
|
||||
if (!MO.isDead() && regOverlapsSet(Defs, MOReg, TRI))
|
||||
// May clobber a physical register def.
|
||||
// FIXME: This may be too conservative. It's ok if the instruction
|
||||
// is sunken completely below the use.
|
||||
return false;
|
||||
} else {
|
||||
if (Defs.count(MOReg))
|
||||
if (regOverlapsSet(Defs, MOReg, TRI))
|
||||
return false;
|
||||
bool isKill =
|
||||
MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS));
|
||||
if (MOReg != Reg &&
|
||||
((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
|
||||
if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) ||
|
||||
regOverlapsSet(Kills, MOReg, TRI)))
|
||||
// Don't want to extend other live ranges and update kills.
|
||||
return false;
|
||||
if (MOReg == Reg && !isKill)
|
||||
|
@ -19,8 +19,8 @@
|
||||
#include "llvm/ADT/FoldingSet.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "AttributeSetNode.h"
|
||||
#include "llvm/Support/DataTypes.h"
|
||||
#include "llvm/Support/TrailingObjects.h"
|
||||
#include <climits>
|
||||
#include <string>
|
||||
|
||||
@ -142,73 +142,6 @@ public:
|
||||
StringRef getStringValue() const { return Val; }
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \class
|
||||
/// \brief This class represents a group of attributes that apply to one
|
||||
/// element: function, return type, or parameter.
|
||||
class AttributeSetNode final
|
||||
: public FoldingSetNode,
|
||||
private TrailingObjects<AttributeSetNode, Attribute> {
|
||||
friend TrailingObjects;
|
||||
|
||||
unsigned NumAttrs; ///< Number of attributes in this node.
|
||||
/// Bitset with a bit for each available attribute Attribute::AttrKind.
|
||||
uint64_t AvailableAttrs;
|
||||
|
||||
AttributeSetNode(ArrayRef<Attribute> Attrs)
|
||||
: NumAttrs(Attrs.size()), AvailableAttrs(0) {
|
||||
static_assert(Attribute::EndAttrKinds <= sizeof(AvailableAttrs) * CHAR_BIT,
|
||||
"Too many attributes for AvailableAttrs");
|
||||
// There's memory after the node where we can store the entries in.
|
||||
std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
|
||||
|
||||
for (Attribute I : *this) {
|
||||
if (!I.isStringAttribute()) {
|
||||
AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AttributesSetNode is uniqued, these should not be publicly available.
|
||||
void operator=(const AttributeSetNode &) = delete;
|
||||
AttributeSetNode(const AttributeSetNode &) = delete;
|
||||
public:
|
||||
void operator delete(void *p) { ::operator delete(p); }
|
||||
|
||||
static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
|
||||
|
||||
/// \brief Return the number of attributes this AttributeSet contains.
|
||||
unsigned getNumAttributes() const { return NumAttrs; }
|
||||
|
||||
bool hasAttribute(Attribute::AttrKind Kind) const {
|
||||
return AvailableAttrs & ((uint64_t)1) << Kind;
|
||||
}
|
||||
bool hasAttribute(StringRef Kind) const;
|
||||
bool hasAttributes() const { return NumAttrs != 0; }
|
||||
|
||||
Attribute getAttribute(Attribute::AttrKind Kind) const;
|
||||
Attribute getAttribute(StringRef Kind) const;
|
||||
|
||||
unsigned getAlignment() const;
|
||||
unsigned getStackAlignment() const;
|
||||
uint64_t getDereferenceableBytes() const;
|
||||
uint64_t getDereferenceableOrNullBytes() const;
|
||||
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
|
||||
std::string getAsString(bool InAttrGrp) const;
|
||||
|
||||
typedef const Attribute *iterator;
|
||||
iterator begin() const { return getTrailingObjects<Attribute>(); }
|
||||
iterator end() const { return begin() + NumAttrs; }
|
||||
|
||||
void Profile(FoldingSetNodeID &ID) const {
|
||||
Profile(ID, makeArrayRef(begin(), end()));
|
||||
}
|
||||
static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
|
||||
for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
|
||||
AttrList[I].Profile(ID);
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
|
||||
|
||||
//===----------------------------------------------------------------------===//
lib/IR/AttributeSetNode.h (new file, 98 lines)
@ -0,0 +1,98 @@
|
||||
//===-- AttributeSetNode.h - AttributeSet Internal Node ---------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file
|
||||
/// \brief This file defines the node class used internally by AttributeSet.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_IR_ATTRIBUTESETNODE_H
|
||||
#define LLVM_IR_ATTRIBUTESETNODE_H
|
||||
|
||||
#include "llvm/ADT/FoldingSet.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/Support/TrailingObjects.h"
|
||||
#include <climits>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
/// \class
|
||||
/// \brief This class represents a group of attributes that apply to one
|
||||
/// element: function, return type, or parameter.
|
||||
class AttributeSetNode final
|
||||
: public FoldingSetNode,
|
||||
private TrailingObjects<AttributeSetNode, Attribute> {
|
||||
friend TrailingObjects;
|
||||
|
||||
unsigned NumAttrs; ///< Number of attributes in this node.
|
||||
/// Bitset with a bit for each available attribute Attribute::AttrKind.
|
||||
uint64_t AvailableAttrs;
|
||||
|
||||
AttributeSetNode(ArrayRef<Attribute> Attrs)
|
||||
: NumAttrs(Attrs.size()), AvailableAttrs(0) {
|
||||
static_assert(Attribute::EndAttrKinds <= sizeof(AvailableAttrs) * CHAR_BIT,
|
||||
"Too many attributes for AvailableAttrs");
|
||||
// There's memory after the node where we can store the entries in.
|
||||
std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
|
||||
|
||||
for (Attribute I : *this) {
|
||||
if (!I.isStringAttribute()) {
|
||||
AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AttributesSetNode is uniqued, these should not be publicly available.
|
||||
void operator=(const AttributeSetNode &) = delete;
|
||||
AttributeSetNode(const AttributeSetNode &) = delete;
|
||||
public:
|
||||
void operator delete(void *p) { ::operator delete(p); }
|
||||
|
||||
static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
|
||||
|
||||
static AttributeSetNode *get(AttributeSet AS, unsigned Index) {
|
||||
return AS.getAttributes(Index);
|
||||
}
|
||||
|
||||
/// \brief Return the number of attributes this AttributeSet contains.
|
||||
unsigned getNumAttributes() const { return NumAttrs; }
|
||||
|
||||
bool hasAttribute(Attribute::AttrKind Kind) const {
|
||||
return AvailableAttrs & ((uint64_t)1) << Kind;
|
||||
}
|
||||
bool hasAttribute(StringRef Kind) const;
|
||||
bool hasAttributes() const { return NumAttrs != 0; }
|
||||
|
||||
Attribute getAttribute(Attribute::AttrKind Kind) const;
|
||||
Attribute getAttribute(StringRef Kind) const;
|
||||
|
||||
unsigned getAlignment() const;
|
||||
unsigned getStackAlignment() const;
|
||||
uint64_t getDereferenceableBytes() const;
|
||||
uint64_t getDereferenceableOrNullBytes() const;
|
||||
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
|
||||
std::string getAsString(bool InAttrGrp) const;
|
||||
|
||||
typedef const Attribute *iterator;
|
||||
iterator begin() const { return getTrailingObjects<Attribute>(); }
|
||||
iterator end() const { return begin() + NumAttrs; }
|
||||
|
||||
void Profile(FoldingSetNodeID &ID) const {
|
||||
Profile(ID, makeArrayRef(begin(), end()));
|
||||
}
|
||||
static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
|
||||
for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
|
||||
AttrList[I].Profile(ID);
|
||||
}
|
||||
};
|
||||
|
||||
} // end llvm namespace
|
||||
|
||||
#endif
|
@ -251,8 +251,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "sse2.cvtps2pd" ||
Name == "avx.cvtdq2.pd.256" ||
Name == "avx.cvt.ps2.pd.256" ||
Name == "sse2.cvttps2dq" ||
Name.startswith("avx.cvtt.") ||
Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128" ||
Name.startswith("avx.vextractf128.") ||
@ -712,12 +710,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
|
||||
else
|
||||
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
|
||||
} else if (IsX86 && (Name == "sse2.cvttps2dq" ||
|
||||
Name.startswith("avx.cvtt."))) {
|
||||
// Truncation (round to zero) float/double to i32 vector conversion.
|
||||
Value *Src = CI->getArgOperand(0);
|
||||
VectorType *DstTy = cast<VectorType>(CI->getType());
|
||||
Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
|
||||
} else if (IsX86 && Name.startswith("sse4a.movnt.")) {
|
||||
Module *M = F->getParent();
|
||||
SmallVector<Metadata *, 1> Elts;
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/Bitcode/ReaderWriter.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "AttributeSetNode.h"
|
||||
#include "llvm/IR/CallSite.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
@ -1844,6 +1845,18 @@ void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
|
||||
unwrap<Function>(F)->addAttribute(Idx, unwrap(A));
|
||||
}
|
||||
|
||||
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx) {
|
||||
auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
|
||||
return ASN->getNumAttributes();
|
||||
}
|
||||
|
||||
void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
|
||||
LLVMAttributeRef *Attrs) {
|
||||
auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
|
||||
for (auto A: make_range(ASN->begin(), ASN->end()))
|
||||
*Attrs++ = wrap(A);
|
||||
}
|
||||
|
||||
LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F,
|
||||
LLVMAttributeIndex Idx,
|
||||
unsigned KindID) {
|
||||
@ -2216,6 +2229,21 @@ void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
|
||||
CallSite(unwrap<Instruction>(C)).addAttribute(Idx, unwrap(A));
|
||||
}
|
||||
|
||||
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
|
||||
LLVMAttributeIndex Idx) {
|
||||
auto CS = CallSite(unwrap<Instruction>(C));
|
||||
auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
|
||||
return ASN->getNumAttributes();
|
||||
}
|
||||
|
||||
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
|
||||
LLVMAttributeRef *Attrs) {
|
||||
auto CS = CallSite(unwrap<Instruction>(C));
|
||||
auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
|
||||
for (auto A: make_range(ASN->begin(), ASN->end()))
|
||||
*Attrs++ = wrap(A);
|
||||
}
|
||||
|
||||
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
|
||||
LLVMAttributeIndex Idx,
|
||||
unsigned KindID) {
|
||||
|
@ -675,8 +675,8 @@ void MDNode::handleChangedOperand(void *Ref, Metadata *New) {
Metadata *Old = getOperand(Op);
setOperand(Op, New);

// Drop uniquing for self-reference cycles.
if (New == this) {
// Drop uniquing for self-reference cycles and deleted constants.
if (New == this || (!New && Old && isa<ConstantAsMetadata>(Old))) {
if (!isResolved())
resolve();
storeDistinctInContext();
@ -201,6 +201,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
switch (Kind) {
case UnknownEnvironment: return "unknown";
case GNU: return "gnu";
case GNUABI64: return "gnuabi64";
case GNUEABIHF: return "gnueabihf";
case GNUEABI: return "gnueabi";
case GNUX32: return "gnux32";
@ -468,6 +469,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
.StartsWith("eabihf", Triple::EABIHF)
.StartsWith("eabi", Triple::EABI)
.StartsWith("gnuabi64", Triple::GNUABI64)
.StartsWith("gnueabihf", Triple::GNUEABIHF)
.StartsWith("gnueabi", Triple::GNUEABI)
.StartsWith("gnux32", Triple::GNUX32)
@ -250,6 +250,7 @@ def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
FeatureMacroOpFusion,
FeatureNEON,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
HasV8_1aOps]>;

def : ProcessorModel<"generic", NoSchedModel, [
@ -7685,6 +7685,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
|
||||
/// Fold a floating-point multiply by power of two into floating-point to
|
||||
/// fixed-point conversion.
|
||||
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const AArch64Subtarget *Subtarget) {
|
||||
if (!Subtarget->hasNEON())
|
||||
return SDValue();
|
||||
@ -7728,10 +7729,16 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
|
||||
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
|
||||
break;
|
||||
case 4:
|
||||
ResTy = MVT::v4i32;
|
||||
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
|
||||
return SDValue();
|
||||
|
||||
assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
|
||||
"Illegal vector type after legalization");
|
||||
|
||||
SDLoc DL(N);
|
||||
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
|
||||
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
|
||||
@ -9853,7 +9860,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
return performIntToFpCombine(N, DAG, Subtarget);
|
||||
case ISD::FP_TO_SINT:
|
||||
case ISD::FP_TO_UINT:
|
||||
return performFpToIntCombine(N, DAG, Subtarget);
|
||||
return performFpToIntCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::FDIV:
|
||||
return performFDivCombine(N, DAG, Subtarget);
|
||||
case ISD::OR:
|
||||
|
@ -20,6 +20,7 @@ class AMDGPUInstrPrinter;
|
||||
class AMDGPUSubtarget;
|
||||
class AMDGPUTargetMachine;
|
||||
class FunctionPass;
|
||||
class GCNTargetMachine;
|
||||
struct MachineSchedContext;
|
||||
class MCAsmInfo;
|
||||
class raw_ostream;
|
||||
@ -50,7 +51,7 @@ FunctionPass *createSIFixSGPRCopiesPass();
|
||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||
FunctionPass *createSIDebuggerInsertNopsPass();
|
||||
FunctionPass *createSIInsertWaitsPass();
|
||||
FunctionPass *createAMDGPUCodeGenPreparePass(const TargetMachine *TM = nullptr);
|
||||
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
|
||||
|
||||
ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C);
|
||||
|
||||
|
@ -783,15 +783,19 @@ void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
|
||||
RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
|
||||
if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
|
||||
RuntimeMD::OpenCL_C, 1);
|
||||
auto Node = MD->getOperand(0);
|
||||
unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
|
||||
->getZExtValue();
|
||||
unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
|
||||
->getZExtValue();
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
|
||||
Major * 100 + Minor * 10, 2);
|
||||
if (MD->getNumOperands()) {
|
||||
auto Node = MD->getOperand(0);
|
||||
if (Node->getNumOperands() > 1) {
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
|
||||
RuntimeMD::OpenCL_C, 1);
|
||||
uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
|
||||
->getZExtValue();
|
||||
uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
|
||||
->getZExtValue();
|
||||
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
|
||||
Major * 100 + Minor * 10, 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,9 @@
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"

#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
@ -30,15 +32,28 @@ using namespace llvm;
|
||||
namespace {
|
||||
|
||||
class AMDGPUCodeGenPrepare : public FunctionPass,
|
||||
public InstVisitor<AMDGPUCodeGenPrepare> {
|
||||
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
|
||||
const GCNTargetMachine *TM;
|
||||
const SISubtarget *ST;
|
||||
DivergenceAnalysis *DA;
|
||||
const TargetMachine *TM;
|
||||
Module *Mod;
|
||||
bool HasUnsafeFPMath;
|
||||
|
||||
public:
|
||||
static char ID;
|
||||
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
|
||||
FunctionPass(ID),
|
||||
TM(TM) { }
|
||||
TM(static_cast<const GCNTargetMachine *>(TM)),
|
||||
ST(nullptr),
|
||||
DA(nullptr),
|
||||
Mod(nullptr),
|
||||
HasUnsafeFPMath(false) { }
|
||||
|
||||
bool visitFDiv(BinaryOperator &I);
|
||||
|
||||
bool visitInstruction(Instruction &I) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool doInitialization(Module &M) override;
|
||||
bool runOnFunction(Function &F) override;
|
||||
@ -55,7 +70,92 @@ public:
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
|
||||
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
|
||||
if (!CNum)
|
||||
return false;
|
||||
|
||||
// Reciprocal f32 is handled separately without denormals.
|
||||
return UnsafeDiv || CNum->isExactlyValue(+1.0);
|
||||
}
|
||||
|
||||
// Insert an intrinsic for fast fdiv for safe math situations where we can
|
||||
// reduce precision. Leave fdiv for situations where the generic node is
|
||||
// expected to be optimized.
|
||||
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
|
||||
Type *Ty = FDiv.getType();
|
||||
|
||||
// TODO: Handle half
|
||||
if (!Ty->getScalarType()->isFloatTy())
|
||||
return false;
|
||||
|
||||
MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
|
||||
if (!FPMath)
|
||||
return false;
|
||||
|
||||
const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
|
||||
float ULP = FPOp->getFPAccuracy();
|
||||
if (ULP < 2.5f)
|
||||
return false;
|
||||
|
||||
FastMathFlags FMF = FPOp->getFastMathFlags();
|
||||
bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
|
||||
FMF.allowReciprocal();
|
||||
if (ST->hasFP32Denormals() && !UnsafeDiv)
|
||||
return false;
|
||||
|
||||
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
|
||||
Builder.setFastMathFlags(FMF);
|
||||
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
|
||||
|
||||
const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
|
||||
Function *Decl
|
||||
= II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
|
||||
|
||||
Value *Num = FDiv.getOperand(0);
|
||||
Value *Den = FDiv.getOperand(1);
|
||||
|
||||
Value *NewFDiv = nullptr;
|
||||
|
||||
if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
|
||||
NewFDiv = UndefValue::get(VT);
|
||||
|
||||
// FIXME: Doesn't do the right thing for cases where the vector is partially
|
||||
// constant. This works when the scalarizer pass is run first.
|
||||
for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
|
||||
Value *NumEltI = Builder.CreateExtractElement(Num, I);
|
||||
Value *DenEltI = Builder.CreateExtractElement(Den, I);
|
||||
Value *NewElt;
|
||||
|
||||
if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
|
||||
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
|
||||
} else {
|
||||
NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
|
||||
}
|
||||
|
||||
NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
|
||||
}
|
||||
} else {
|
||||
if (!shouldKeepFDivF32(Num, UnsafeDiv))
|
||||
NewFDiv = Builder.CreateCall(Decl, { Num, Den });
|
||||
}
|
||||
|
||||
if (NewFDiv) {
|
||||
FDiv.replaceAllUsesWith(NewFDiv);
|
||||
NewFDiv->takeName(&FDiv);
|
||||
FDiv.eraseFromParent();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool hasUnsafeFPMath(const Function &F) {
|
||||
Attribute Attr = F.getFnAttribute("unsafe-fp-math");
|
||||
return Attr.getValueAsString() == "true";
|
||||
}
|
||||
|
||||
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
|
||||
Mod = &M;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -63,10 +163,21 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
|
||||
if (!TM || skipFunction(F))
|
||||
return false;
|
||||
|
||||
ST = &TM->getSubtarget<SISubtarget>(F);
|
||||
DA = &getAnalysis<DivergenceAnalysis>();
|
||||
visit(F);
|
||||
HasUnsafeFPMath = hasUnsafeFPMath(F);
|
||||
|
||||
return true;
|
||||
bool MadeChange = false;
|
||||
|
||||
for (BasicBlock &BB : F) {
|
||||
BasicBlock::iterator Next;
|
||||
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
|
||||
Next = std::next(I);
|
||||
MadeChange |= visit(*I);
|
||||
}
|
||||
}
|
||||
|
||||
return MadeChange;
|
||||
}
|
||||
|
||||
INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
|
||||
@ -77,6 +188,6 @@ INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
|
||||
|
||||
char AMDGPUCodeGenPrepare::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const TargetMachine *TM) {
|
||||
FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) {
|
||||
return new AMDGPUCodeGenPrepare(TM);
|
||||
}
|
||||
|
@ -420,9 +420,10 @@ int TWO_PI = 0x40c90fdb;
|
||||
int PI = 0x40490fdb;
|
||||
int TWO_PI_INV = 0x3e22f983;
|
||||
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
|
||||
int FP32_NEG_ONE = 0xbf800000;
|
||||
int FP32_ONE = 0x3f800000;
|
||||
int FP32_NEG_ONE = 0xbf800000;
|
||||
int FP64_ONE = 0x3ff0000000000000;
|
||||
int FP64_NEG_ONE = 0xbff0000000000000;
|
||||
}
|
||||
def CONST : Constants;
|
||||
|
||||
|
@ -29,16 +29,39 @@ static const char *const IntrinsicNameTable[] = {
|
||||
#undef GET_INTRINSIC_NAME_TABLE
|
||||
};
|
||||
|
||||
std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
|
||||
unsigned numTys) const {
|
||||
if (IntrID < Intrinsic::num_intrinsics) {
|
||||
return nullptr;
|
||||
}
|
||||
namespace {
|
||||
#define GET_INTRINSIC_ATTRIBUTES
|
||||
#include "AMDGPUGenIntrinsics.inc"
|
||||
#undef GET_INTRINSIC_ATTRIBUTES
|
||||
}
|
||||
|
||||
StringRef AMDGPUIntrinsicInfo::getName(unsigned IntrID,
|
||||
ArrayRef<Type *> Tys) const {
|
||||
if (IntrID < Intrinsic::num_intrinsics)
|
||||
return StringRef();
|
||||
|
||||
assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
|
||||
"Invalid intrinsic ID");
|
||||
|
||||
std::string Result(IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics]);
|
||||
return Result;
|
||||
return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
|
||||
}
|
||||
|
||||
std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
|
||||
unsigned NumTys) const {
|
||||
return getName(IntrID, makeArrayRef(Tys, NumTys)).str();
|
||||
}
|
||||
|
||||
FunctionType *AMDGPUIntrinsicInfo::getType(LLVMContext &Context, unsigned ID,
|
||||
ArrayRef<Type*> Tys) const {
|
||||
// FIXME: Re-use Intrinsic::getType machinery
|
||||
switch (ID) {
|
||||
case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
|
||||
Type *F32Ty = Type::getFloatTy(Context);
|
||||
return FunctionType::get(F32Ty, { F32Ty, F32Ty }, false);
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("unhandled intrinsic");
|
||||
}
|
||||
}
|
||||
|
||||
unsigned AMDGPUIntrinsicInfo::lookupName(const char *NameData,
|
||||
@ -69,7 +92,19 @@ bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
|
||||
}
|
||||
|
||||
Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
|
||||
Type **Tys,
|
||||
unsigned numTys) const {
|
||||
llvm_unreachable("Not implemented");
|
||||
ArrayRef<Type *> Tys) const {
|
||||
FunctionType *FTy = getType(M->getContext(), IntrID, Tys);
|
||||
Function *F
|
||||
= cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
|
||||
|
||||
AttributeSet AS = getAttributes(M->getContext(),
|
||||
static_cast<AMDGPUIntrinsic::ID>(IntrID));
|
||||
F->setAttributes(AS);
|
||||
return F;
|
||||
}
|
||||
|
||||
Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
|
||||
Type **Tys,
|
||||
unsigned NumTys) const {
|
||||
return getDeclaration(M, IntrID, makeArrayRef(Tys, NumTys));
|
||||
}
|
||||
|
@ -34,13 +34,23 @@ enum ID {
|
||||
class AMDGPUIntrinsicInfo final : public TargetIntrinsicInfo {
|
||||
public:
|
||||
AMDGPUIntrinsicInfo();
|
||||
|
||||
StringRef getName(unsigned IntrId, ArrayRef<Type *> Tys = None) const;
|
||||
|
||||
std::string getName(unsigned IntrId, Type **Tys = nullptr,
|
||||
unsigned numTys = 0) const override;
|
||||
unsigned NumTys = 0) const override;
|
||||
|
||||
unsigned lookupName(const char *Name, unsigned Len) const override;
|
||||
bool isOverloaded(unsigned IID) const override;
|
||||
Function *getDeclaration(Module *M, unsigned ID,
|
||||
Type **Tys = nullptr,
|
||||
unsigned numTys = 0) const override;
|
||||
unsigned NumTys = 0) const override;
|
||||
|
||||
Function *getDeclaration(Module *M, unsigned ID,
|
||||
ArrayRef<Type *> = None) const;
|
||||
|
||||
FunctionType *getType(LLVMContext &Context, unsigned ID,
|
||||
ArrayRef<Type*> Tys = None) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -348,9 +348,6 @@ static VectorType *arrayTypeToVecType(Type *ArrayTy) {
|
||||
static Value *
|
||||
calculateVectorIndex(Value *Ptr,
|
||||
const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
|
||||
if (isa<AllocaInst>(Ptr))
|
||||
return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
|
||||
|
||||
GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
|
||||
|
||||
auto I = GEPIdx.find(GEP);
|
||||
@ -360,11 +357,11 @@ calculateVectorIndex(Value *Ptr,
|
||||
static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
|
||||
// FIXME we only support simple cases
|
||||
if (GEP->getNumOperands() != 3)
|
||||
return NULL;
|
||||
return nullptr;
|
||||
|
||||
ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
|
||||
if (!I0 || !I0->isZero())
|
||||
return NULL;
|
||||
return nullptr;
|
||||
|
||||
return GEP->getOperand(2);
|
||||
}
|
||||
@ -398,7 +395,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
|
||||
// are just being conservative for now.
|
||||
if (!AllocaTy ||
|
||||
AllocaTy->getElementType()->isVectorTy() ||
|
||||
AllocaTy->getNumElements() > 4) {
|
||||
AllocaTy->getNumElements() > 4 ||
|
||||
AllocaTy->getNumElements() < 2) {
|
||||
DEBUG(dbgs() << " Cannot convert type to vector\n");
|
||||
return false;
|
||||
}
|
||||
@ -443,9 +441,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
|
||||
IRBuilder<> Builder(Inst);
|
||||
switch (Inst->getOpcode()) {
|
||||
case Instruction::Load: {
|
||||
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
|
||||
Value *Ptr = Inst->getOperand(0);
|
||||
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
|
||||
Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
|
||||
|
||||
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
|
||||
Value *VecValue = Builder.CreateLoad(BitCast);
|
||||
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
|
||||
Inst->replaceAllUsesWith(ExtractElement);
|
||||
@ -453,9 +453,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
|
||||
break;
|
||||
}
|
||||
case Instruction::Store: {
|
||||
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
|
||||
|
||||
Value *Ptr = Inst->getOperand(1);
|
||||
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
|
||||
Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
|
||||
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
|
||||
Value *VecValue = Builder.CreateLoad(BitCast);
|
||||
Value *NewVecValue = Builder.CreateInsertElement(VecValue,
|
||||
Inst->getOperand(0),
|
||||
@ -469,7 +471,6 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
|
||||
break;
|
||||
|
||||
default:
|
||||
Inst->dump();
|
||||
llvm_unreachable("Inconsistency in instructions promotable to vector");
|
||||
}
|
||||
}
|
||||
@ -477,11 +478,6 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
|
||||
}
|
||||
|
||||
static bool isCallPromotable(CallInst *CI) {
|
||||
// TODO: We might be able to handle some cases where the callee is a
|
||||
// constantexpr bitcast of a function.
|
||||
if (!CI->getCalledFunction())
|
||||
return false;
|
||||
|
||||
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
|
||||
if (!II)
|
||||
return false;
|
||||
@ -773,28 +769,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
|
||||
continue;
|
||||
}
|
||||
|
||||
IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
|
||||
if (!Intr) {
|
||||
// FIXME: What is this for? It doesn't make sense to promote arbitrary
|
||||
// function calls. If the call is to a defined function that can also be
|
||||
// promoted, we should be able to do this once that function is also
|
||||
// rewritten.
|
||||
|
||||
std::vector<Type*> ArgTypes;
|
||||
for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
|
||||
ArgIdx != ArgEnd; ++ArgIdx) {
|
||||
ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
|
||||
}
|
||||
Function *F = Call->getCalledFunction();
|
||||
FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
|
||||
F->isVarArg());
|
||||
Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
|
||||
NewType, F->getAttributes());
|
||||
Function *NewF = cast<Function>(C);
|
||||
Call->setCalledFunction(NewF);
|
||||
continue;
|
||||
}
|
||||
|
||||
IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
|
||||
Builder.SetInsertPoint(Intr);
|
||||
switch (Intr->getIntrinsicID()) {
|
||||
case Intrinsic::lifetime_start:
|
||||
|
@ -309,6 +309,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override;

void addIRPasses() override;
bool addPreISel() override;
void addMachineSSAOptimization() override;
bool addInstSelector() override;
@ -499,6 +500,13 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&DeadMachineInstructionElimID);
}

void GCNPassConfig::addIRPasses() {
// TODO: May want to move later or split into an early and late one.
addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));

AMDGPUPassConfig::addIRPasses();
}

bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
addPass(createSILowerI1CopiesPass());
@ -122,6 +122,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::SETCC, MVT::i32, Expand);
|
||||
setOperationAction(ISD::SETCC, MVT::f32, Expand);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
|
||||
|
||||
@ -832,13 +833,18 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
|
||||
return;
|
||||
case ISD::FP_TO_UINT:
|
||||
if (N->getValueType(0) == MVT::i1) {
|
||||
Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
|
||||
Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
|
||||
return;
|
||||
}
|
||||
// Fall-through. Since we don't care about out of bounds values
|
||||
// we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
|
||||
// considers some extra cases which are not necessary here.
|
||||
case ISD::FP_TO_SINT: {
|
||||
if (N->getValueType(0) == MVT::i1) {
|
||||
Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
|
||||
return;
|
||||
}
|
||||
|
||||
SDValue Result;
|
||||
if (expandFP_TO_SINT(N, Result, DAG))
|
||||
Results.push_back(Result);
|
||||
@ -1052,15 +1058,24 @@ SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
|
||||
return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
return DAG.getNode(
|
||||
ISD::SETCC,
|
||||
DL,
|
||||
MVT::i1,
|
||||
Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
|
||||
DAG.getCondCode(ISD::SETNE)
|
||||
);
|
||||
Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
|
||||
DAG.getCondCode(ISD::SETEQ));
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc DL(Op);
|
||||
return DAG.getNode(
|
||||
ISD::SETCC,
|
||||
DL,
|
||||
MVT::i1,
|
||||
Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
|
||||
DAG.getCondCode(ISD::SETEQ));
|
||||
}
|
||||
|
||||
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
|
||||
|
@ -72,7 +72,8 @@ private:

SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;

SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
@ -41,7 +41,8 @@ enum {
WQM = 1 << 22,
VGPRSpill = 1 << 23,
VOPAsmPrefer32Bit = 1 << 24,
Gather4 = 1 << 25
Gather4 = 1 << 25,
DisableWQM = 1 << 26
};
}

@ -1134,9 +1134,9 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOVK_I32))
.addOperand(MI.getOperand(0))
.addImm(MFI->LDSSize);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.addOperand(MI.getOperand(0))
.addImm(MFI->LDSSize);
MI.eraseFromParent();
return BB;
}
@ -1792,6 +1792,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
|
||||
Op->getVTList(), Ops, VT, MMO);
|
||||
}
|
||||
case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
|
||||
return lowerFDIV_FAST(Op, DAG);
|
||||
}
|
||||
case AMDGPUIntrinsic::SI_vs_load_input:
|
||||
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
|
||||
Op.getOperand(1),
|
||||
@ -2098,7 +2101,8 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
// Catch division cases where we can use shortcuts with rcp and rsq
|
||||
// instructions.
|
||||
SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc SL(Op);
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
@ -2139,47 +2143,48 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Faster 2.5 ULP division that does not support denormals.
|
||||
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDLoc SL(Op);
|
||||
SDValue LHS = Op.getOperand(1);
|
||||
SDValue RHS = Op.getOperand(2);
|
||||
|
||||
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
|
||||
|
||||
const APFloat K0Val(BitsToFloat(0x6f800000));
|
||||
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
|
||||
|
||||
const APFloat K1Val(BitsToFloat(0x2f800000));
|
||||
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
|
||||
|
||||
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
|
||||
|
||||
EVT SetCCVT =
|
||||
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
|
||||
|
||||
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
|
||||
|
||||
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
|
||||
|
||||
// TODO: Should this propagate fast-math-flags?
|
||||
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
|
||||
|
||||
// rcp does not support denormals.
|
||||
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
|
||||
|
||||
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
|
||||
|
||||
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (SDValue FastLowered = LowerFastFDIV(Op, DAG))
|
||||
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
|
||||
return FastLowered;
|
||||
|
||||
SDLoc SL(Op);
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
|
||||
// faster 2.5 ulp fdiv when using -amdgpu-fast-fdiv flag
|
||||
if (EnableAMDGPUFastFDIV) {
|
||||
// This does not support denormals.
|
||||
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
|
||||
|
||||
const APFloat K0Val(BitsToFloat(0x6f800000));
|
||||
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
|
||||
|
||||
const APFloat K1Val(BitsToFloat(0x2f800000));
|
||||
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
|
||||
|
||||
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
|
||||
|
||||
EVT SetCCVT =
|
||||
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
|
||||
|
||||
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
|
||||
|
||||
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
|
||||
|
||||
// TODO: Should this propagate fast-math-flags?
|
||||
|
||||
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
|
||||
|
||||
// rcp does not support denormals.
|
||||
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
|
||||
|
||||
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
|
||||
|
||||
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
|
||||
}
|
||||
|
||||
// Generates more precise fpdiv32.
|
||||
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
|
||||
|
||||
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
|
||||
@ -2209,7 +2214,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (DAG.getTarget().Options.UnsafeFPMath)
|
||||
return LowerFastFDIV(Op, DAG);
|
||||
return lowerFastUnsafeFDIV(Op, DAG);
|
||||
|
||||
SDLoc SL(Op);
|
||||
SDValue X = Op.getOperand(0);
|
||||
|
@ -36,7 +36,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
@ -41,6 +41,8 @@ class InstSI <dag outs, dag ins, string asm = "",
|
||||
field bits<1> DS = 0;
|
||||
field bits<1> MIMG = 0;
|
||||
field bits<1> FLAT = 0;
|
||||
|
||||
// Whether WQM _must_ be enabled for this instruction.
|
||||
field bits<1> WQM = 0;
|
||||
field bits<1> VGPRSpill = 0;
|
||||
|
||||
@ -50,6 +52,9 @@ class InstSI <dag outs, dag ins, string asm = "",
|
||||
|
||||
field bits<1> Gather4 = 0;
|
||||
|
||||
// Whether WQM _must_ be disabled for this instruction.
|
||||
field bits<1> DisableWQM = 0;
|
||||
|
||||
// These need to be kept in sync with the enum in SIInstrFlags.
|
||||
let TSFlags{0} = VM_CNT;
|
||||
let TSFlags{1} = EXP_CNT;
|
||||
@ -81,6 +86,7 @@ class InstSI <dag outs, dag ins, string asm = "",
|
||||
let TSFlags{23} = VGPRSpill;
|
||||
let TSFlags{24} = VOPAsmPrefer32Bit;
|
||||
let TSFlags{25} = Gather4;
|
||||
let TSFlags{26} = DisableWQM;
|
||||
|
||||
let SchedRW = [Write32Bit];
|
||||
|
||||
|
@ -738,7 +738,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
|
||||
MachineBasicBlock::iterator Insert = Entry.front();
|
||||
DebugLoc DL = Insert->getDebugLoc();
|
||||
|
||||
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
|
||||
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
|
||||
*MF);
|
||||
if (TIDReg == AMDGPU::NoRegister)
|
||||
return TIDReg;
|
||||
|
||||
|
@ -340,6 +340,14 @@ public:
|
||||
return get(Opcode).TSFlags & SIInstrFlags::WQM;
|
||||
}
|
||||
|
||||
static bool isDisableWQM(const MachineInstr &MI) {
|
||||
return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
|
||||
}
|
||||
|
||||
bool isDisableWQM(uint16_t Opcode) const {
|
||||
return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
|
||||
}
|
||||
|
||||
static bool isVGPRSpill(const MachineInstr &MI) {
|
||||
return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
|
||||
}
|
||||
|
@ -2949,6 +2949,10 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
|
||||
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
|
||||
MUBUFAddr64Table <0>;
|
||||
|
||||
let DisableWQM = 1 in {
|
||||
def "_exact" : MUBUF_Pseudo <opName, outs, ins, []>;
|
||||
}
|
||||
|
||||
let addr64 = 0, isCodeGenOnly = 0 in {
|
||||
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
|
||||
}
|
||||
@ -3019,7 +3023,8 @@ multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins,
|
||||
multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
|
||||
ValueType vt, SDPatternOperator atomic> {
|
||||
|
||||
let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in {
|
||||
let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1,
|
||||
DisableWQM = 1 in {
|
||||
|
||||
// No return variants
|
||||
let glc = 0, AsmMatchConverter = "cvtMubufAtomic" in {
|
||||
@ -3423,6 +3428,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
|
||||
let mayStore = 1;
|
||||
let hasSideEffects = 1;
|
||||
let hasPostISelHook = 0;
|
||||
let DisableWQM = 1;
|
||||
}
|
||||
|
||||
multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
|
||||
@ -3454,6 +3460,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
|
||||
let mayStore = 1;
|
||||
let hasSideEffects = 1;
|
||||
let hasPostISelHook = 0;
|
||||
let DisableWQM = 1;
|
||||
let Constraints = "$vdst = $vdata";
|
||||
let AsmMatchConverter = "cvtMIMGAtomic";
|
||||
}
|
||||
|
@ -2200,7 +2200,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
(name vt:$vdata, v4i32:$rsrc, 0,
|
||||
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
|
||||
imm:$glc, imm:$slc),
|
||||
(!cast<MUBUF>(opcode # _OFFSET) $vdata, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(!cast<MUBUF>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i1imm $glc), (as_i1imm $slc), 0)
|
||||
>;
|
||||
|
||||
@ -2208,7 +2208,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex,
|
||||
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
|
||||
imm:$glc, imm:$slc),
|
||||
(!cast<MUBUF>(opcode # _IDXEN) $vdata, $vindex, $rsrc, $soffset,
|
||||
(!cast<MUBUF>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i1imm $glc),
|
||||
(as_i1imm $slc), 0)
|
||||
>;
|
||||
@ -2217,7 +2217,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
(name vt:$vdata, v4i32:$rsrc, 0,
|
||||
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
|
||||
imm:$glc, imm:$slc),
|
||||
(!cast<MUBUF>(opcode # _OFFEN) $vdata, $voffset, $rsrc, $soffset,
|
||||
(!cast<MUBUF>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i1imm $glc),
|
||||
(as_i1imm $slc), 0)
|
||||
>;
|
||||
@ -2226,7 +2226,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex,
|
||||
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
|
||||
imm:$glc, imm:$slc),
|
||||
(!cast<MUBUF>(opcode # _BOTHEN)
|
||||
(!cast<MUBUF>(opcode # _BOTHEN_exact)
|
||||
$vdata,
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset),
|
||||
@ -3391,6 +3391,16 @@ def : Pat <
|
||||
(V_CNDMASK_B32_e64 0, -1, $src), sub1)
|
||||
>;
|
||||
|
||||
class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat <
|
||||
(i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
|
||||
(i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
|
||||
>;
|
||||
|
||||
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>;
|
||||
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>;
|
||||
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>;
|
||||
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>;
|
||||
|
||||
// If we need to perform a logical operation on i1 values, we need to
|
||||
// use vector comparisons since there is only one SCC register. Vector
|
||||
// comparisions still write to a pair of SGPRs, so treat these as
|
||||
|
@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
// SI Intrinsic Definitions
// Backend internal SI Intrinsic Definitions. User code should not
// directly use these.
//
//===----------------------------------------------------------------------===//

@ -177,6 +178,12 @@ let TargetPrefix = "SI", isTarget = 1 in {
} // End TargetPrefix = "SI", isTarget = 1

let TargetPrefix = "amdgcn", isTarget = 1 in {
// Emit 2.5 ulp, no denormal division. Should only be inserted by
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
>;

/* Control flow Intrinsics */

def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
@ -203,7 +203,8 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
Spill.Lane = Lane;

if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
*MF);

if (LaneVGPR == AMDGPU::NoRegister)
// We have no VGPRs left for spilling SGPRs.
@ -957,10 +957,13 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
// AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const {
unsigned
SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC,
const MachineFunction &MF) const {

for (unsigned Reg : *RC)
if (!MRI.isPhysRegUsed(Reg))
if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
return Reg;
return AMDGPU::NoRegister;
}
@ -185,7 +185,8 @@ public:
unsigned getNumSGPRsAllowed(const SISubtarget &ST, unsigned WaveCount) const;

unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
const TargetRegisterClass *RC,
const MachineFunction &MF) const;

unsigned getSGPR32PressureSet() const { return SGPR32SetID; };
unsigned getVGPR32PressureSet() const { return VGPR32SetID; };
@ -94,12 +94,15 @@ private:
|
||||
const SIInstrInfo *TII;
|
||||
const SIRegisterInfo *TRI;
|
||||
MachineRegisterInfo *MRI;
|
||||
LiveIntervals *LIS;
|
||||
|
||||
DenseMap<const MachineInstr *, InstrInfo> Instructions;
|
||||
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
|
||||
SmallVector<const MachineInstr *, 2> ExecExports;
|
||||
SmallVector<MachineInstr *, 1> LiveMaskQueries;
|
||||
|
||||
void markInstruction(MachineInstr &MI, char Flag,
|
||||
std::vector<WorkItem> &Worklist);
|
||||
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
|
||||
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
|
||||
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
|
||||
@ -126,6 +129,7 @@ public:
|
||||
}
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.addRequired<LiveIntervals>();
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
@ -135,8 +139,11 @@ public:
|
||||
|
||||
char SIWholeQuadMode::ID = 0;
|
||||
|
||||
INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
|
||||
"SI Whole Quad Mode", false, false)
|
||||
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
|
||||
false)
|
||||
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
|
||||
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
|
||||
false)
|
||||
|
||||
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
|
||||
|
||||
@ -144,6 +151,23 @@ FunctionPass *llvm::createSIWholeQuadModePass() {
|
||||
return new SIWholeQuadMode;
|
||||
}
|
||||
|
||||
void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
|
||||
std::vector<WorkItem> &Worklist) {
|
||||
InstrInfo &II = Instructions[&MI];
|
||||
|
||||
assert(Flag == StateWQM || Flag == StateExact);
|
||||
|
||||
// Ignore if the instruction is already marked. The typical case is that we
|
||||
// mark an instruction WQM multiple times, but for atomics it can happen that
|
||||
// Flag is StateWQM, but Needs is already set to StateExact. In this case,
|
||||
// letting the atomic run in StateExact is correct as per the relevant specs.
|
||||
if (II.Needs)
|
||||
return;
|
||||
|
||||
II.Needs = Flag;
|
||||
Worklist.push_back(&MI);
|
||||
}
|
||||
|
||||
// Scan instructions to determine which ones require an Exact execmask and
|
||||
// which ones seed WQM requirements.
|
||||
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
|
||||
@ -161,7 +185,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
|
||||
|
||||
if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
|
||||
Flags = StateWQM;
|
||||
} else if (MI.mayStore() && TII->usesVM_CNT(MI)) {
|
||||
} else if (TII->isDisableWQM(MI)) {
|
||||
Flags = StateExact;
|
||||
} else {
|
||||
// Handle export instructions with the exec mask valid flag set
|
||||
@ -192,8 +216,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
|
||||
continue;
|
||||
}
|
||||
|
||||
Instructions[&MI].Needs = Flags;
|
||||
Worklist.push_back(&MI);
|
||||
markInstruction(MI, Flags, Worklist);
|
||||
GlobalFlags |= Flags;
|
||||
}
|
||||
|
||||
@ -214,9 +237,10 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
|
||||
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
|
||||
BlockInfo &BI = Blocks[MBB];
|
||||
|
||||
// Control flow-type instructions that are followed by WQM computations
|
||||
// must themselves be in WQM.
|
||||
if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && MI.isTerminator()) {
|
||||
// Control flow-type instructions and stores to temporary memory that are
|
||||
// followed by WQM computations must themselves be in WQM.
|
||||
if ((II.OutNeeds & StateWQM) && !II.Needs &&
|
||||
(MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {
|
||||
Instructions[&MI].Needs = StateWQM;
|
||||
II.Needs = StateWQM;
|
||||
}
|
||||
@ -249,32 +273,35 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
|
||||
if (!Use.isReg() || !Use.isUse())
|
||||
continue;
|
||||
|
||||
// At this point, physical registers appear as inputs or outputs
|
||||
// and following them makes no sense (and would in fact be incorrect
|
||||
// when the same VGPR is used as both an output and an input that leads
|
||||
// to a NeedsWQM instruction).
|
||||
//
|
||||
// Note: VCC appears e.g. in 64-bit addition with carry - theoretically we
|
||||
// have to trace this, in practice it happens for 64-bit computations like
|
||||
// pointers where both dwords are followed already anyway.
|
||||
if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
|
||||
continue;
|
||||
unsigned Reg = Use.getReg();
|
||||
|
||||
for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
|
||||
InstrInfo &DefII = Instructions[&DefMI];
|
||||
|
||||
// Obviously skip if DefMI is already flagged as NeedWQM.
|
||||
//
|
||||
// The instruction might also be flagged as NeedExact. This happens when
|
||||
// the result of an atomic is used in a WQM computation. In this case,
|
||||
// the atomic must not run for helper pixels and the WQM result is
|
||||
// undefined.
|
||||
if (DefII.Needs != 0)
|
||||
// Handle physical registers that we need to track; this is mostly relevant
|
||||
// for VCC, which can appear as the (implicit) input of a uniform branch,
|
||||
// e.g. when a loop counter is stored in a VGPR.
|
||||
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
|
||||
if (Reg == AMDGPU::EXEC)
|
||||
continue;
|
||||
|
||||
DefII.Needs = StateWQM;
|
||||
Worklist.push_back(&DefMI);
|
||||
for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
|
||||
LiveRange &LR = LIS->getRegUnit(*RegUnit);
|
||||
const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
|
||||
if (!Value)
|
||||
continue;
|
||||
|
||||
// Since we're in machine SSA, we do not need to track physical
|
||||
// registers across basic blocks.
|
||||
if (Value->isPHIDef())
|
||||
continue;
|
||||
|
||||
markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,
|
||||
Worklist);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
|
||||
markInstruction(DefMI, StateWQM, Worklist);
|
||||
}
|
||||
}
|
||||
|
||||
@ -468,6 +495,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
|
||||
TII = ST.getInstrInfo();
|
||||
TRI = &TII->getRegisterInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
LIS = &getAnalysis<LiveIntervals>();
|
||||
|
||||
char GlobalFlags = analyzeFunction(MF);
|
||||
if (!(GlobalFlags & StateWQM)) {
|
||||
|
@ -3857,7 +3857,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
|
||||
// Try to convert two saturating conditional selects into a single SSAT
|
||||
SDValue SatValue;
|
||||
uint64_t SatConstant;
|
||||
if (isSaturatingConditional(Op, SatValue, SatConstant))
|
||||
if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
|
||||
isSaturatingConditional(Op, SatValue, SatConstant))
|
||||
return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
|
||||
DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
|
||||
|
||||
|
@ -3650,7 +3650,8 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
|
||||
|
||||
def SSAT : AI<(outs GPRnopc:$Rd),
|
||||
(ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
|
||||
SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
|
||||
SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
|
||||
Requires<[IsARM,HasV6]>{
|
||||
bits<4> Rd;
|
||||
bits<5> sat_imm;
|
||||
bits<4> Rn;
|
||||
@ -3666,7 +3667,8 @@ def SSAT : AI<(outs GPRnopc:$Rd),
|
||||
|
||||
def SSAT16 : AI<(outs GPRnopc:$Rd),
|
||||
(ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm,
|
||||
NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> {
|
||||
NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
|
||||
Requires<[IsARM,HasV6]>{
|
||||
bits<4> Rd;
|
||||
bits<4> sat_imm;
|
||||
bits<4> Rn;
|
||||
@ -3679,7 +3681,8 @@ def SSAT16 : AI<(outs GPRnopc:$Rd),
|
||||
|
||||
def USAT : AI<(outs GPRnopc:$Rd),
|
||||
(ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
|
||||
SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
|
||||
SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
|
||||
Requires<[IsARM,HasV6]> {
|
||||
bits<4> Rd;
|
||||
bits<5> sat_imm;
|
||||
bits<4> Rn;
|
||||
@ -3695,7 +3698,8 @@ def USAT : AI<(outs GPRnopc:$Rd),
|
||||
|
||||
def USAT16 : AI<(outs GPRnopc:$Rd),
|
||||
(ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm,
|
||||
NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> {
|
||||
NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []>,
|
||||
Requires<[IsARM,HasV6]>{
|
||||
bits<4> Rd;
|
||||
bits<4> sat_imm;
|
||||
bits<4> Rn;
|
||||
|
@ -2240,7 +2240,8 @@ class T2SatI<dag oops, dag iops, InstrItinClass itin,
def t2SSAT: T2SatI<
(outs rGPR:$Rd),
(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@ -2251,7 +2252,7 @@ def t2SSAT: T2SatI<
def t2SSAT16: T2SatI<
(outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
"ssat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasDSP]> {
Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@ -2265,7 +2266,8 @@ def t2SSAT16: T2SatI<
def t2USAT: T2SatI<
(outs rGPR:$Rd),
(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
@ -2275,7 +2277,7 @@ def t2USAT: T2SatI<
def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
NoItinerary,
"usat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasDSP]> {
Requires<[IsThumb2, HasDSP]> {
let Inst{31-22} = 0b1111001110;
let Inst{20} = 0;
let Inst{15} = 0;

@ -518,6 +518,10 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
return true;
return false;

case ELF::R_MIPS_GOT_PAGE:
case ELF::R_MICROMIPS_GOT_PAGE:
case ELF::R_MIPS_GOT_OFST:
case ELF::R_MICROMIPS_GOT_OFST:
case ELF::R_MIPS_16:
case ELF::R_MIPS_32:
case ELF::R_MIPS_GPREL32:
@ -539,8 +543,6 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MIPS_SHIFT5:
case ELF::R_MIPS_SHIFT6:
case ELF::R_MIPS_GOT_DISP:
case ELF::R_MIPS_GOT_PAGE:
case ELF::R_MIPS_GOT_OFST:
case ELF::R_MIPS_GOT_HI16:
case ELF::R_MIPS_GOT_LO16:
case ELF::R_MIPS_INSERT_A:
@ -589,8 +591,6 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MICROMIPS_PC16_S1:
case ELF::R_MICROMIPS_CALL16:
case ELF::R_MICROMIPS_GOT_DISP:
case ELF::R_MICROMIPS_GOT_PAGE:
case ELF::R_MICROMIPS_GOT_OFST:
case ELF::R_MICROMIPS_GOT_HI16:
case ELF::R_MICROMIPS_GOT_LO16:
case ELF::R_MICROMIPS_SUB:

@ -28,12 +28,19 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
PointerSize = CalleeSaveStackSlotSize = 8;
}

// FIXME: This condition isn't quite right but it's the best we can do until
// this object can identify the ABI. It will misbehave when using O32
// on a mips64*-* triple.
if ((TheTriple.getArch() == Triple::mipsel) ||
(TheTriple.getArch() == Triple::mips)) {
PrivateGlobalPrefix = "$";
PrivateLabelPrefix = "$";
}

AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
Data64bitsDirective = "\t.8byte\t";
PrivateGlobalPrefix = "$";
PrivateLabelPrefix = "$";
CommentString = "#";
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";

@ -57,7 +57,10 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
else
Ret += "E";

Ret += "-m:m";
if (ABI.IsO32())
Ret += "-m:m";
else
Ret += "-m:e";

// Pointers are 32 bit on some ABIs.
if (!ABI.IsN64())

@ -1187,6 +1187,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);

@ -13373,6 +13381,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);

const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT,
@ -13380,6 +13389,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
@ -13694,6 +13706,15 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
MVT SVT = N0.getSimpleValueType();
SDLoc dl(Op);

if (SVT.getVectorElementType() == MVT::i1) {
if (SVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
}

switch (SVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");

@ -2661,7 +2661,8 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, unsigned &NewSrc,
bool &isKill, bool &isUndef,
MachineOperand &ImplicitOp) const {
MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
if (AllowSP) {
@ -2715,13 +2716,17 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
NewSrc = MF.getRegInfo().createVirtualRegister(RC);
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
get(TargetOpcode::COPY))
.addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
.addOperand(Src);

// Which is obviously going to be dead after we're done with it.
isKill = true;
isUndef = false;

if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
}

// We've set all the parameters without issue.
@ -2900,7 +2905,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;

MachineInstrBuilder MIB =
@ -2943,7 +2948,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;

MachineInstrBuilder MIB =
@ -2977,7 +2982,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;

MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@ -3016,7 +3021,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;

const MachineOperand &Src2 = MI.getOperand(2);
@ -3024,7 +3029,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
SrcReg2, isKill2, isUndef2, ImplicitOp2))
SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
return nullptr;

MachineInstrBuilder MIB =
@ -3087,7 +3092,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;

MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))

@ -230,7 +230,7 @@ public:
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc,
bool &isKill, bool &isUndef,
MachineOperand &ImplicitOp) const;
MachineOperand &ImplicitOp, LiveVariables *LV) const;

/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target

@ -1820,7 +1820,7 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
@ -1836,7 +1836,7 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
@ -2009,24 +2009,35 @@ def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(loadv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(loadv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
Sched<[WriteCvtF2ILd]>;

def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;

let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
@ -2096,10 +2107,14 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;

|
@ -332,6 +332,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
|
||||
namespace llvm {
|
||||
template <> struct GraphTraits<ArgumentGraphNode *> {
|
||||
typedef ArgumentGraphNode NodeType;
|
||||
typedef ArgumentGraphNode *NodeRef;
|
||||
typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
|
||||
|
||||
static inline NodeType *getEntryNode(NodeType *A) { return A; }
|
||||
|
@ -44,6 +44,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;

@ -779,7 +780,8 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
// Instructions could multiply use V.
while (UI != E && *UI == I)
++UI;
I->eraseFromParent();
if (isInstructionTriviallyDead(I, TLI))
I->eraseFromParent();
}
}

@ -134,6 +134,10 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));

static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental GVN Hoisting pass"));

PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@ -232,7 +236,8 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createCFGSimplificationPass());
FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
FPM.add(createGVNHoistPass());
if(EnableGVNHoist)
FPM.add(createGVNHoistPass());
FPM.add(createLowerExpectIntrinsicPass());
}

@ -553,8 +553,11 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}

// FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
// decomposeBitTestICmp() might help.
{
unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType());
unsigned BitWidth =
DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
APInt MinSignedValue = APInt::getSignBit(BitWidth);
Value *X;
const APInt *Y, *C;

@ -2830,7 +2830,8 @@ bool InstCombiner::run() {
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
eraseInstFromFunction(*I);
if (isInstructionTriviallyDead(I, TLI))
eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
@ -2851,7 +2852,8 @@ bool InstCombiner::run() {
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
eraseInstFromFunction(*I);
if (isInstructionTriviallyDead(I, TLI))
eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
@ -3007,7 +3009,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
Inst->eraseFromParent();
if (isInstructionTriviallyDead(Inst, TLI))
Inst->eraseFromParent();
continue;
}
Some files were not shown because too many files have changed in this diff.