Vendor import of llvm release_39 branch r278877:

https://llvm.org/svn/llvm-project/llvm/branches/release_39@278877
This commit is contained in:
Dimitry Andric 2016-08-17 19:33:52 +00:00
parent c3aee98e72
commit a7fe922b98
207 changed files with 4434 additions and 1325 deletions

View File

@ -293,6 +293,7 @@ endif()
option(LLVM_ENABLE_CXX1Y "Compile with C++1y enabled." OFF)
option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF)
option(LLVM_ENABLE_LIBCXXABI "Use libc++abi when using libc++." OFF)
option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF)
option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)

View File

@ -61,8 +61,6 @@ licenses, and/or restrictions:
Program Directory
------- ---------
Autoconf llvm/autoconf
llvm/projects/ModuleMaker/autoconf
Google Test llvm/utils/unittest/googletest
OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}

View File

@ -144,6 +144,12 @@ function(add_flag_or_print_warning flag name)
endif()
endfunction()
if(LLVM_ENABLE_LLD)
check_cxx_compiler_flag("-fuse-ld=lld" CXX_SUPPORTS_LLD)
append_if(CXX_SUPPORTS_LLD "-fuse-ld=lld"
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
endif()
if( LLVM_ENABLE_PIC )
if( XCODE )
# Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't

View File

@ -436,7 +436,7 @@ For example, consider this simple LLVM example:
The X86 instruction selector might produce this machine code for the ``div`` and
``ret``:
.. code-block:: llvm
.. code-block:: text
;; Start of div
%EAX = mov %reg1024 ;; Copy X (in reg1024) into EAX
@ -453,7 +453,7 @@ By the end of code generation, the register allocator would coalesce the
registers and delete the resultant identity moves producing the following
code:
.. code-block:: llvm
.. code-block:: text
;; X is in EAX, Y is in ECX
mov %EAX, %EDX
@ -965,7 +965,7 @@ target code. For example, consider the following LLVM fragment:
This LLVM code corresponds to a SelectionDAG that looks basically like this:
.. code-block:: llvm
.. code-block:: text
(fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z)

View File

@ -144,7 +144,7 @@ exists anywhere in the file.
The FileCheck -check-prefix option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The FileCheck :option:`-check-prefix` option allows multiple test
The FileCheck `-check-prefix` option allows multiple test
configurations to be driven from one `.ll` file. This is useful in many
circumstances, for example, testing different architectural variants with
:program:`llc`. Here's a simple example:
@ -303,7 +303,7 @@ be aware that the definition rule can match `after` its use.
So, for instance, the code below will pass:
.. code-block:: llvm
.. code-block:: text
; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0]
; CHECK-DAG: vmov.32 [[REG2]][1]
@ -312,7 +312,7 @@ So, for instance, the code below will pass:
While this other code, will not:
.. code-block:: llvm
.. code-block:: text
; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0]
; CHECK-DAG: vmov.32 [[REG2]][1]
@ -473,7 +473,7 @@ To match newline characters in regular expressions the character class
matches output of the form (from llvm-dwarfdump):
.. code-block:: llvm
.. code-block:: text
DW_AT_location [DW_FORM_sec_offset] (0x00000233)
DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000c9] = "intd")

View File

@ -68,11 +68,11 @@ OPTIONS
.. option:: -B (default)
Use BSD output format. Alias for :option:`--format=bsd`.
Use BSD output format. Alias for `--format=bsd`.
.. option:: -P
Use POSIX.2 output format. Alias for :option:`--format=posix`.
Use POSIX.2 output format. Alias for `--format=posix`.
.. option:: --debug-syms, -a

View File

@ -12,16 +12,16 @@ DESCRIPTION
The :program:`opt` command is the modular LLVM optimizer and analyzer. It
takes LLVM source files as input, runs the specified optimizations or analyses
on it, and then outputs the optimized file or the analysis results. The
function of :program:`opt` depends on whether the :option:`-analyze` option is
function of :program:`opt` depends on whether the `-analyze` option is
given.
When :option:`-analyze` is specified, :program:`opt` performs various analyses
When `-analyze` is specified, :program:`opt` performs various analyses
of the input source. It will usually print the results on standard output, but
in a few cases, it will print output to standard error or generate a file with
the analysis output, which is usually done when the output is meant for another
program.
While :option:`-analyze` is *not* given, :program:`opt` attempts to produce an
While `-analyze` is *not* given, :program:`opt` attempts to produce an
optimized output file. The optimizations available via :program:`opt` depend
upon what libraries were linked into it as well as any additional libraries
that have been loaded with the :option:`-load` option. Use the :option:`-help`
@ -68,19 +68,19 @@ OPTIONS
.. option:: -disable-opt
This option is only meaningful when :option:`-std-link-opts` is given. It
This option is only meaningful when `-std-link-opts` is given. It
disables most passes.
.. option:: -strip-debug
This option causes opt to strip debug information from the module before
applying other optimizations. It is essentially the same as :option:`-strip`
applying other optimizations. It is essentially the same as `-strip`
but it ensures that stripping of debug information is done first.
.. option:: -verify-each
This option causes opt to add a verify pass after every pass otherwise
specified on the command line (including :option:`-verify`). This is useful
specified on the command line (including `-verify`). This is useful
for cases where it is suspected that a pass is creating an invalid module but
it is not clear which pass is doing it.

View File

@ -406,7 +406,7 @@ outlined. After the handler is outlined, this intrinsic is simply removed.
``llvm.eh.exceptionpointer``
----------------------------
.. code-block:: llvm
.. code-block:: text
i8 addrspace(N)* @llvm.eh.padparam.pNi8(token %catchpad)
@ -427,7 +427,7 @@ backend. Uses of them are generated by the backend's
``llvm.eh.sjlj.setjmp``
~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: llvm
.. code-block:: text
i32 @llvm.eh.sjlj.setjmp(i8* %setjmp_buf)
@ -664,7 +664,7 @@ all of the new IR instructions:
return 0;
}
.. code-block:: llvm
.. code-block:: text
define i32 @f() nounwind personality i32 (...)* @__CxxFrameHandler3 {
entry:
@ -741,7 +741,7 @@ C++ code:
}
}
.. code-block:: llvm
.. code-block:: text
define void @f() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
entry:

View File

@ -43,7 +43,7 @@ The following additional relocation types are supported:
corresponds to the COFF relocation types ``IMAGE_REL_I386_DIR32NB`` (32-bit) or
``IMAGE_REL_AMD64_ADDR32NB`` (64-bit).
.. code-block:: gas
.. code-block:: text
.text
fun:

View File

@ -204,7 +204,7 @@ IR features is specified by the selected :ref:`GC strategy description
Specifying GC code generation: ``gc "..."``
-------------------------------------------
.. code-block:: llvm
.. code-block:: text
define <returntype> @name(...) gc "name" { ... }

View File

@ -105,7 +105,7 @@ memory, or a global variable.
To make this clear, let's consider a more obtuse example:
.. code-block:: llvm
.. code-block:: text
%MyVar = uninitialized global i32
...
@ -142,7 +142,7 @@ Quick answer: there are no superfluous indices.
This question arises most often when the GEP instruction is applied to a global
variable which is always a pointer type. For example, consider this:
.. code-block:: llvm
.. code-block:: text
%MyStruct = uninitialized global { float*, i32 }
...
@ -178,7 +178,7 @@ The GetElementPtr instruction dereferences nothing. That is, it doesn't access
memory in any way. That's what the Load and Store instructions are for. GEP is
only involved in the computation of addresses. For example, consider this:
.. code-block:: llvm
.. code-block:: text
%MyVar = uninitialized global { [40 x i32 ]* }
...
@ -195,7 +195,7 @@ illegal.
In order to access the 18th integer in the array, you would need to do the
following:
.. code-block:: llvm
.. code-block:: text
%idx = getelementptr { [40 x i32]* }, { [40 x i32]* }* %, i64 0, i32 0
%arr = load [40 x i32]** %idx
@ -204,7 +204,7 @@ following:
In this case, we have to load the pointer in the structure with a load
instruction before we can index into the array. If the example was changed to:
.. code-block:: llvm
.. code-block:: text
%MyVar = uninitialized global { [40 x i32 ] }
...

View File

@ -30,7 +30,7 @@ instructions with each other. These tables are emitted in the
``XXXInstrInfo.inc`` file along with the functions to query them. Following
is the definition of ``InstrMapping`` class definied in Target.td file:
.. code-block:: llvm
.. code-block:: text
class InstrMapping {
// Used to reduce search space only to the instructions using this
@ -69,7 +69,7 @@ non-predicated form by assigning appropriate values to the ``InstrMapping``
fields. For this relationship, non-predicated instructions are treated as key
instruction since they are the one used to query the interface function.
.. code-block:: llvm
.. code-block:: text
def getPredOpcode : InstrMapping {
// Choose a FilterClass that is used as a base class for all the
@ -116,7 +116,7 @@ to include relevant information in its definition. For example, consider
following to be the current definitions of ADD, ADD_pt (true) and ADD_pf (false)
instructions:
.. code-block:: llvm
.. code-block:: text
def ADD : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
"$dst = add($a, $b)",
@ -137,7 +137,7 @@ In this step, we modify these instructions to include the information
required by the relationship model, <tt>getPredOpcode</tt>, so that they can
be related.
.. code-block:: llvm
.. code-block:: text
def ADD : PredRel, ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
"$dst = add($a, $b)",

View File

@ -41,7 +41,7 @@ that passes two default-constructed ``Foo`` objects to ``g`` in the
g(Foo(), Foo());
}
.. code-block:: llvm
.. code-block:: text
%struct.Foo = type { i32, i32 }
declare void @Foo_ctor(%struct.Foo* %this)

View File

@ -839,7 +839,7 @@ Note that the Mach-O platform doesn't support COMDATs and ELF only supports
Here is an example of a COMDAT group where a function will only be selected if
the COMDAT key's section is the largest:
.. code-block:: llvm
.. code-block:: text
$foo = comdat largest
@foo = global i32 2, comdat($foo)
@ -851,7 +851,7 @@ the COMDAT key's section is the largest:
As a syntactic sugar the ``$name`` can be omitted if the name is the same as
the global name:
.. code-block:: llvm
.. code-block:: text
$foo = comdat any
@foo = global i32 2, comdat
@ -875,7 +875,7 @@ if a collision occurs in the symbol table.
The combined use of COMDATS and section attributes may yield surprising results.
For example:
.. code-block:: llvm
.. code-block:: text
$foo = comdat any
$bar = comdat any
@ -1205,7 +1205,7 @@ makes the format of the prologue data highly target dependent.
A trivial example of valid prologue data for the x86 architecture is ``i8 144``,
which encodes the ``nop`` instruction:
.. code-block:: llvm
.. code-block:: text
define void @f() prologue i8 144 { ... }
@ -1213,7 +1213,7 @@ Generally prologue data can be formed by encoding a relative branch instruction
which skips the metadata, as in this example of valid prologue data for the
x86_64 architecture, where the first two bytes encode ``jmp .+10``:
.. code-block:: llvm
.. code-block:: text
%0 = type <{ i8, i8, i8* }>
@ -2237,7 +2237,7 @@ source file name to the local function name.
The syntax for the source file name is simply:
.. code-block:: llvm
.. code-block:: text
source_filename = "/path/to/source.c"
@ -2847,7 +2847,7 @@ cleared low bit. However, in the ``%C`` example, the optimizer is
allowed to assume that the '``undef``' operand could be the same as
``%Y``, allowing the whole '``select``' to be eliminated.
.. code-block:: llvm
.. code-block:: text
%A = xor undef, undef
@ -2899,7 +2899,7 @@ does not execute at all. This allows us to delete the divide and all
code after it. Because the undefined operation "can't happen", the
optimizer can assume that it occurs in dead code.
.. code-block:: llvm
.. code-block:: text
a: store undef -> %X
b: store %X -> undef
@ -3884,7 +3884,7 @@ their operand. For example:
Metadata nodes that aren't uniqued use the ``distinct`` keyword. For example:
.. code-block:: llvm
.. code-block:: text
!0 = distinct !{!"test\00", i32 10}
@ -3949,7 +3949,7 @@ fields are tuples containing the debug info to be emitted along with the compile
unit, regardless of code optimizations (some nodes are only emitted if there are
references to them from instructions).
.. code-block:: llvm
.. code-block:: text
!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang",
isOptimized: true, flags: "-O2", runtimeVersion: 2,
@ -3985,7 +3985,7 @@ DIBasicType
``DIBasicType`` nodes represent primitive types, such as ``int``, ``bool`` and
``float``. ``tag:`` defaults to ``DW_TAG_base_type``.
.. code-block:: llvm
.. code-block:: text
!0 = !DIBasicType(name: "unsigned char", size: 8, align: 8,
encoding: DW_ATE_unsigned_char)
@ -3994,7 +3994,7 @@ DIBasicType
The ``encoding:`` describes the details of the type. Usually it's one of the
following:
.. code-block:: llvm
.. code-block:: text
DW_ATE_address = 1
DW_ATE_boolean = 2
@ -4014,7 +4014,7 @@ refers to a tuple; the first operand is the return type, while the rest are the
types of the formal arguments in order. If the first operand is ``null``, that
represents a function with no return value (such as ``void foo() {}`` in C++).
.. code-block:: llvm
.. code-block:: text
!0 = !BasicType(name: "int", size: 32, align: 32, DW_ATE_signed)
!1 = !BasicType(name: "char", size: 8, align: 8, DW_ATE_signed_char)
@ -4028,7 +4028,7 @@ DIDerivedType
``DIDerivedType`` nodes represent types derived from other types, such as
qualified types.
.. code-block:: llvm
.. code-block:: text
!0 = !DIBasicType(name: "unsigned char", size: 8, align: 8,
encoding: DW_ATE_unsigned_char)
@ -4037,7 +4037,7 @@ qualified types.
The following ``tag:`` values are valid:
.. code-block:: llvm
.. code-block:: text
DW_TAG_member = 13
DW_TAG_pointer_type = 15
@ -4089,7 +4089,7 @@ does not have ``flags: DIFlagFwdDecl`` set. LLVM tools that link modules
together will unique such definitions at parse time via the ``identifier:``
field, even if the nodes are ``distinct``.
.. code-block:: llvm
.. code-block:: text
!0 = !DIEnumerator(name: "SixKind", value: 7)
!1 = !DIEnumerator(name: "SevenKind", value: 7)
@ -4100,7 +4100,7 @@ field, even if the nodes are ``distinct``.
The following ``tag:`` values are valid:
.. code-block:: llvm
.. code-block:: text
DW_TAG_array_type = 1
DW_TAG_class_type = 2
@ -4219,7 +4219,7 @@ type with an ODR ``identifier:`` and that does not set ``flags: DIFwdDecl``,
then the subprogram declaration is uniqued based only on its ``linkageName:``
and ``scope:``.
.. code-block:: llvm
.. code-block:: text
define void @_Z3foov() !dbg !0 {
...
@ -4244,7 +4244,7 @@ DILexicalBlock
two lexical blocks at same depth. They are valid targets for ``scope:``
fields.
.. code-block:: llvm
.. code-block:: text
!0 = distinct !DILexicalBlock(scope: !1, file: !2, line: 7, column: 35)
@ -4290,7 +4290,7 @@ the ``arg:`` field is set to non-zero, then this variable is a subprogram
parameter, and it will be included in the ``variables:`` field of its
:ref:`DISubprogram`.
.. code-block:: llvm
.. code-block:: text
!0 = !DILocalVariable(name: "this", arg: 1, scope: !3, file: !2, line: 7,
type: !3, flags: DIFlagArtificial)
@ -4313,7 +4313,7 @@ The current supported vocabulary is limited:
- ``DW_OP_bit_piece, 16, 8`` specifies the offset and size (``16`` and ``8``
here, respectively) of the variable piece from the working expression.
.. code-block:: llvm
.. code-block:: text
!0 = !DIExpression(DW_OP_deref)
!1 = !DIExpression(DW_OP_plus, 3)
@ -4336,7 +4336,7 @@ DIImportedEntity
``DIImportedEntity`` nodes represent entities (such as modules) imported into a
compile unit.
.. code-block:: llvm
.. code-block:: text
!2 = !DIImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0,
entity: !1, line: 7)
@ -4349,7 +4349,7 @@ The ``name:`` field is the macro identifier, followed by macro parameters when
defining a function-like macro, and the ``value`` field is the token-string
used to expand the macro identifier.
.. code-block:: llvm
.. code-block:: text
!2 = !DIMacro(macinfo: DW_MACINFO_define, line: 7, name: "foo(x)",
value: "((x) + 1)")
@ -4362,7 +4362,7 @@ DIMacroFile
The ``nodes:`` field is a list of ``DIMacro`` and ``DIMacroFile`` nodes that
appear in the included source file.
.. code-block:: llvm
.. code-block:: text
!2 = !DIMacroFile(macinfo: DW_MACINFO_start_file, line: 7, file: !2,
nodes: !3)
@ -5660,7 +5660,7 @@ block. Therefore, it must be the only non-phi instruction in the block.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
dispatch1:
%cs1 = catchswitch within none [label %handler0, label %handler1] unwind to caller
@ -5711,7 +5711,7 @@ the ``catchret``'s behavior is undefined.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
catchret from %catch label %continue
@ -5761,7 +5761,7 @@ It transfers control to ``continue`` or unwinds out of the function.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
cleanupret from %cleanup unwind to caller
cleanupret from %cleanup unwind label %continue
@ -5851,7 +5851,7 @@ unsigned and/or signed overflow, respectively, occurs.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = add i32 4, %var ; yields i32:result = 4 + %var
@ -5890,7 +5890,7 @@ optimizations:
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = fadd float 4.0, %var ; yields float:result = 4.0 + %var
@ -5942,7 +5942,7 @@ unsigned and/or signed overflow, respectively, occurs.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = sub i32 4, %var ; yields i32:result = 4 - %var
<result> = sub i32 0, %val ; yields i32:result = -%var
@ -5985,7 +5985,7 @@ unsafe floating point optimizations:
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = fsub float 4.0, %var ; yields float:result = 4.0 - %var
<result> = fsub float -0.0, %val ; yields float:result = -%var
@ -6039,7 +6039,7 @@ unsigned and/or signed overflow, respectively, occurs.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = mul i32 4, %var ; yields i32:result = 4 * %var
@ -6078,7 +6078,7 @@ unsafe floating point optimizations:
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = fmul float 4.0, %var ; yields float:result = 4.0 * %var
@ -6122,7 +6122,7 @@ such, "((a udiv exact b) mul b) == a").
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = udiv i32 4, %var ; yields i32:result = 4 / %var
@ -6168,7 +6168,7 @@ a :ref:`poison value <poisonvalues>` if the result would be rounded.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = sdiv i32 4, %var ; yields i32:result = 4 / %var
@ -6207,7 +6207,7 @@ unsafe floating point optimizations:
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = fdiv float 4.0, %var ; yields float:result = 4.0 / %var
@ -6249,7 +6249,7 @@ Taking the remainder of a division by zero leads to undefined behavior.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = urem i32 4, %var ; yields i32:result = 4 % %var
@ -6304,7 +6304,7 @@ result of the division and the remainder.)
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = srem i32 4, %var ; yields i32:result = 4 % %var
@ -6344,7 +6344,7 @@ to enable otherwise unsafe floating point optimizations:
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = frem float 4.0, %var ; yields float:result = 4.0 % %var
@ -6406,7 +6406,7 @@ nsw/nuw bits in (mul %op1, (shl 1, %op2)).
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = shl i32 4, %var ; yields i32: 4 << %var
<result> = shl i32 4, 2 ; yields i32: 16
@ -6455,7 +6455,7 @@ non-zero.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = lshr i32 4, 1 ; yields i32:result = 2
<result> = lshr i32 4, 2 ; yields i32:result = 1
@ -6506,7 +6506,7 @@ non-zero.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = ashr i32 4, 1 ; yields i32:result = 2
<result> = ashr i32 4, 2 ; yields i32:result = 1
@ -6558,7 +6558,7 @@ The truth table used for the '``and``' instruction is:
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = and i32 4, %var ; yields i32:result = 4 & %var
<result> = and i32 15, 40 ; yields i32:result = 8
@ -6657,7 +6657,7 @@ The truth table used for the '``xor``' instruction is:
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = xor i32 4, %var ; yields i32:result = 4 ^ %var
<result> = xor i32 15, 40 ; yields i32:result = 39
@ -6710,7 +6710,7 @@ exceeds the length of ``val``, the results are undefined.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = extractelement <4 x i32> %vec, i32 0 ; yields i32
@ -6752,7 +6752,7 @@ undefined.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = insertelement <4 x i32> %vec, i32 1, i32 0 ; yields <4 x i32>
@ -6800,7 +6800,7 @@ only one vector.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
<4 x i32> <i32 0, i32 4, i32 1, i32 5> ; yields <4 x i32>
@ -6859,7 +6859,7 @@ the index operands.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = extractvalue {i32, float} %agg, 0 ; yields i32
@ -8126,7 +8126,7 @@ or :ref:`ptrtoint <i_ptrtoint>` instructions first.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
%X = bitcast i8 255 to i8 ; yields i8 :-1
%Y = bitcast i32* %x to sint* ; yields sint*:%x
@ -8265,7 +8265,7 @@ as the values being compared. Otherwise, the result is an ``i1``.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = icmp eq i32 4, 5 ; yields: result=false
<result> = icmp ne float* %X, %X ; yields: result=false
@ -8379,7 +8379,7 @@ assumptions to be made about the values of input arguments; namely
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
<result> = fcmp oeq float 4.0, 5.0 ; yields: result=false
<result> = fcmp one float 4.0, 5.0 ; yields: result=true
@ -8815,7 +8815,7 @@ that does not carry an appropriate :ref:`"funclet" bundle <ob_funclet>`.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
dispatch:
%cs = catchswitch within none [label %handler0] unwind to caller
@ -8885,7 +8885,7 @@ that does not carry an appropriate :ref:`"funclet" bundle <ob_funclet>`.
Example:
""""""""
.. code-block:: llvm
.. code-block:: text
%tok = cleanuppad within %cs []
@ -12481,19 +12481,19 @@ optimistic assumptions made during compilation. The semantics of
``@llvm.experimental.deoptimize`` -- its body is defined to be
equivalent to:
.. code-block:: llvm
.. code-block:: text
define void @llvm.experimental.guard(i1 %pred, <args...>) {
%realPred = and i1 %pred, undef
br i1 %realPred, label %continue, label %leave [, !make.implicit !{}]
define void @llvm.experimental.guard(i1 %pred, <args...>) {
%realPred = and i1 %pred, undef
br i1 %realPred, label %continue, label %leave [, !make.implicit !{}]
leave:
call void @llvm.experimental.deoptimize(<args...>) [ "deopt"() ]
ret void
leave:
call void @llvm.experimental.deoptimize(<args...>) [ "deopt"() ]
ret void
continue:
ret void
}
continue:
ret void
}
with the optional ``[, !make.implicit !{}]`` present if and only if it

View File

@ -111,7 +111,6 @@ Here is an example of a YAML document that contains an LLVM module:
.. code-block:: llvm
--- |
define i32 @inc(i32* %x) {
entry:
%0 = load i32, i32* %x
@ -119,7 +118,6 @@ Here is an example of a YAML document that contains an LLVM module:
store i32 %1, i32* %x
ret i32 %1
}
...
.. _YAML block literal string: http://www.yaml.org/spec/1.2/spec.html#id2795688
@ -129,7 +127,7 @@ Machine Functions
The remaining YAML documents contain the machine functions. This is an example
of such YAML document:
.. code-block:: llvm
.. code-block:: text
---
name: inc
@ -172,7 +170,7 @@ A machine basic block is defined in a single block definition source construct
that contains the block's ID.
The example below defines two blocks that have an ID of zero and one:
.. code-block:: llvm
.. code-block:: text
bb.0:
<instructions>
@ -182,7 +180,7 @@ The example below defines two blocks that have an ID of zero and one:
A machine basic block can also have a name. It should be specified after the ID
in the block's definition:
.. code-block:: llvm
.. code-block:: text
bb.0.entry: ; This block's name is "entry"
<instructions>
@ -196,7 +194,7 @@ Block References
The machine basic blocks are identified by their ID numbers. Individual
blocks are referenced using the following syntax:
.. code-block:: llvm
.. code-block:: text
%bb.<id>[.<name>]
@ -213,7 +211,7 @@ Successors
The machine basic block's successors have to be specified before any of the
instructions:
.. code-block:: llvm
.. code-block:: text
bb.0.entry:
successors: %bb.1.then, %bb.2.else
@ -227,7 +225,7 @@ The branch weights can be specified in brackets after the successor blocks.
The example below defines a block that has two successors with branch weights
of 32 and 16:
.. code-block:: llvm
.. code-block:: text
bb.0.entry:
successors: %bb.1.then(32), %bb.2.else(16)
@ -240,7 +238,7 @@ Live In Registers
The machine basic block's live in registers have to be specified before any of
the instructions:
.. code-block:: llvm
.. code-block:: text
bb.0.entry:
liveins: %edi, %esi
@ -255,7 +253,7 @@ Miscellaneous Attributes
The attributes ``IsAddressTaken``, ``IsLandingPad`` and ``Alignment`` can be
specified in brackets after the block's definition:
.. code-block:: llvm
.. code-block:: text
bb.0.entry (address-taken):
<instructions>
@ -278,7 +276,7 @@ The instruction's name is usually specified before the operands. The example
below shows an instance of the X86 ``RETQ`` instruction with a single machine
operand:
.. code-block:: llvm
.. code-block:: text
RETQ %eax
@ -287,7 +285,7 @@ operands, the instruction's name has to be specified after them. The example
below shows an instance of the AArch64 ``LDPXpost`` instruction with three
defined register operands:
.. code-block:: llvm
.. code-block:: text
%sp, %fp, %lr = LDPXpost %sp, 2
@ -303,7 +301,7 @@ Instruction Flags
The flag ``frame-setup`` can be specified before the instruction's name:
.. code-block:: llvm
.. code-block:: text
%fp = frame-setup ADDXri %sp, 0, 0
@ -321,13 +319,13 @@ but they can also be used in a number of other places, like the
The physical registers are identified by their name. They use the following
syntax:
.. code-block:: llvm
.. code-block:: text
%<name>
The example below shows three X86 physical registers:
.. code-block:: llvm
.. code-block:: text
%eax
%r15
@ -336,13 +334,13 @@ The example below shows three X86 physical registers:
The virtual registers are identified by their ID number. They use the following
syntax:
.. code-block:: llvm
.. code-block:: text
%<id>
Example:
.. code-block:: llvm
.. code-block:: text
%0
@ -366,7 +364,7 @@ The immediate machine operands are untyped, 64-bit signed integers. The
example below shows an instance of the X86 ``MOV32ri`` instruction that has an
immediate machine operand ``-42``:
.. code-block:: llvm
.. code-block:: text
%eax = MOV32ri -42
@ -384,14 +382,14 @@ machine operands. The register operands can also have optional
and a reference to the tied register operand.
The full syntax of a register operand is shown below:
.. code-block:: llvm
.. code-block:: text
[<flags>] <register> [ :<subregister-idx-name> ] [ (tied-def <tied-op>) ]
This example shows an instance of the X86 ``XOR32rr`` instruction that has
5 register operands with different register flags:
.. code-block:: llvm
.. code-block:: text
dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al
@ -446,7 +444,7 @@ the subregister indices. The example below shows an instance of the ``COPY``
pseudo instruction that uses the X86 ``sub_8bit`` subregister index to copy 8
lower bits from the 32-bit virtual register 0 to the 8-bit virtual register 1:
.. code-block:: llvm
.. code-block:: text
%1 = COPY %0:sub_8bit
@ -461,7 +459,7 @@ The global value machine operands reference the global values from the
The example below shows an instance of the X86 ``MOV64rm`` instruction that has
a global value operand named ``G``:
.. code-block:: llvm
.. code-block:: text
%rax = MOV64rm %rip, 1, _, @G, _

View File

@ -70,7 +70,7 @@ clients.
For example, a possible annotation of an ARM load of a stack-relative location
might be annotated as:
.. code-block:: nasm
.. code-block:: text
ldr <reg gpr:r0>, <mem regoffset:[<reg gpr:sp>, <imm:#4>]>

View File

@ -394,7 +394,7 @@ and in right function "*FR*". And every part of *left* place is equal to the
corresponding part of *right* place, and (!) both parts use *Value* instances,
for example:
.. code-block:: llvm
.. code-block:: text
instr0 i32 %LV ; left side, function FL
instr0 i32 %RV ; right side, function FR
@ -409,13 +409,13 @@ in "*FL*" and "*FR*".
Consider small example here:
.. code-block:: llvm
.. code-block:: text
define void %f(i32 %pf0, i32 %pf1) {
instr0 i32 %pf0 instr1 i32 %pf1 instr2 i32 123
}
.. code-block:: llvm
.. code-block:: text
define void %g(i32 %pg0, i32 %pg1) {
instr0 i32 %pg0 instr1 i32 %pg0 instr2 i32 123

View File

@ -37,7 +37,7 @@ code. By default, the back-end will emit device functions. Metadata is used to
declare a function as a kernel function. This metadata is attached to the
``nvvm.annotations`` named metadata object, and has the following format:
.. code-block:: llvm
.. code-block:: text
!0 = !{<function-ref>, metadata !"kernel", i32 1}

View File

@ -40,7 +40,10 @@ Non-comprehensive list of changes in this release
* There is no longer a "global context" available in LLVM, except for the C API.
* .. note about autoconf build having been removed.
* The autoconf build system has been removed in favor of CMake. LLVM 3.9
requires CMake 3.4.3 or later to build. For information about using CMake
please see the documentation on :doc:`CMake`. For information about the CMake
language there is also a :doc:`CMakePrimer` document available.
* .. note about C API functions LLVMParseBitcode,
LLVMParseBitcodeInContext, LLVMGetBitcodeModuleInContext and
@ -69,11 +72,13 @@ Non-comprehensive list of changes in this release
need to be updated to replace the argument node and remove any dead nodes in
cases where they currently return an ``SDNode *`` from this interface.
* Introduction of ThinLTO: [FIXME: needs to be documented more extensively in
/docs/ ; ping Mehdi/Teresa before the release if not done]
* Raised the minimum required CMake version to 3.4.3.
* Added the MemorySSA analysis, which hopes to replace MemoryDependenceAnalysis.
It should provide higher-quality results than MemDep, and be algorithmically
faster than MemDep. Currently, GVNHoist (which is off by default) makes use of
MemorySSA.
.. NOTE
For small 1-3 sentence descriptions, just add an entry at the end of
this list. If your description won't fit comfortably in one bullet
@ -93,6 +98,32 @@ Non-comprehensive list of changes in this release
Makes programs 10x faster by doing Special New Thing.
GCC ABI Tag
-----------
Recently, many of the Linux distributions (ex. `Fedora <http://developerblog.redhat.com/2015/02/10/gcc-5-in-fedora/>`_,
`Debian <https://wiki.debian.org/GCC5>`_, `Ubuntu <https://wiki.ubuntu.com/GCC5>`_)
have moved on to use the new `GCC ABI <https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Attributes.html>`_
to work around `C++11 incompatibilities in libstdc++ <https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html>`_.
This caused `incompatibility problems <https://gcc.gnu.org/ml/gcc-patches/2015-04/msg00153.html>`_
with other compilers (ex. Clang), which needed to be fixed, but due to the
experimental nature of GCC's own implementation, it took a long time for it to
land in LLVM (`here <https://reviews.llvm.org/D18035>`_ and
`here <https://reviews.llvm.org/D17567>`_), not in time for the 3.8 release.
Those patches are now present in the 3.9.0 release and should be working on the
majority of cases, as they have been tested thoroughly. However, some bugs were
`filled in GCC <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71712>`_ and have not
yet been fixed, so there may be corner cases not covered by either GCC or Clang.
Bug fixes to those problems should be reported in Bugzilla (either LLVM or GCC),
and patches to LLVM's trunk are very likely to be back-ported to future 3.9.x
releases (depends on how destructive it is).
Unfortunately, these patches won't be back-ported to 3.8.x or earlier, so we
strongly recommend people to use 3.9.x when GCC ABI cases are at stake.
For a more in-depth view of the issue, check our `Bugzilla entry <https://llvm.org/bugs/show_bug.cgi?id=23529>`_.
Changes to the LLVM IR
----------------------
@ -110,16 +141,98 @@ link-time may be differently optimized than the one what was visible
during optimization, and may have arbitrarily different observable
behavior. See `PR26774 <http://llvm.org/PR26774>`_ for more details.
Changes to the ARM Backend
Support for ThinLTO
-------------------
LLVM now supports ThinLTO compilation, which can be invoked by compiling
and linking with -flto=thin. The gold linker plugin, as well as linkers
that use the new ThinLTO API in libLTO (like ld64), will transparently
execute the ThinLTO backends in parallel threads.
For more information on ThinLTO and the LLVM implementation, see the
`ThinLTO blog post <http://blog.llvm.org/2016/06/thinlto-scalable-and-incremental-lto.html>`_.
Changes to the ARM Targets
--------------------------
During this release ...
**During this release the AArch64 backend has:**
* Gained support for Qualcomm's Kryo and Broadcom's Vulcan CPUs, including
scheduling models.
* Landed a scheduling model for Samsung's Exynos M1.
* Seen a lot of work on GlobalISel.
* Learned a few more useful combines (fadd and fmul into fmadd, adjustments to the
stack pointer for callee-save stack memory and local stack memory etc).
* Gained support for the Swift calling convention.
* Switched to using SubtargetFeatures rather than testing for specific CPUs and
to using TableGen for handling system instruction operands.
* Like ARM, AArch64 is now using the TargetParser, so no more StringSwitches
matching CPU, FPU or feature names will be accepted in normal code.
* Clang can now self-host itself using LLD on AArch64.
* Gained a big batch of tests from Halide.
Furthermore, LLDB now supports AArch64 compact unwind tables, as used on iOS,
tvos and watchos.
**During this release the ARM target has:**
* ARMv8.2-A can now be targeted directly via Clang flags.
* Adding preliminary support for Cortex-R8.
* LLDB can now parse EABI attributes for an ELF input.
* Initial ARM/Thumb support was added to LLD.
* The ExecutionEngine now supports COFF/ARM.
* Swift calling convention was ported to ARM.
* A large number of codegen fixes around ARMv8, DSP, correct sub-target support,
relocations, EABI, EHABI, Windows on ARM, atomics..
* Improved assembler support for Linux/Android/Chromium sub-projects.
* Initial support for MUSL (libc) on ARM.
* Support for Thumb1 targets in libunwind.
* Gained a big batch of tests from Halide.
Changes to the MIPS Target
--------------------------
During this release ...
**During this release the MIPS target has:**
* Enabled the Integrated Assembler by default for all ``mips-*`` and
``mipsel-*`` triples.
* Significantly improved the Integrated Assembler support for the n64 ABI.
* Added the Clang frontend ``-mcompact-branches={never,optimal,always}`` option
that controls how LLVM generates compact branches for MIPS targets.
* Improved performance and code size for stack pointer adjustments in functions
with large frames.
* Implemented many instructions from the microMIPS32R6 ISA and added CodeGen
support for most of them.
* Added support for the triple used by Debian Stretch for little endian
MIPS64, ie. ``mips64el-linux-gnuabi64``.
* Removed EABI which was neither tested nor properly supported.
* Gained the ability to self-host on MIPS32R6.
* Gained the ability to self-host on MIPS64R2 and MIPS64R6 when using the n64
ABI.
* Added support for the ``LA`` macro in PIC mode for o32.
* Added support for safestack in compiler-rt.
* Added support for the MIPS n64 ABI in LLD.
* Added LLD support for TLS relocations for both o32 and n64 MIPS ABIs.
**The MIPS target has also fixed various bugs including the following notable
fixes:**
* Delay slots are no longer filled multiple times when either ``-save-temps``
or ``-via-file-asm`` are used.
* Updated n32 and n64 to follow the standard ELF conventions for label prefixes
(``.L``), whereas o32 still uses its own (``$``).
* Properly sign-extend values to GPR width for instructions that expect 32-bit
values on 64-bit ISAs.
* Several fixes for the delay-slot filler pass, including correct
forbidden-slot hazard handling.
* Fixed several errors caught by the machine verifier when turned on for MIPS.
* Fixed broken predicate for ``SELECT`` patterns in MIPS64.
* Fixed wrong truncation of memory address for ``LL``/``SC`` seqeuences in
MIPS64.
* Fixed the o32, n32 and n64 handling of ``.cprestore`` directives when inside
a ``.set noat`` region by the Integrated Assembler.
* Fixed the ordering of ``HI``/``LO`` pairs in the relocation table.
* Fixed the generated ELF ``EFlags`` when Octeon is the target.
Changes to the PowerPC Target
@ -140,9 +253,16 @@ Changes to the X86 Target
extensions using ``-march=knl``. The switch enables the ISA extensions
AVX-512{F, CD, ER, PF}.
* LLVM will now prefer ``PUSH`` instructions rather than ``%esp``-relative
``MOV`` instructions for function calls at all optimization levels greater
than ``-O0``. Previously this transformation only occurred at ``-Os``.
Changes to the AMDGPU Target
-----------------------------
* Added backend support for OpenGL shader image, buffer storage, atomic
counter, and compute shader extensions (supported since Mesa 12)
* Mesa 11.0.x is no longer supported
@ -167,6 +287,21 @@ projects that have already been updated to work with LLVM 3.9.
* A project
LDC - the LLVM-based D compiler
-------------------------------
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
pragmatically combines efficiency, control, and modeling power, with safety and
programmer productivity. D supports powerful concepts like Compile-Time Function
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
to concurrency and offers many classical paradigms.
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
combined with LLVM as backend to produce efficient native code. LDC targets
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
are underway.
Additional Information
======================

View File

@ -33,7 +33,7 @@ current stack limit (minus the amount of space needed to allocate a new block) -
this slot's offset is again dictated by ``libgcc``. The generated
assembly looks like this on x86-64:
.. code-block:: nasm
.. code-block:: text
leaq -8(%rsp), %r10
cmpq %fs:112, %r10

View File

@ -230,7 +230,7 @@ following C fragment, for example:
Compiled to LLVM, this function would be represented like this:
.. code-block:: llvm
.. code-block:: text
; Function Attrs: nounwind ssp uwtable
define void @foo() #0 !dbg !4 {
@ -303,7 +303,7 @@ The first intrinsic ``%llvm.dbg.declare`` encodes debugging information for the
variable ``X``. The metadata ``!dbg !14`` attached to the intrinsic provides
scope information for the variable ``X``.
.. code-block:: llvm
.. code-block:: text
!14 = !DILocation(line: 2, column: 9, scope: !4)
!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5,
@ -327,7 +327,7 @@ The third intrinsic ``%llvm.dbg.declare`` encodes debugging information for
variable ``Z``. The metadata ``!dbg !19`` attached to the intrinsic provides
scope information for the variable ``Z``.
.. code-block:: llvm
.. code-block:: text
!18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5)
!19 = !DILocation(line: 5, column: 11, scope: !18)
@ -390,7 +390,7 @@ Given an integer global variable declared as follows:
a C/C++ front-end would generate the following descriptors:
.. code-block:: llvm
.. code-block:: text
;;
;; Define the global itself.
@ -456,7 +456,7 @@ Given a function declared as follows:
a C/C++ front-end would generate the following descriptors:
.. code-block:: llvm
.. code-block:: text
;;
;; Define the anchor for subprograms.

View File

@ -138,7 +138,7 @@ SSA value ``%obj.relocated`` which represents the potentially changed value of
``%obj`` after the safepoint and update any following uses appropriately. The
resulting relocation sequence is:
.. code-block:: llvm
.. code-block:: text
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
gc "statepoint-example" {
@ -237,7 +237,7 @@ afterwards.
If we extend our previous example to include a pointless derived pointer,
we get:
.. code-block:: llvm
.. code-block:: text
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
gc "statepoint-example" {
@ -283,7 +283,7 @@ Let's assume a hypothetical GC--somewhat unimaginatively named "hypothetical-gc"
--that requires that a TLS variable must be written to before and after a call
to unmanaged code. The resulting relocation sequence is:
.. code-block:: llvm
.. code-block:: text
@flag = thread_local global i32 0, align 4
@ -662,7 +662,7 @@ distinguish between GC references and non-GC references in IR it is given.
As an example, given this code:
.. code-block:: llvm
.. code-block:: text
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
gc "statepoint-example" {
@ -672,7 +672,7 @@ As an example, given this code:
The pass would produce this IR:
.. code-block:: llvm
.. code-block:: text
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
gc "statepoint-example" {
@ -737,7 +737,7 @@ As an example, given input IR of the following:
This pass would produce the following IR:
.. code-block:: llvm
.. code-block:: text
define void @test() gc "statepoint-example" {
%safepoint_token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)

View File

@ -232,7 +232,7 @@ the record ends with a semicolon.
Here is a simple TableGen file:
.. code-block:: llvm
.. code-block:: text
class C { bit V = 1; }
def X : C;
@ -276,7 +276,7 @@ derived class or definition wants to override. Let expressions consist of the
value. For example, a new class could be added to the example above, redefining
the ``V`` field for all of its subclasses:
.. code-block:: llvm
.. code-block:: text
class D : C { let V = 0; }
def Z : D;
@ -295,7 +295,7 @@ concrete classes. Parameterized TableGen classes specify a list of variable
bindings (which may optionally have defaults) that are bound when used. Here is
a simple example:
.. code-block:: llvm
.. code-block:: text
class FPFormat<bits<3> val> {
bits<3> Value = val;
@ -316,7 +316,7 @@ integer.
The more esoteric forms of `TableGen expressions`_ are useful in conjunction
with template arguments. As an example:
.. code-block:: llvm
.. code-block:: text
class ModRefVal<bits<2> val> {
bits<2> Value = val;
@ -346,7 +346,7 @@ be used to decouple the interface provided to the user of the class from the
actual internal data representation expected by the class. In this case,
running ``llvm-tblgen`` on the example prints the following definitions:
.. code-block:: llvm
.. code-block:: text
def bork { // Value
bit isMod = 1;
@ -379,7 +379,7 @@ commonality exists, then in a separate place indicate what all the ops are.
Here is an example TableGen fragment that shows this idea:
.. code-block:: llvm
.. code-block:: text
def ops;
def GPR;
@ -405,7 +405,7 @@ inherit from multiple multiclasses, instantiating definitions from each
multiclass. Using a multiclass this way is exactly equivalent to instantiating
the classes multiple times yourself, e.g. by writing:
.. code-block:: llvm
.. code-block:: text
def ops;
def GPR;
@ -432,7 +432,7 @@ the classes multiple times yourself, e.g. by writing:
A ``defm`` can also be used inside a multiclass providing several levels of
multiclass instantiations.
.. code-block:: llvm
.. code-block:: text
class Instruction<bits<4> opc, string Name> {
bits<4> opcode = opc;
@ -473,7 +473,7 @@ multiclass instantiations.
the class list must start after the last multiclass, and there must be at least
one multiclass before them.
.. code-block:: llvm
.. code-block:: text
class XD { bits<4> Prefix = 11; }
class XS { bits<4> Prefix = 12; }
@ -516,7 +516,7 @@ specified file in place of the include directive. The filename should be
specified as a double quoted string immediately after the '``include``' keyword.
Example:
.. code-block:: llvm
.. code-block:: text
include "foo.td"
@ -532,7 +532,7 @@ commonality from the records.
File-scope "let" expressions take a comma-separated list of bindings to apply,
and one or more records to bind the values in. Here are some examples:
.. code-block:: llvm
.. code-block:: text
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in
def RET : I<0xC3, RawFrm, (outs), (ins), "ret", [(X86retflag 0)]>;
@ -559,7 +559,7 @@ ways to factor out commonality from the records, specially if using several
levels of multiclass instantiations. This also avoids the need of using "let"
expressions within subsequent records inside a multiclass.
.. code-block:: llvm
.. code-block:: text
multiclass basic_r<bits<4> opc> {
let Predicates = [HasSSE2] in {
@ -587,7 +587,7 @@ TableGen supports the '``foreach``' block, which textually replicates the loop
body, substituting iterator values for iterator references in the body.
Example:
.. code-block:: llvm
.. code-block:: text
foreach i = [0, 1, 2, 3] in {
def R#i : Register<...>;
@ -598,7 +598,7 @@ This will create objects ``R0``, ``R1``, ``R2`` and ``R3``. ``foreach`` blocks
may be nested. If there is only one item in the body the braces may be
elided:
.. code-block:: llvm
.. code-block:: text
foreach i = [0, 1, 2, 3] in
def R#i : Register<...>;

View File

@ -90,7 +90,7 @@ of the classes, then all of the definitions. This is a good way to see what the
various definitions expand to fully. Running this on the ``X86.td`` file prints
this (at the time of this writing):
.. code-block:: llvm
.. code-block:: text
...
def ADD32rr { // Instruction X86Inst I
@ -155,7 +155,7 @@ by the code generator, and specifying it all manually would be unmaintainable,
prone to bugs, and tiring to do in the first place. Because we are using
TableGen, all of the information was derived from the following definition:
.. code-block:: llvm
.. code-block:: text
let Defs = [EFLAGS],
isCommutable = 1, // X = ADD Y,Z --> X = ADD Z,Y
@ -201,7 +201,7 @@ TableGen.
**TableGen definitions** are the concrete form of 'records'. These generally do
not have any undefined values, and are marked with the '``def``' keyword.
.. code-block:: llvm
.. code-block:: text
def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
"Enable ARMv8 FP">;
@ -220,7 +220,7 @@ floating point instructions in the X86 backend). TableGen keeps track of all of
the classes that are used to build up a definition, so the backend can find all
definitions of a particular class, such as "Instruction".
.. code-block:: llvm
.. code-block:: text
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
@ -235,7 +235,7 @@ If a multiclass inherits from another multiclass, the definitions in the
sub-multiclass become part of the current multiclass, as if they were declared
in the current multiclass.
.. code-block:: llvm
.. code-block:: text
multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
dag address, ValueType sty> {

View File

@ -345,7 +345,7 @@ to define an object for each register. The specified string ``n`` becomes the
``Name`` of the register. The basic ``Register`` object does not have any
subregisters and does not specify any aliases.
.. code-block:: llvm
.. code-block:: text
class Register<string n> {
string Namespace = "";
@ -361,7 +361,7 @@ subregisters and does not specify any aliases.
For example, in the ``X86RegisterInfo.td`` file, there are register definitions
that utilize the ``Register`` class, such as:
.. code-block:: llvm
.. code-block:: text
def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>;
@ -414,7 +414,7 @@ classes. In ``Target.td``, the ``Register`` class is the base for the
``RegisterWithSubRegs`` class that is used to define registers that need to
specify subregisters in the ``SubRegs`` list, as shown here:
.. code-block:: llvm
.. code-block:: text
class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
let SubRegs = subregs;
@ -427,7 +427,7 @@ feature common to these subclasses. Note the use of "``let``" expressions to
override values that are initially defined in a superclass (such as ``SubRegs``
field in the ``Rd`` class).
.. code-block:: llvm
.. code-block:: text
class SparcReg<string n> : Register<n> {
field bits<5> Num;
@ -452,7 +452,7 @@ field in the ``Rd`` class).
In the ``SparcRegisterInfo.td`` file, there are register definitions that
utilize these subclasses of ``Register``, such as:
.. code-block:: llvm
.. code-block:: text
def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
@ -478,7 +478,7 @@ default allocation order of the registers. A target description file
``XXXRegisterInfo.td`` that uses ``Target.td`` can construct register classes
using the following class:
.. code-block:: llvm
.. code-block:: text
class RegisterClass<string namespace,
list<ValueType> regTypes, int alignment, dag regList> {
@ -532,7 +532,7 @@ defines a group of 32 single-precision floating-point registers (``F0`` to
``F31``); ``DFPRegs`` defines a group of 16 double-precision registers
(``D0-D15``).
.. code-block:: llvm
.. code-block:: text
// F0, F1, F2, ..., F31
def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
@ -703,7 +703,7 @@ which describes one instruction. An instruction descriptor defines:
The Instruction class (defined in ``Target.td``) is mostly used as a base for
more complex instruction classes.
.. code-block:: llvm
.. code-block:: text
class Instruction {
string Namespace = "";
@ -760,7 +760,7 @@ specific operation value for ``LD``/Load Word. The third parameter is the
output destination, which is a register operand and defined in the ``Register``
target description file (``IntRegs``).
.. code-block:: llvm
.. code-block:: text
def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
"ld [$addr], $dst",
@ -769,7 +769,7 @@ target description file (``IntRegs``).
The fourth parameter is the input source, which uses the address operand
``MEMrr`` that is defined earlier in ``SparcInstrInfo.td``:
.. code-block:: llvm
.. code-block:: text
def MEMrr : Operand<i32> {
let PrintMethod = "printMemOperand";
@ -788,7 +788,7 @@ immediate value operands. For example, to perform a Load Integer instruction
for a Word from an immediate operand to a register, the following instruction
class is defined:
.. code-block:: llvm
.. code-block:: text
def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
"ld [$addr], $dst",
@ -801,7 +801,7 @@ creation of templates to define several instruction classes at once (using the
pattern ``F3_12`` is defined to create 2 instruction classes each time
``F3_12`` is invoked:
.. code-block:: llvm
.. code-block:: text
multiclass F3_12 <string OpcStr, bits<6> Op3Val, SDNode OpNode> {
def rr : F3_1 <2, Op3Val,
@ -818,7 +818,7 @@ So when the ``defm`` directive is used for the ``XOR`` and ``ADD``
instructions, as seen below, it creates four instruction objects: ``XORrr``,
``XORri``, ``ADDrr``, and ``ADDri``.
.. code-block:: llvm
.. code-block:: text
defm XOR : F3_12<"xor", 0b000011, xor>;
defm ADD : F3_12<"add", 0b000000, add>;
@ -830,7 +830,7 @@ For example, the 10\ :sup:`th` bit represents the "greater than" condition for
integers, and the 22\ :sup:`nd` bit represents the "greater than" condition for
floats.
.. code-block:: llvm
.. code-block:: text
def ICC_NE : ICC_VAL< 9>; // Not Equal
def ICC_E : ICC_VAL< 1>; // Equal
@ -855,7 +855,7 @@ order they are defined. Fields are bound when they are assigned a value. For
example, the Sparc target defines the ``XNORrr`` instruction as a ``F3_1``
format instruction having three operands.
.. code-block:: llvm
.. code-block:: text
def XNORrr : F3_1<2, 0b000111,
(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
@ -865,7 +865,7 @@ format instruction having three operands.
The instruction templates in ``SparcInstrFormats.td`` show the base class for
``F3_1`` is ``InstSP``.
.. code-block:: llvm
.. code-block:: text
class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
field bits<32> Inst;
@ -880,7 +880,7 @@ The instruction templates in ``SparcInstrFormats.td`` show the base class for
``InstSP`` leaves the ``op`` field unbound.
.. code-block:: llvm
.. code-block:: text
class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSP<outs, ins, asmstr, pattern> {
@ -897,7 +897,7 @@ The instruction templates in ``SparcInstrFormats.td`` show the base class for
fields. ``F3`` format instructions will bind the operands ``rd``, ``op3``, and
``rs1`` fields.
.. code-block:: llvm
.. code-block:: text
class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
@ -925,7 +925,7 @@ TableGen definition will add all of its operands to an enumeration in the
llvm::XXX:OpName namespace and also add an entry for it into the OperandMap
table, which can be queried using getNamedOperandIdx()
.. code-block:: llvm
.. code-block:: text
int DstIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::dst); // => 0
int BIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::b); // => 1
@ -972,7 +972,7 @@ For example, the X86 backend defines ``brtarget`` and ``brtarget8``, both
instances of the TableGen ``Operand`` class, which represent branch target
operands:
.. code-block:: llvm
.. code-block:: text
def brtarget : Operand<OtherVT>;
def brtarget8 : Operand<OtherVT>;
@ -1222,14 +1222,14 @@ definitions in ``XXXInstrInfo.td``. For example, in ``SparcInstrInfo.td``,
this entry defines a register store operation, and the last parameter describes
a pattern with the store DAG operator.
.. code-block:: llvm
.. code-block:: text
def STrr : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
"st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>;
``ADDRrr`` is a memory mode that is also defined in ``SparcInstrInfo.td``:
.. code-block:: llvm
.. code-block:: text
def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
@ -1240,7 +1240,7 @@ defined in an implementation of the Instructor Selector (such as
In ``lib/Target/TargetSelectionDAG.td``, the DAG operator for store is defined
below:
.. code-block:: llvm
.. code-block:: text
def store : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
@ -1458,7 +1458,7 @@ if the current argument is of type ``f32`` or ``f64``), then the action is
performed. In this case, the ``CCAssignToReg`` action assigns the argument
value to the first available register: either ``R0`` or ``R1``.
.. code-block:: llvm
.. code-block:: text
CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>>
@ -1469,7 +1469,7 @@ which registers are used for specified scalar return types. A single-precision
float is returned to register ``F0``, and a double-precision float goes to
register ``D0``. A 32-bit integer is returned in register ``I0`` or ``I1``.
.. code-block:: llvm
.. code-block:: text
def RetCC_Sparc32 : CallingConv<[
CCIfType<[i32], CCAssignToReg<[I0, I1]>>,
@ -1484,7 +1484,7 @@ the size of the slot, and the second parameter, also 4, indicates the stack
alignment along 4-byte units. (Special cases: if size is zero, then the ABI
size is used; if alignment is zero, then the ABI alignment is used.)
.. code-block:: llvm
.. code-block:: text
def CC_Sparc32 : CallingConv<[
// All arguments get passed in integer registers if there is space.
@ -1499,7 +1499,7 @@ the following example (in ``X86CallingConv.td``), the definition of
assigned to the register ``ST0`` or ``ST1``, the ``RetCC_X86Common`` is
invoked.
.. code-block:: llvm
.. code-block:: text
def RetCC_X86_32_C : CallingConv<[
CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>,
@ -1514,7 +1514,7 @@ then a specified action is invoked. In the following example (in
``RetCC_X86_32_Fast`` is invoked. If the ``SSECall`` calling convention is in
use, then ``RetCC_X86_32_SSE`` is invoked.
.. code-block:: llvm
.. code-block:: text
def RetCC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
@ -1682,7 +1682,7 @@ feature, the value of the attribute, and a description of the feature. (The
fifth parameter is a list of features whose presence is implied, and its
default value is an empty array.)
.. code-block:: llvm
.. code-block:: text
class SubtargetFeature<string n, string a, string v, string d,
list<SubtargetFeature> i = []> {
@ -1696,7 +1696,7 @@ default value is an empty array.)
In the ``Sparc.td`` file, the ``SubtargetFeature`` is used to define the
following features.
.. code-block:: llvm
.. code-block:: text
def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true",
"Enable SPARC-V9 instructions">;
@ -1710,7 +1710,7 @@ Elsewhere in ``Sparc.td``, the ``Proc`` class is defined and then is used to
define particular SPARC processor subtypes that may have the previously
described features.
.. code-block:: llvm
.. code-block:: text
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;

View File

@ -747,7 +747,7 @@ template parameter is the name of the pass that is to be used on the command
line to specify that the pass should be added to a program (for example, with
:program:`opt` or :program:`bugpoint`). The first argument is the name of the
pass, which is to be used for the :option:`-help` output of programs, as well
as for debug output generated by the :option:`--debug-pass` option.
as for debug output generated by the `--debug-pass` option.
If you want your pass to be easily dumpable, you should implement the virtual
print method:

View File

@ -1,11 +1,6 @@
Overview
========
.. warning::
If you are using a released version of LLVM, see `the download page
<http://llvm.org/releases/>`_ to find your documentation.
The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.

View File

@ -2014,6 +2014,9 @@ void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef A);
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx);
void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs);
LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
unsigned KindID);
@ -2600,6 +2603,9 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef A);
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C, LLVMAttributeIndex Idx);
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs);
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID);

View File

@ -27,19 +27,24 @@ template<class GraphType>
struct GraphTraits {
// Elements to provide:
// NOTICE: We are in a transition from migration interfaces that require
// NodeType *, to NodeRef. NodeRef is required to be cheap to copy, but does
// not have to be a raw pointer. In the transition, user should define
// NodeType, and NodeRef = NodeType *.
//
// typedef NodeType - Type of Node in the graph
// typedef NodeRef - NodeType *
// typedef ChildIteratorType - Type used to iterate over children in graph
// static NodeType *getEntryNode(const GraphType &)
// static NodeRef getEntryNode(const GraphType &)
// Return the entry node of the graph
// static ChildIteratorType child_begin(NodeType *)
// static ChildIteratorType child_end (NodeType *)
// static ChildIteratorType child_begin(NodeRef)
// static ChildIteratorType child_end (NodeRef)
// Return iterators that point to the beginning and ending of the child
// node list for the specified node.
//
// typedef ...iterator nodes_iterator;
// static nodes_iterator nodes_begin(GraphType *G)
// static nodes_iterator nodes_end (GraphType *G)
@ -57,7 +62,7 @@ struct GraphTraits {
// your argument to XXX_begin(...) is unknown or needs to have the proper .h
// file #include'd.
//
typedef typename GraphType::UnknownGraphTypeError NodeType;
typedef typename GraphType::UnknownGraphTypeError NodeRef;
};

View File

@ -37,23 +37,22 @@ namespace llvm {
/// build up a vector of nodes in a particular SCC. Note that it is a forward
/// iterator and thus you cannot backtrack or re-visit nodes.
template <class GraphT, class GT = GraphTraits<GraphT>>
class scc_iterator
: public iterator_facade_base<
scc_iterator<GraphT, GT>, std::forward_iterator_tag,
const std::vector<typename GT::NodeType *>, ptrdiff_t> {
typedef typename GT::NodeType NodeType;
class scc_iterator : public iterator_facade_base<
scc_iterator<GraphT, GT>, std::forward_iterator_tag,
const std::vector<typename GT::NodeRef>, ptrdiff_t> {
typedef typename GT::NodeRef NodeRef;
typedef typename GT::ChildIteratorType ChildItTy;
typedef std::vector<NodeType *> SccTy;
typedef std::vector<NodeRef> SccTy;
typedef typename scc_iterator::reference reference;
/// Element of VisitStack during DFS.
struct StackElement {
NodeType *Node; ///< The current node pointer.
NodeRef Node; ///< The current node pointer.
ChildItTy NextChild; ///< The next child, modified inplace during DFS.
unsigned MinVisited; ///< Minimum uplink value of all children of Node.
StackElement(NodeType *Node, const ChildItTy &Child, unsigned Min)
: Node(Node), NextChild(Child), MinVisited(Min) {}
StackElement(NodeRef Node, const ChildItTy &Child, unsigned Min)
: Node(Node), NextChild(Child), MinVisited(Min) {}
bool operator==(const StackElement &Other) const {
return Node == Other.Node &&
@ -67,10 +66,10 @@ class scc_iterator
///
/// nodeVisitNumbers are per-node visit numbers, also used as DFS flags.
unsigned visitNum;
DenseMap<NodeType *, unsigned> nodeVisitNumbers;
DenseMap<NodeRef, unsigned> nodeVisitNumbers;
/// Stack holding nodes of the SCC.
std::vector<NodeType *> SCCNodeStack;
std::vector<NodeRef> SCCNodeStack;
/// The current SCC, retrieved using operator*().
SccTy CurrentSCC;
@ -80,7 +79,7 @@ class scc_iterator
std::vector<StackElement> VisitStack;
/// A single "visit" within the non-recursive DFS traversal.
void DFSVisitOne(NodeType *N);
void DFSVisitOne(NodeRef N);
/// The stack-based DFS traversal; defined below.
void DFSVisitChildren();
@ -88,7 +87,7 @@ class scc_iterator
/// Compute the next SCC using the DFS traversal.
void GetNextSCC();
scc_iterator(NodeType *entryN) : visitNum(0) {
scc_iterator(NodeRef entryN) : visitNum(0) {
DFSVisitOne(entryN);
GetNextSCC();
}
@ -131,7 +130,7 @@ public:
/// This informs the \c scc_iterator that the specified \c Old node
/// has been deleted, and \c New is to be used in its place.
void ReplaceNode(NodeType *Old, NodeType *New) {
void ReplaceNode(NodeRef Old, NodeRef New) {
assert(nodeVisitNumbers.count(Old) && "Old not in scc_iterator?");
nodeVisitNumbers[New] = nodeVisitNumbers[Old];
nodeVisitNumbers.erase(Old);
@ -139,7 +138,7 @@ public:
};
template <class GraphT, class GT>
void scc_iterator<GraphT, GT>::DFSVisitOne(NodeType *N) {
void scc_iterator<GraphT, GT>::DFSVisitOne(NodeRef N) {
++visitNum;
nodeVisitNumbers[N] = visitNum;
SCCNodeStack.push_back(N);
@ -155,8 +154,8 @@ void scc_iterator<GraphT, GT>::DFSVisitChildren() {
assert(!VisitStack.empty());
while (VisitStack.back().NextChild != GT::child_end(VisitStack.back().Node)) {
// TOS has at least one more child so continue DFS
NodeType *childN = *VisitStack.back().NextChild++;
typename DenseMap<NodeType *, unsigned>::iterator Visited =
NodeRef childN = *VisitStack.back().NextChild++;
typename DenseMap<NodeRef, unsigned>::iterator Visited =
nodeVisitNumbers.find(childN);
if (Visited == nodeVisitNumbers.end()) {
// this node has never been seen.
@ -176,7 +175,7 @@ template <class GraphT, class GT> void scc_iterator<GraphT, GT>::GetNextSCC() {
DFSVisitChildren();
// Pop the leaf on top of the VisitStack.
NodeType *visitingN = VisitStack.back().Node;
NodeRef visitingN = VisitStack.back().Node;
unsigned minVisitNum = VisitStack.back().MinVisited;
assert(VisitStack.back().NextChild == GT::child_end(visitingN));
VisitStack.pop_back();
@ -212,7 +211,7 @@ bool scc_iterator<GraphT, GT>::hasLoop() const {
assert(!CurrentSCC.empty() && "Dereferencing END SCC iterator!");
if (CurrentSCC.size() > 1)
return true;
NodeType *N = CurrentSCC.front();
NodeRef N = CurrentSCC.front();
for (ChildItTy CI = GT::child_begin(N), CE = GT::child_end(N); CI != CE;
++CI)
if (*CI == N)

View File

@ -26,10 +26,18 @@
#include <memory>
#include <utility> // for std::pair
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
namespace detail {
template <typename RangeT>
using IterOfRange = decltype(std::begin(std::declval<RangeT>()));
} // End detail namespace
//===----------------------------------------------------------------------===//
// Extra additions to <functional>
@ -235,6 +243,90 @@ auto reverse(
llvm::make_reverse_iterator(std::begin(C)));
}
/// An iterator adaptor that filters the elements of given inner iterators.
///
/// The predicate parameter should be a callable object that accepts the wrapped
/// iterator's reference type and returns a bool. When incrementing or
/// decrementing the iterator, it will call the predicate on each element and
/// skip any where it returns false.
///
/// \code
/// int A[] = { 1, 2, 3, 4 };
/// auto R = make_filter_range(A, [](int N) { return N % 2 == 1; });
/// // R contains { 1, 3 }.
/// \endcode
template <typename WrappedIteratorT, typename PredicateT>
class filter_iterator
: public iterator_adaptor_base<
filter_iterator<WrappedIteratorT, PredicateT>, WrappedIteratorT,
typename std::common_type<
std::forward_iterator_tag,
typename std::iterator_traits<
WrappedIteratorT>::iterator_category>::type> {
using BaseT = iterator_adaptor_base<
filter_iterator<WrappedIteratorT, PredicateT>, WrappedIteratorT,
typename std::common_type<
std::forward_iterator_tag,
typename std::iterator_traits<WrappedIteratorT>::iterator_category>::
type>;
struct PayloadType {
WrappedIteratorT End;
PredicateT Pred;
};
Optional<PayloadType> Payload;
void findNextValid() {
assert(Payload && "Payload should be engaged when findNextValid is called");
while (this->I != Payload->End && !Payload->Pred(*this->I))
BaseT::operator++();
}
// Construct the begin iterator. The begin iterator requires to know where end
// is, so that it can properly stop when it hits end.
filter_iterator(WrappedIteratorT Begin, WrappedIteratorT End, PredicateT Pred)
: BaseT(std::move(Begin)),
Payload(PayloadType{std::move(End), std::move(Pred)}) {
findNextValid();
}
// Construct the end iterator. It's not incrementable, so Payload doesn't
// have to be engaged.
filter_iterator(WrappedIteratorT End) : BaseT(End) {}
public:
using BaseT::operator++;
filter_iterator &operator++() {
BaseT::operator++();
findNextValid();
return *this;
}
template <typename RT, typename PT>
friend iterator_range<filter_iterator<detail::IterOfRange<RT>, PT>>
make_filter_range(RT &&, PT);
};
/// Convenience function that takes a range of elements and a predicate,
/// and return a new filter_iterator range.
///
/// FIXME: Currently if RangeT && is a rvalue reference to a temporary, the
/// lifetime of that temporary is not kept by the returned range object, and the
/// temporary is going to be dropped on the floor after the make_iterator_range
/// full expression that contains this function call.
template <typename RangeT, typename PredicateT>
iterator_range<filter_iterator<detail::IterOfRange<RangeT>, PredicateT>>
make_filter_range(RangeT &&Range, PredicateT Pred) {
using FilterIteratorT =
filter_iterator<detail::IterOfRange<RangeT>, PredicateT>;
return make_range(FilterIteratorT(std::begin(std::forward<RangeT>(Range)),
std::end(std::forward<RangeT>(Range)),
std::move(Pred)),
FilterIteratorT(std::end(std::forward<RangeT>(Range))));
}
//===----------------------------------------------------------------------===//
// Extra additions to <utility>
//===----------------------------------------------------------------------===//

View File

@ -174,6 +174,7 @@ public:
UnknownEnvironment,
GNU,
GNUABI64,
GNUEABI,
GNUEABIHF,
GNUX32,
@ -476,8 +477,9 @@ public:
bool isGNUEnvironment() const {
EnvironmentType Env = getEnvironment();
return Env == Triple::GNU || Env == Triple::GNUEABI ||
Env == Triple::GNUEABIHF || Env == Triple::GNUX32;
return Env == Triple::GNU || Env == Triple::GNUABI64 ||
Env == Triple::GNUEABI || Env == Triple::GNUEABIHF ||
Env == Triple::GNUX32;
}
/// Checks if the environment could be MSVC.

View File

@ -155,7 +155,14 @@ template <
typename T = typename std::iterator_traits<WrappedIteratorT>::value_type,
typename DifferenceTypeT =
typename std::iterator_traits<WrappedIteratorT>::difference_type,
typename PointerT = T *, typename ReferenceT = T &,
typename PointerT = typename std::conditional<
std::is_same<T, typename std::iterator_traits<
WrappedIteratorT>::value_type>::value,
typename std::iterator_traits<WrappedIteratorT>::pointer, T *>::type,
typename ReferenceT = typename std::conditional<
std::is_same<T, typename std::iterator_traits<
WrappedIteratorT>::value_type>::value,
typename std::iterator_traits<WrappedIteratorT>::reference, T &>::type,
// Don't provide these, they are mostly to act as aliases below.
typename WrappedTraitsT = std::iterator_traits<WrappedIteratorT>>
class iterator_adaptor_base
@ -168,15 +175,7 @@ protected:
iterator_adaptor_base() = default;
template <typename U>
explicit iterator_adaptor_base(
U &&u,
typename std::enable_if<
!std::is_base_of<typename std::remove_cv<
typename std::remove_reference<U>::type>::type,
DerivedT>::value,
int>::type = 0)
: I(std::forward<U &&>(u)) {}
explicit iterator_adaptor_base(WrappedIteratorT u) : I(std::move(u)) {}
const WrappedIteratorT &wrapped() const { return I; }

View File

@ -410,6 +410,7 @@ public:
// traversals.
template <> struct GraphTraits<CallGraphNode *> {
typedef CallGraphNode NodeType;
typedef CallGraphNode *NodeRef;
typedef CallGraphNode::CallRecord CGNPairTy;
typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode *>
@ -431,6 +432,7 @@ template <> struct GraphTraits<CallGraphNode *> {
template <> struct GraphTraits<const CallGraphNode *> {
typedef const CallGraphNode NodeType;
typedef const CallGraphNode *NodeRef;
typedef CallGraphNode::CallRecord CGNPairTy;
typedef std::pointer_to_unary_function<CGNPairTy, const CallGraphNode *>

View File

@ -196,6 +196,13 @@ namespace llvm {
/// block.
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);
/// \brief Insert code to directly compute the specified SCEV expression
/// into the program. The inserted code is inserted into the SCEVExpander's
/// current insertion point. If a type is specified, the result will be
/// expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
/// \brief Generates a code sequence that evaluates this predicate.
/// The inserted instructions will be at position \p Loc.
/// The result will be of type i1 and will have a value of 0 when the
@ -253,6 +260,15 @@ namespace llvm {
void enableLSRMode() { LSRMode = true; }
/// \brief Set the current insertion point. This is useful if multiple calls
/// to expandCodeFor() are going to be made with the same insert point and
/// the insert point may be moved during one of the expansions (e.g. if the
/// insert point is not a block terminator).
void setInsertPoint(Instruction *IP) {
assert(IP);
Builder.SetInsertPoint(IP);
}
/// \brief Clear the current insertion point. This is useful if the
/// instruction that had been serving as the insertion point may have been
/// deleted.
@ -313,12 +329,6 @@ namespace llvm {
Value *expand(const SCEV *S);
/// \brief Insert code to directly compute the specified SCEV expression
/// into the program. The inserted code is inserted into the SCEVExpander's
/// current insertion point. If a type is specified, the result will be
/// expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
/// \brief Determine the most "relevant" loop for the given SCEV.
const Loop *getRelevantLoop(const SCEV *);

View File

@ -740,6 +740,7 @@ struct MBB2NumberFunctor :
template <> struct GraphTraits<MachineBasicBlock *> {
typedef MachineBasicBlock NodeType;
typedef MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::succ_iterator ChildIteratorType;
static NodeType *getEntryNode(MachineBasicBlock *BB) { return BB; }
@ -753,6 +754,7 @@ template <> struct GraphTraits<MachineBasicBlock *> {
template <> struct GraphTraits<const MachineBasicBlock *> {
typedef const MachineBasicBlock NodeType;
typedef const MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
static NodeType *getEntryNode(const MachineBasicBlock *BB) { return BB; }
@ -772,6 +774,7 @@ template <> struct GraphTraits<const MachineBasicBlock *> {
//
template <> struct GraphTraits<Inverse<MachineBasicBlock*> > {
typedef MachineBasicBlock NodeType;
typedef MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::pred_iterator ChildIteratorType;
static NodeType *getEntryNode(Inverse<MachineBasicBlock *> G) {
return G.Graph;
@ -786,6 +789,7 @@ template <> struct GraphTraits<Inverse<MachineBasicBlock*> > {
template <> struct GraphTraits<Inverse<const MachineBasicBlock*> > {
typedef const MachineBasicBlock NodeType;
typedef const MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::const_pred_iterator ChildIteratorType;
static NodeType *getEntryNode(Inverse<const MachineBasicBlock*> G) {
return G.Graph;

View File

@ -210,6 +210,7 @@ public:
private:
friend class AttrBuilder;
friend class AttributeSetImpl;
friend class AttributeSetNode;
template <typename Ty> friend struct DenseMapInfo;
/// \brief The attributes that we are managing. This can be null to represent

View File

@ -155,6 +155,7 @@ struct isPodLike<TerminatorInst::SuccIterator<T, U>> {
template <> struct GraphTraits<BasicBlock*> {
typedef BasicBlock NodeType;
typedef BasicBlock *NodeRef;
typedef succ_iterator ChildIteratorType;
static NodeType *getEntryNode(BasicBlock *BB) { return BB; }
@ -168,6 +169,7 @@ template <> struct GraphTraits<BasicBlock*> {
template <> struct GraphTraits<const BasicBlock*> {
typedef const BasicBlock NodeType;
typedef const BasicBlock *NodeRef;
typedef succ_const_iterator ChildIteratorType;
static NodeType *getEntryNode(const BasicBlock *BB) { return BB; }
@ -187,6 +189,7 @@ template <> struct GraphTraits<const BasicBlock*> {
//
template <> struct GraphTraits<Inverse<BasicBlock*> > {
typedef BasicBlock NodeType;
typedef BasicBlock *NodeRef;
typedef pred_iterator ChildIteratorType;
static NodeType *getEntryNode(Inverse<BasicBlock *> G) { return G.Graph; }
static inline ChildIteratorType child_begin(NodeType *N) {
@ -199,6 +202,7 @@ template <> struct GraphTraits<Inverse<BasicBlock*> > {
template <> struct GraphTraits<Inverse<const BasicBlock*> > {
typedef const BasicBlock NodeType;
typedef const BasicBlock *NodeRef;
typedef const_pred_iterator ChildIteratorType;
static NodeType *getEntryNode(Inverse<const BasicBlock*> G) {
return G.Graph;

View File

@ -479,6 +479,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">,
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">,
@ -1512,8 +1514,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
}
// Vector bit test

View File

@ -2349,6 +2349,10 @@ public:
/// from getBooleanContents().
bool isConstFalseVal(const SDNode *N) const;
/// Return a constant of type VT that contains a true value that respects
/// getBooleanContents()
SDValue getConstTrueVal(SelectionDAG &DAG, EVT VT, const SDLoc &DL) const;
/// Return if \p N is a True value when extended to \p VT.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool Signed) const;

View File

@ -623,6 +623,7 @@ template <> struct GraphTraits<IrreducibleGraph> {
typedef bfi_detail::IrreducibleGraph GraphT;
typedef const GraphT::IrrNode NodeType;
typedef const GraphT::IrrNode *NodeRef;
typedef GraphT::IrrNode::iterator ChildIteratorType;
static const NodeType *getEntryNode(const GraphT &G) {

View File

@ -1424,8 +1424,8 @@ Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), double V,
/// integer type Ty is used to select how many bits are available for the
/// result. Returns null if the conversion cannot be performed, otherwise
/// returns the Constant value resulting from the conversion.
Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero,
Type *Ty) {
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64bits.
unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
@ -1438,7 +1438,8 @@ Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero,
APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
/*isSigned=*/true, mode,
&isExact);
if (status != APFloat::opOK && status != APFloat::opInexact)
if (status != APFloat::opOK &&
(!roundTowardZero || status != APFloat::opInexact))
return nullptr;
return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
}
@ -1676,17 +1677,17 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/false, Ty);
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/false, Ty);
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty);
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty);
}
}

View File

@ -3400,7 +3400,10 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
return TrueVal;
if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) {
unsigned BitWidth = Q.DL.getTypeSizeInBits(TrueVal->getType());
// FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
// decomposeBitTestICmp() might help.
unsigned BitWidth =
Q.DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *CmpLHS = ICI->getOperand(0);
Value *CmpRHS = ICI->getOperand(1);
@ -4274,7 +4277,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
if (I->getParent())
if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
!I->mayHaveSideEffects())
I->eraseFromParent();
} else {
Worklist.insert(I);
@ -4302,7 +4306,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
if (I->getParent())
if (I->getParent() && !I->isEHPad() && !isa<TerminatorInst>(I) &&
!I->mayHaveSideEffects())
I->eraseFromParent();
}
return Simplified;

View File

@ -115,13 +115,19 @@ bool UnrolledInstAnalyzer::visitLoad(LoadInst &I) {
// We might have a vector load from an array. FIXME: for now we just bail
// out in this case, but we should be able to resolve and simplify such
// loads.
if(CDS->getElementType() != I.getType())
if (CDS->getElementType() != I.getType())
return false;
int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
if (SimplifiedAddrOp->getValue().getActiveBits() >= 64)
unsigned ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;
if (SimplifiedAddrOp->getValue().getActiveBits() > 64)
return false;
int64_t Index = SimplifiedAddrOp->getSExtValue() / ElemSize;
int64_t SimplifiedAddrOpV = SimplifiedAddrOp->getSExtValue();
if (SimplifiedAddrOpV < 0) {
// FIXME: For now we conservatively ignore out of bound accesses, but
// we're allowed to perform the optimization in this case.
return false;
}
uint64_t Index = static_cast<uint64_t>(SimplifiedAddrOpV) / ElemSize;
if (Index >= CDS->getNumElements()) {
// FIXME: For now we conservatively ignore out of bound accesses, but
// we're allowed to perform the optimization in this case.

View File

@ -1610,8 +1610,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
Instruction *IP) {
assert(IP);
Builder.SetInsertPoint(IP);
setInsertPoint(IP);
return expandCodeFor(SH, Ty);
}

View File

@ -214,10 +214,7 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
}
TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
// It's possible to ask for the FuncId of a function which doesn't have a
// subprogram: inlining a function with debug info into a function with none.
if (!SP)
return TypeIndex::None();
assert(SP);
// Check if we've already translated this subprogram.
auto I = TypeIndices.find({SP, nullptr});
@ -621,11 +618,12 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
std::string FuncName;
auto *SP = GV->getSubprogram();
assert(SP);
setCurrentSubprogram(SP);
// If we have a display name, build the fully qualified name by walking the
// chain of scopes.
if (SP != nullptr && !SP->getDisplayName().empty())
if (!SP->getDisplayName().empty())
FuncName =
getFullyQualifiedName(SP->getScope().resolve(), SP->getDisplayName());
@ -864,7 +862,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
void CodeViewDebug::beginFunction(const MachineFunction *MF) {
assert(!CurFn && "Can't process two functions at once!");
if (!Asm || !MMI->hasDebugInfo())
if (!Asm || !MMI->hasDebugInfo() || !MF->getFunction()->getSubprogram())
return;
DebugHandlerBase::beginFunction(MF);
@ -1939,7 +1937,8 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
// Ignore DBG_VALUE locations and function prologue.
if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
if (!Asm || !CurFn || MI->isDebugValue() ||
MI->getFlag(MachineInstr::FrameSetup))
return;
DebugLoc DL = MI->getDebugLoc();
if (DL == PrevInstLoc || !DL)

View File

@ -996,6 +996,24 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
MachineBasicBlock *IBB = &*I;
MachineBasicBlock *PredBB = &*std::prev(I);
MergePotentials.clear();
MachineLoop *ML;
// Bail if merging after placement and IBB is the loop header because
// -- If merging predecessors that belong to the same loop as IBB, the
// common tail of merged predecessors may become the loop top if block
// placement is called again and the predecessors may branch to this common
// tail and require more branches. This can be relaxed if
// MachineBlockPlacement::findBestLoopTop is more flexible.
// --If merging predecessors that do not belong to the same loop as IBB, the
// loop info of IBB's loop and the other loops may be affected. Calling the
// block placement again may make big change to the layout and eliminate the
// reason to do tail merging here.
if (AfterBlockPlacement && MLI) {
ML = MLI->getLoopFor(IBB);
if (ML && IBB == ML->getHeader())
continue;
}
for (MachineBasicBlock *PBB : I->predecessors()) {
if (MergePotentials.size() == TailMergeThreshold)
break;
@ -1015,16 +1033,12 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (PBB->hasEHPadSuccessor())
continue;
// Bail out if the loop header (IBB) is not the top of the loop chain
// after the block placement. Otherwise, the common tail of IBB's
// predecessors may become the loop top if block placement is called again
// and the predecessors may branch to this common tail.
// FIXME: Relaxed this check if the algorithm of finding loop top is
// changed in MBP.
// After block placement, only consider predecessors that belong to the
// same loop as IBB. The reason is the same as above when skipping loop
// header.
if (AfterBlockPlacement && MLI)
if (MachineLoop *ML = MLI->getLoopFor(IBB))
if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB))
continue;
if (ML != MLI->getLoopFor(PBB))
continue;
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;

View File

@ -530,7 +530,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
unsigned Align =
std::max(DL->getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getLiveRange(StackGuardSlot));
Align, SSC.getFullLiveRange());
}
for (Argument *Arg : ByValArguments) {

View File

@ -25,7 +25,9 @@ static cl::opt<bool> ClColoring("safe-stack-coloring",
cl::Hidden, cl::init(true));
const StackColoring::LiveRange &StackColoring::getLiveRange(AllocaInst *AI) {
return LiveRanges[AllocaNumbering[AI]];
const auto IT = AllocaNumbering.find(AI);
assert(IT != AllocaNumbering.end());
return LiveRanges[IT->second];
}
bool StackColoring::readMarker(Instruction *I, bool *IsStart) {

View File

@ -100,7 +100,8 @@ void StackLayout::layoutObject(StackObject &Obj) {
}
// Split starting and ending regions if necessary.
for (StackRegion &R : Regions) {
for (unsigned i = 0; i < Regions.size(); ++i) {
StackRegion &R = Regions[i];
if (Start > R.Start && Start < R.End) {
StackRegion R0 = R;
R.Start = R0.End = Start;

View File

@ -6198,13 +6198,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
unsigned ElementWidth = VT.getScalarType().getSizeInBits();
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
// Here, T can be 1 or -1, depending on the type of the setcc and
// getBooleanContents().
unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits();
SDLoc DL(N);
SDValue NegOne =
DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
// To determine the "true" side of the select, we need to know the high bit
// of the value returned by the setcc if it evaluates to true.
// If the type of the setcc is i1, then the true case of the select is just
// sext(i1 1), that is, -1.
// If the type of the setcc is larger (say, i8) then the value of the high
// bit depends on getBooleanContents(). So, ask TLI for a real "true" value
// of the appropriate width.
SDValue ExtTrueVal =
(SetCCWidth == 1)
? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
DL, VT)
: TLI.getConstTrueVal(DAG, VT, DL);
if (SDValue SCC = SimplifySelectCC(
DL, N0.getOperand(0), N0.getOperand(1), NegOne,
DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
DAG.getConstant(0, DL, VT),
cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
return SCC;
@ -6215,10 +6229,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
SDLoc DL(N);
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
N0.getOperand(0), N0.getOperand(1), CC);
return DAG.getSelect(DL, VT, SetCC,
NegOne, DAG.getConstant(0, DL, VT));
SDValue SetCC =
DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
DAG.getConstant(0, DL, VT));
}
}
}

View File

@ -6639,19 +6639,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
SDNode *FromNode = From.getNode();
SDNode *ToNode = To.getNode();
ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
SmallVector<SDDbgValue *, 2> ClonedDVs;
for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
I != E; ++I) {
SDDbgValue *Dbg = *I;
// Only add Dbgvalues attached to same ResNo.
if (Dbg->getKind() == SDDbgValue::SDNODE &&
Dbg->getResNo() == From.getResNo()) {
Dbg->getSDNode() == From.getNode() &&
Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
assert(FromNode != ToNode &&
"Should not transfer Debug Values intranode");
SDDbgValue *Clone =
getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
Dbg->getDebugLoc(), Dbg->getOrder());
AddDbgValue(Clone, ToNode, false);
ClonedDVs.push_back(Clone);
Dbg->setIsInvalidated();
}
}
for (SDDbgValue *I : ClonedDVs)
AddDbgValue(I, ToNode, false);
}
//===----------------------------------------------------------------------===//

View File

@ -1234,6 +1234,16 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
llvm_unreachable("Invalid boolean contents");
}
SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT,
const SDLoc &DL) const {
unsigned ElementWidth = VT.getScalarSizeInBits();
APInt TrueInt =
getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent
? APInt(ElementWidth, 1)
: APInt::getAllOnesValue(ElementWidth);
return DAG.getConstant(TrueInt, DL, VT);
}
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;

View File

@ -29,7 +29,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@ -539,6 +539,16 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
return TRI->regsOverlap(RegA, RegB);
}
// Returns true if Reg is equal or aliased to at least one register in Set.
static bool regOverlapsSet(const SmallVectorImpl<unsigned> &Set, unsigned Reg,
const TargetRegisterInfo *TRI) {
for (unsigned R : Set)
if (TRI->regsOverlap(R, Reg))
return true;
return false;
}
/// Return true if it's potentially profitable to commute the two-address
/// instruction that's being processed.
bool
@ -864,9 +874,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// FIXME: Needs more sophisticated heuristics.
return false;
SmallSet<unsigned, 2> Uses;
SmallSet<unsigned, 2> Kills;
SmallSet<unsigned, 2> Defs;
SmallVector<unsigned, 2> Uses;
SmallVector<unsigned, 2> Kills;
SmallVector<unsigned, 2> Defs;
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
@ -874,12 +884,12 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
if (!MOReg)
continue;
if (MO.isDef())
Defs.insert(MOReg);
Defs.push_back(MOReg);
else {
Uses.insert(MOReg);
Uses.push_back(MOReg);
if (MOReg != Reg && (MO.isKill() ||
(LIS && isPlainlyKilled(MI, MOReg, LIS))))
Kills.insert(MOReg);
Kills.push_back(MOReg);
}
}
@ -888,8 +898,9 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator AfterMI = std::next(Begin);
MachineBasicBlock::iterator End = AfterMI;
while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) {
Defs.insert(End->getOperand(0).getReg());
while (End->isCopy() &&
regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) {
Defs.push_back(End->getOperand(0).getReg());
++End;
}
@ -915,21 +926,21 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
if (!MOReg)
continue;
if (MO.isDef()) {
if (Uses.count(MOReg))
if (regOverlapsSet(Uses, MOReg, TRI))
// Physical register use would be clobbered.
return false;
if (!MO.isDead() && Defs.count(MOReg))
if (!MO.isDead() && regOverlapsSet(Defs, MOReg, TRI))
// May clobber a physical register def.
// FIXME: This may be too conservative. It's ok if the instruction
// is sunken completely below the use.
return false;
} else {
if (Defs.count(MOReg))
if (regOverlapsSet(Defs, MOReg, TRI))
return false;
bool isKill =
MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS));
if (MOReg != Reg &&
((isKill && Uses.count(MOReg)) || Kills.count(MOReg)))
if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) ||
regOverlapsSet(Kills, MOReg, TRI)))
// Don't want to extend other live ranges and update kills.
return false;
if (MOReg == Reg && !isKill)

View File

@ -19,8 +19,8 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/Attributes.h"
#include "AttributeSetNode.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/TrailingObjects.h"
#include <climits>
#include <string>
@ -142,73 +142,6 @@ public:
StringRef getStringValue() const { return Val; }
};
//===----------------------------------------------------------------------===//
/// \class
/// \brief This class represents a group of attributes that apply to one
/// element: function, return type, or parameter.
class AttributeSetNode final
: public FoldingSetNode,
private TrailingObjects<AttributeSetNode, Attribute> {
friend TrailingObjects;
unsigned NumAttrs; ///< Number of attributes in this node.
/// Bitset with a bit for each available attribute Attribute::AttrKind.
uint64_t AvailableAttrs;
AttributeSetNode(ArrayRef<Attribute> Attrs)
: NumAttrs(Attrs.size()), AvailableAttrs(0) {
static_assert(Attribute::EndAttrKinds <= sizeof(AvailableAttrs) * CHAR_BIT,
"Too many attributes for AvailableAttrs");
// There's memory after the node where we can store the entries in.
std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
for (Attribute I : *this) {
if (!I.isStringAttribute()) {
AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
}
}
}
// AttributesSetNode is uniqued, these should not be publicly available.
void operator=(const AttributeSetNode &) = delete;
AttributeSetNode(const AttributeSetNode &) = delete;
public:
void operator delete(void *p) { ::operator delete(p); }
static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
/// \brief Return the number of attributes this AttributeSet contains.
unsigned getNumAttributes() const { return NumAttrs; }
bool hasAttribute(Attribute::AttrKind Kind) const {
return AvailableAttrs & ((uint64_t)1) << Kind;
}
bool hasAttribute(StringRef Kind) const;
bool hasAttributes() const { return NumAttrs != 0; }
Attribute getAttribute(Attribute::AttrKind Kind) const;
Attribute getAttribute(StringRef Kind) const;
unsigned getAlignment() const;
unsigned getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::string getAsString(bool InAttrGrp) const;
typedef const Attribute *iterator;
iterator begin() const { return getTrailingObjects<Attribute>(); }
iterator end() const { return begin() + NumAttrs; }
void Profile(FoldingSetNodeID &ID) const {
Profile(ID, makeArrayRef(begin(), end()));
}
static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
AttrList[I].Profile(ID);
}
};
typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
//===----------------------------------------------------------------------===//

98
lib/IR/AttributeSetNode.h Normal file
View File

@ -0,0 +1,98 @@
//===-- AttributeSetNode.h - AttributeSet Internal Node ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file defines the node class used internally by AttributeSet.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_IR_ATTRIBUTESETNODE_H
#define LLVM_IR_ATTRIBUTESETNODE_H
#include "llvm/ADT/FoldingSet.h"
#include "llvm/IR/Attributes.h"
#include "llvm/Support/TrailingObjects.h"
#include <climits>
namespace llvm {
//===----------------------------------------------------------------------===//
/// \class
/// \brief This class represents a group of attributes that apply to one
/// element: function, return type, or parameter.
class AttributeSetNode final
: public FoldingSetNode,
private TrailingObjects<AttributeSetNode, Attribute> {
friend TrailingObjects;
unsigned NumAttrs; ///< Number of attributes in this node.
/// Bitset with a bit for each available attribute Attribute::AttrKind.
uint64_t AvailableAttrs;
AttributeSetNode(ArrayRef<Attribute> Attrs)
: NumAttrs(Attrs.size()), AvailableAttrs(0) {
static_assert(Attribute::EndAttrKinds <= sizeof(AvailableAttrs) * CHAR_BIT,
"Too many attributes for AvailableAttrs");
// There's memory after the node where we can store the entries in.
std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
for (Attribute I : *this) {
if (!I.isStringAttribute()) {
AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
}
}
}
// AttributesSetNode is uniqued, these should not be publicly available.
void operator=(const AttributeSetNode &) = delete;
AttributeSetNode(const AttributeSetNode &) = delete;
public:
void operator delete(void *p) { ::operator delete(p); }
static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
static AttributeSetNode *get(AttributeSet AS, unsigned Index) {
return AS.getAttributes(Index);
}
/// \brief Return the number of attributes this AttributeSet contains.
unsigned getNumAttributes() const { return NumAttrs; }
bool hasAttribute(Attribute::AttrKind Kind) const {
return AvailableAttrs & ((uint64_t)1) << Kind;
}
bool hasAttribute(StringRef Kind) const;
bool hasAttributes() const { return NumAttrs != 0; }
Attribute getAttribute(Attribute::AttrKind Kind) const;
Attribute getAttribute(StringRef Kind) const;
unsigned getAlignment() const;
unsigned getStackAlignment() const;
uint64_t getDereferenceableBytes() const;
uint64_t getDereferenceableOrNullBytes() const;
std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
std::string getAsString(bool InAttrGrp) const;
typedef const Attribute *iterator;
iterator begin() const { return getTrailingObjects<Attribute>(); }
iterator end() const { return begin() + NumAttrs; }
void Profile(FoldingSetNodeID &ID) const {
Profile(ID, makeArrayRef(begin(), end()));
}
static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
AttrList[I].Profile(ID);
}
};
} // end llvm namespace
#endif

View File

@ -251,8 +251,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "sse2.cvtps2pd" ||
Name == "avx.cvtdq2.pd.256" ||
Name == "avx.cvt.ps2.pd.256" ||
Name == "sse2.cvttps2dq" ||
Name.startswith("avx.cvtt.") ||
Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128" ||
Name.startswith("avx.vextractf128.") ||
@ -712,12 +710,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
else
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
} else if (IsX86 && (Name == "sse2.cvttps2dq" ||
Name.startswith("avx.cvtt."))) {
// Truncation (round to zero) float/double to i32 vector conversion.
Value *Src = CI->getArgOperand(0);
VectorType *DstTy = cast<VectorType>(CI->getType());
Rep = Builder.CreateFPToSI(Src, DstTy, "cvtt");
} else if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;

View File

@ -16,6 +16,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Attributes.h"
#include "AttributeSetNode.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@ -1844,6 +1845,18 @@ void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
unwrap<Function>(F)->addAttribute(Idx, unwrap(A));
}
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx) {
auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
return ASN->getNumAttributes();
}
void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
for (auto A: make_range(ASN->begin(), ASN->end()))
*Attrs++ = wrap(A);
}
LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
unsigned KindID) {
@ -2216,6 +2229,21 @@ void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
CallSite(unwrap<Instruction>(C)).addAttribute(Idx, unwrap(A));
}
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
LLVMAttributeIndex Idx) {
auto CS = CallSite(unwrap<Instruction>(C));
auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
return ASN->getNumAttributes();
}
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
auto CS = CallSite(unwrap<Instruction>(C));
auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
for (auto A: make_range(ASN->begin(), ASN->end()))
*Attrs++ = wrap(A);
}
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID) {

View File

@ -675,8 +675,8 @@ void MDNode::handleChangedOperand(void *Ref, Metadata *New) {
Metadata *Old = getOperand(Op);
setOperand(Op, New);
// Drop uniquing for self-reference cycles.
if (New == this) {
// Drop uniquing for self-reference cycles and deleted constants.
if (New == this || (!New && Old && isa<ConstantAsMetadata>(Old))) {
if (!isResolved())
resolve();
storeDistinctInContext();

View File

@ -201,6 +201,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
switch (Kind) {
case UnknownEnvironment: return "unknown";
case GNU: return "gnu";
case GNUABI64: return "gnuabi64";
case GNUEABIHF: return "gnueabihf";
case GNUEABI: return "gnueabi";
case GNUX32: return "gnux32";
@ -468,6 +469,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
.StartsWith("eabihf", Triple::EABIHF)
.StartsWith("eabi", Triple::EABI)
.StartsWith("gnuabi64", Triple::GNUABI64)
.StartsWith("gnueabihf", Triple::GNUEABIHF)
.StartsWith("gnueabi", Triple::GNUEABI)
.StartsWith("gnux32", Triple::GNUX32)

View File

@ -250,6 +250,7 @@ def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
FeatureMacroOpFusion,
FeatureNEON,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
HasV8_1aOps]>;
def : ProcessorModel<"generic", NoSchedModel, [

View File

@ -7685,6 +7685,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
/// Fold a floating-point multiply by power of two into floating-point to
/// fixed-point conversion.
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
@ -7728,10 +7729,16 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = MVT::v4i32;
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
"Illegal vector type after legalization");
SDLoc DL(N);
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
@ -9853,7 +9860,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return performFpToIntCombine(N, DAG, Subtarget);
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, Subtarget);
case ISD::OR:

View File

@ -20,6 +20,7 @@ class AMDGPUInstrPrinter;
class AMDGPUSubtarget;
class AMDGPUTargetMachine;
class FunctionPass;
class GCNTargetMachine;
struct MachineSchedContext;
class MCAsmInfo;
class raw_ostream;
@ -50,7 +51,7 @@ FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createAMDGPUCodeGenPreparePass(const TargetMachine *TM = nullptr);
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C);

View File

@ -783,15 +783,19 @@ void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
RuntimeMD::OpenCL_C, 1);
auto Node = MD->getOperand(0);
unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
->getZExtValue();
unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
->getZExtValue();
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
Major * 100 + Minor * 10, 2);
if (MD->getNumOperands()) {
auto Node = MD->getOperand(0);
if (Node->getNumOperands() > 1) {
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
RuntimeMD::OpenCL_C, 1);
uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
->getZExtValue();
uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
->getZExtValue();
emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
Major * 100 + Minor * 10, 2);
}
}
}
}

View File

@ -14,7 +14,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
@ -30,15 +32,28 @@ using namespace llvm;
namespace {
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare> {
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNTargetMachine *TM;
const SISubtarget *ST;
DivergenceAnalysis *DA;
const TargetMachine *TM;
Module *Mod;
bool HasUnsafeFPMath;
public:
static char ID;
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
FunctionPass(ID),
TM(TM) { }
TM(static_cast<const GCNTargetMachine *>(TM)),
ST(nullptr),
DA(nullptr),
Mod(nullptr),
HasUnsafeFPMath(false) { }
bool visitFDiv(BinaryOperator &I);
bool visitInstruction(Instruction &I) {
return false;
}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@ -55,7 +70,92 @@ public:
} // End anonymous namespace
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
return false;
// Reciprocal f32 is handled separately without denormals.
return UnsafeDiv || CNum->isExactlyValue(+1.0);
}
// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Type *Ty = FDiv.getType();
// TODO: Handle half
if (!Ty->getScalarType()->isFloatTy())
return false;
MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
if (!FPMath)
return false;
const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
float ULP = FPOp->getFPAccuracy();
if (ULP < 2.5f)
return false;
FastMathFlags FMF = FPOp->getFastMathFlags();
bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
FMF.allowReciprocal();
if (ST->hasFP32Denormals() && !UnsafeDiv)
return false;
IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
Builder.setFastMathFlags(FMF);
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
Function *Decl
= II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
Value *Num = FDiv.getOperand(0);
Value *Den = FDiv.getOperand(1);
Value *NewFDiv = nullptr;
if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
NewFDiv = UndefValue::get(VT);
// FIXME: Doesn't do the right thing for cases where the vector is partially
// constant. This works when the scalarizer pass is run first.
for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
Value *NumEltI = Builder.CreateExtractElement(Num, I);
Value *DenEltI = Builder.CreateExtractElement(Den, I);
Value *NewElt;
if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
} else {
NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
}
NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
}
} else {
if (!shouldKeepFDivF32(Num, UnsafeDiv))
NewFDiv = Builder.CreateCall(Decl, { Num, Den });
}
if (NewFDiv) {
FDiv.replaceAllUsesWith(NewFDiv);
NewFDiv->takeName(&FDiv);
FDiv.eraseFromParent();
}
return true;
}
static bool hasUnsafeFPMath(const Function &F) {
Attribute Attr = F.getFnAttribute("unsafe-fp-math");
return Attr.getValueAsString() == "true";
}
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
return false;
}
@ -63,10 +163,21 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
if (!TM || skipFunction(F))
return false;
ST = &TM->getSubtarget<SISubtarget>(F);
DA = &getAnalysis<DivergenceAnalysis>();
visit(F);
HasUnsafeFPMath = hasUnsafeFPMath(F);
return true;
bool MadeChange = false;
for (BasicBlock &BB : F) {
BasicBlock::iterator Next;
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; I = Next) {
Next = std::next(I);
MadeChange |= visit(*I);
}
}
return MadeChange;
}
INITIALIZE_TM_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
@ -77,6 +188,6 @@ INITIALIZE_TM_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE,
char AMDGPUCodeGenPrepare::ID = 0;
FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const TargetMachine *TM) {
FunctionPass *llvm::createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM) {
return new AMDGPUCodeGenPrepare(TM);
}

View File

@ -420,9 +420,10 @@ int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP32_NEG_ONE = 0xbf800000;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

View File

@ -29,16 +29,39 @@ static const char *const IntrinsicNameTable[] = {
#undef GET_INTRINSIC_NAME_TABLE
};
std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
unsigned numTys) const {
if (IntrID < Intrinsic::num_intrinsics) {
return nullptr;
}
namespace {
#define GET_INTRINSIC_ATTRIBUTES
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_ATTRIBUTES
}
StringRef AMDGPUIntrinsicInfo::getName(unsigned IntrID,
ArrayRef<Type *> Tys) const {
if (IntrID < Intrinsic::num_intrinsics)
return StringRef();
assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
"Invalid intrinsic ID");
std::string Result(IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics]);
return Result;
return IntrinsicNameTable[IntrID - Intrinsic::num_intrinsics];
}
std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
unsigned NumTys) const {
return getName(IntrID, makeArrayRef(Tys, NumTys)).str();
}
FunctionType *AMDGPUIntrinsicInfo::getType(LLVMContext &Context, unsigned ID,
ArrayRef<Type*> Tys) const {
// FIXME: Re-use Intrinsic::getType machinery
switch (ID) {
case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
Type *F32Ty = Type::getFloatTy(Context);
return FunctionType::get(F32Ty, { F32Ty, F32Ty }, false);
}
default:
llvm_unreachable("unhandled intrinsic");
}
}
unsigned AMDGPUIntrinsicInfo::lookupName(const char *NameData,
@ -69,7 +92,19 @@ bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
}
Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned numTys) const {
llvm_unreachable("Not implemented");
ArrayRef<Type *> Tys) const {
FunctionType *FTy = getType(M->getContext(), IntrID, Tys);
Function *F
= cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
AttributeSet AS = getAttributes(M->getContext(),
static_cast<AMDGPUIntrinsic::ID>(IntrID));
F->setAttributes(AS);
return F;
}
Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Type **Tys,
unsigned NumTys) const {
return getDeclaration(M, IntrID, makeArrayRef(Tys, NumTys));
}

View File

@ -34,13 +34,23 @@ enum ID {
class AMDGPUIntrinsicInfo final : public TargetIntrinsicInfo {
public:
AMDGPUIntrinsicInfo();
StringRef getName(unsigned IntrId, ArrayRef<Type *> Tys = None) const;
std::string getName(unsigned IntrId, Type **Tys = nullptr,
unsigned numTys = 0) const override;
unsigned NumTys = 0) const override;
unsigned lookupName(const char *Name, unsigned Len) const override;
bool isOverloaded(unsigned IID) const override;
Function *getDeclaration(Module *M, unsigned ID,
Type **Tys = nullptr,
unsigned numTys = 0) const override;
unsigned NumTys = 0) const override;
Function *getDeclaration(Module *M, unsigned ID,
ArrayRef<Type *> = None) const;
FunctionType *getType(LLVMContext &Context, unsigned ID,
ArrayRef<Type*> Tys = None) const;
};
} // end namespace llvm

View File

@ -348,9 +348,6 @@ static VectorType *arrayTypeToVecType(Type *ArrayTy) {
static Value *
calculateVectorIndex(Value *Ptr,
const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
if (isa<AllocaInst>(Ptr))
return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
auto I = GEPIdx.find(GEP);
@ -360,11 +357,11 @@ calculateVectorIndex(Value *Ptr,
static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
// FIXME we only support simple cases
if (GEP->getNumOperands() != 3)
return NULL;
return nullptr;
ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
if (!I0 || !I0->isZero())
return NULL;
return nullptr;
return GEP->getOperand(2);
}
@ -398,7 +395,8 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
// are just being conservative for now.
if (!AllocaTy ||
AllocaTy->getElementType()->isVectorTy() ||
AllocaTy->getNumElements() > 4) {
AllocaTy->getNumElements() > 4 ||
AllocaTy->getNumElements() < 2) {
DEBUG(dbgs() << " Cannot convert type to vector\n");
return false;
}
@ -443,9 +441,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(0);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(BitCast);
Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
Inst->replaceAllUsesWith(ExtractElement);
@ -453,9 +453,11 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
break;
}
case Instruction::Store: {
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(1);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
Value *VecValue = Builder.CreateLoad(BitCast);
Value *NewVecValue = Builder.CreateInsertElement(VecValue,
Inst->getOperand(0),
@ -469,7 +471,6 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
break;
default:
Inst->dump();
llvm_unreachable("Inconsistency in instructions promotable to vector");
}
}
@ -477,11 +478,6 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
}
static bool isCallPromotable(CallInst *CI) {
// TODO: We might be able to handle some cases where the callee is a
// constantexpr bitcast of a function.
if (!CI->getCalledFunction())
return false;
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (!II)
return false;
@ -773,28 +769,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
continue;
}
IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
if (!Intr) {
// FIXME: What is this for? It doesn't make sense to promote arbitrary
// function calls. If the call is to a defined function that can also be
// promoted, we should be able to do this once that function is also
// rewritten.
std::vector<Type*> ArgTypes;
for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
ArgIdx != ArgEnd; ++ArgIdx) {
ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
}
Function *F = Call->getCalledFunction();
FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
F->isVarArg());
Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
NewType, F->getAttributes());
Function *NewF = cast<Function>(C);
Call->setCalledFunction(NewF);
continue;
}
IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
Builder.SetInsertPoint(Intr);
switch (Intr->getIntrinsicID()) {
case Intrinsic::lifetime_start:

View File

@ -309,6 +309,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override;
void addIRPasses() override;
bool addPreISel() override;
void addMachineSSAOptimization() override;
bool addInstSelector() override;
@ -499,6 +500,13 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&DeadMachineInstructionElimID);
}
void GCNPassConfig::addIRPasses() {
// TODO: May want to move later or split into an early and late one.
addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));
AMDGPUPassConfig::addIRPasses();
}
bool GCNPassConfig::addInstSelector() {
AMDGPUPassConfig::addInstSelector();
addPass(createSILowerI1CopiesPass());

View File

@ -122,6 +122,7 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
@ -832,13 +833,18 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::FP_TO_UINT:
if (N->getValueType(0) == MVT::i1) {
Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
return;
}
// Fall-through. Since we don't care about out of bounds values
// we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
// considers some extra cases which are not necessary here.
case ISD::FP_TO_SINT: {
if (N->getValueType(0) == MVT::i1) {
Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
return;
}
SDValue Result;
if (expandFP_TO_SINT(N, Result, DAG))
Results.push_back(Result);
@ -1052,15 +1058,24 @@ SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(
ISD::SETCC,
DL,
MVT::i1,
Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
DAG.getCondCode(ISD::SETNE)
);
Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
DAG.getCondCode(ISD::SETEQ));
}
SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
return DAG.getNode(
ISD::SETCC,
DL,
MVT::i1,
Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
DAG.getCondCode(ISD::SETEQ));
}
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,

View File

@ -72,7 +72,8 @@ private:
SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPrivateExtLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;

View File

@ -41,7 +41,8 @@ enum {
WQM = 1 << 22,
VGPRSpill = 1 << 23,
VOPAsmPrefer32Bit = 1 << 24,
Gather4 = 1 << 25
Gather4 = 1 << 25,
DisableWQM = 1 << 26
};
}

View File

@ -1134,9 +1134,9 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOVK_I32))
.addOperand(MI.getOperand(0))
.addImm(MFI->LDSSize);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.addOperand(MI.getOperand(0))
.addImm(MFI->LDSSize);
MI.eraseFromParent();
return BB;
}
@ -1792,6 +1792,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
Op->getVTList(), Ops, VT, MMO);
}
case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
return lowerFDIV_FAST(Op, DAG);
}
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
Op.getOperand(1),
@ -2098,7 +2101,8 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// Catch division cases where we can use shortcuts with rcp and rsq
// instructions.
SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@ -2139,47 +2143,48 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
// Faster 2.5 ULP division that does not support denormals.
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
const APFloat K0Val(BitsToFloat(0x6f800000));
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
const APFloat K1Val(BitsToFloat(0x2f800000));
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
// TODO: Should this propagate fast-math-flags?
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
// rcp does not support denormals.
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = LowerFastFDIV(Op, DAG))
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
// faster 2.5 ulp fdiv when using -amdgpu-fast-fdiv flag
if (EnableAMDGPUFastFDIV) {
// This does not support denormals.
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
const APFloat K0Val(BitsToFloat(0x6f800000));
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
const APFloat K1Val(BitsToFloat(0x2f800000));
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
// TODO: Should this propagate fast-math-flags?
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
// rcp does not support denormals.
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
// Generates more precise fpdiv32.
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
@ -2209,7 +2214,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
if (DAG.getTarget().Options.UnsafeFPMath)
return LowerFastFDIV(Op, DAG);
return lowerFastUnsafeFDIV(Op, DAG);
SDLoc SL(Op);
SDValue X = Op.getOperand(0);

View File

@ -36,7 +36,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;

View File

@ -41,6 +41,8 @@ class InstSI <dag outs, dag ins, string asm = "",
field bits<1> DS = 0;
field bits<1> MIMG = 0;
field bits<1> FLAT = 0;
// Whether WQM _must_ be enabled for this instruction.
field bits<1> WQM = 0;
field bits<1> VGPRSpill = 0;
@ -50,6 +52,9 @@ class InstSI <dag outs, dag ins, string asm = "",
field bits<1> Gather4 = 0;
// Whether WQM _must_ be disabled for this instruction.
field bits<1> DisableWQM = 0;
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@ -81,6 +86,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{23} = VGPRSpill;
let TSFlags{24} = VOPAsmPrefer32Bit;
let TSFlags{25} = Gather4;
let TSFlags{26} = DisableWQM;
let SchedRW = [Write32Bit];

View File

@ -738,7 +738,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
MachineBasicBlock::iterator Insert = Entry.front();
DebugLoc DL = Insert->getDebugLoc();
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass,
*MF);
if (TIDReg == AMDGPU::NoRegister)
return TIDReg;

View File

@ -340,6 +340,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::WQM;
}
static bool isDisableWQM(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
}
bool isDisableWQM(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
}
static bool isVGPRSpill(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
}

View File

@ -2949,6 +2949,10 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <0>;
let DisableWQM = 1 in {
def "_exact" : MUBUF_Pseudo <opName, outs, ins, []>;
}
let addr64 = 0, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@ -3019,7 +3023,8 @@ multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins,
multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
ValueType vt, SDPatternOperator atomic> {
let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1 in {
let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1,
DisableWQM = 1 in {
// No return variants
let glc = 0, AsmMatchConverter = "cvtMubufAtomic" in {
@ -3423,6 +3428,7 @@ class MIMG_Store_Helper <bits<7> op, string asm,
let mayStore = 1;
let hasSideEffects = 1;
let hasPostISelHook = 0;
let DisableWQM = 1;
}
multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
@ -3454,6 +3460,7 @@ class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
let mayStore = 1;
let hasSideEffects = 1;
let hasPostISelHook = 0;
let DisableWQM = 1;
let Constraints = "$vdst = $vdata";
let AsmMatchConverter = "cvtMIMGAtomic";
}

View File

@ -2200,7 +2200,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, 0,
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
imm:$glc, imm:$slc),
(!cast<MUBUF>(opcode # _OFFSET) $vdata, $rsrc, $soffset, (as_i16imm $offset),
(!cast<MUBUF>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
(as_i1imm $glc), (as_i1imm $slc), 0)
>;
@ -2208,7 +2208,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, i32:$vindex,
(MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
imm:$glc, imm:$slc),
(!cast<MUBUF>(opcode # _IDXEN) $vdata, $vindex, $rsrc, $soffset,
(!cast<MUBUF>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), 0)
>;
@ -2217,7 +2217,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, 0,
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
imm:$glc, imm:$slc),
(!cast<MUBUF>(opcode # _OFFEN) $vdata, $voffset, $rsrc, $soffset,
(!cast<MUBUF>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i1imm $glc),
(as_i1imm $slc), 0)
>;
@ -2226,7 +2226,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(name vt:$vdata, v4i32:$rsrc, i32:$vindex,
(MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
imm:$glc, imm:$slc),
(!cast<MUBUF>(opcode # _BOTHEN)
(!cast<MUBUF>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
@ -3391,6 +3391,16 @@ def : Pat <
(V_CNDMASK_B32_e64 0, -1, $src), sub1)
>;
class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat <
(i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
(i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>;
def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>;
// If we need to perform a logical operation on i1 values, we need to
// use vector comparisons since there is only one SCC register. Vector
// comparisions still write to a pair of SGPRs, so treat these as

View File

@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
// SI Intrinsic Definitions
// Backend internal SI Intrinsic Definitions. User code should not
// directly use these.
//
//===----------------------------------------------------------------------===//
@ -177,6 +178,12 @@ let TargetPrefix = "SI", isTarget = 1 in {
} // End TargetPrefix = "SI", isTarget = 1
let TargetPrefix = "amdgcn", isTarget = 1 in {
// Emit 2.5 ulp, no denormal division. Should only be inserted by
// pass based on !fpmath metadata.
def int_amdgcn_fdiv_fast : Intrinsic<
[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
>;
/* Control flow Intrinsics */
def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;

View File

@ -203,7 +203,8 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
Spill.Lane = Lane;
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
*MF);
if (LaneVGPR == AMDGPU::NoRegister)
// We have no VGPRs left for spilling SGPRs.

View File

@ -957,10 +957,13 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
// AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const {
unsigned
SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC,
const MachineFunction &MF) const {
for (unsigned Reg : *RC)
if (!MRI.isPhysRegUsed(Reg))
if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
return Reg;
return AMDGPU::NoRegister;
}

View File

@ -185,7 +185,8 @@ public:
unsigned getNumSGPRsAllowed(const SISubtarget &ST, unsigned WaveCount) const;
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
const TargetRegisterClass *RC,
const MachineFunction &MF) const;
unsigned getSGPR32PressureSet() const { return SGPR32SetID; };
unsigned getVGPR32PressureSet() const { return VGPR32SetID; };

View File

@ -94,12 +94,15 @@ private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
SmallVector<const MachineInstr *, 2> ExecExports;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
void markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist);
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
@ -126,6 +129,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -135,8 +139,11 @@ public:
char SIWholeQuadMode::ID = 0;
INITIALIZE_PASS(SIWholeQuadMode, DEBUG_TYPE,
"SI Whole Quad Mode", false, false)
INITIALIZE_PASS_BEGIN(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(SIWholeQuadMode, DEBUG_TYPE, "SI Whole Quad Mode", false,
false)
char &llvm::SIWholeQuadModeID = SIWholeQuadMode::ID;
@ -144,6 +151,23 @@ FunctionPass *llvm::createSIWholeQuadModePass() {
return new SIWholeQuadMode;
}
void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {
InstrInfo &II = Instructions[&MI];
assert(Flag == StateWQM || Flag == StateExact);
// Ignore if the instruction is already marked. The typical case is that we
// mark an instruction WQM multiple times, but for atomics it can happen that
// Flag is StateWQM, but Needs is already set to StateExact. In this case,
// letting the atomic run in StateExact is correct as per the relevant specs.
if (II.Needs)
return;
II.Needs = Flag;
Worklist.push_back(&MI);
}
// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
@ -161,7 +185,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
Flags = StateWQM;
} else if (MI.mayStore() && TII->usesVM_CNT(MI)) {
} else if (TII->isDisableWQM(MI)) {
Flags = StateExact;
} else {
// Handle export instructions with the exec mask valid flag set
@ -192,8 +216,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
continue;
}
Instructions[&MI].Needs = Flags;
Worklist.push_back(&MI);
markInstruction(MI, Flags, Worklist);
GlobalFlags |= Flags;
}
@ -214,9 +237,10 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
BlockInfo &BI = Blocks[MBB];
// Control flow-type instructions that are followed by WQM computations
// must themselves be in WQM.
if ((II.OutNeeds & StateWQM) && !(II.Needs & StateWQM) && MI.isTerminator()) {
// Control flow-type instructions and stores to temporary memory that are
// followed by WQM computations must themselves be in WQM.
if ((II.OutNeeds & StateWQM) && !II.Needs &&
(MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {
Instructions[&MI].Needs = StateWQM;
II.Needs = StateWQM;
}
@ -249,32 +273,35 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
if (!Use.isReg() || !Use.isUse())
continue;
// At this point, physical registers appear as inputs or outputs
// and following them makes no sense (and would in fact be incorrect
// when the same VGPR is used as both an output and an input that leads
// to a NeedsWQM instruction).
//
// Note: VCC appears e.g. in 64-bit addition with carry - theoretically we
// have to trace this, in practice it happens for 64-bit computations like
// pointers where both dwords are followed already anyway.
if (!TargetRegisterInfo::isVirtualRegister(Use.getReg()))
continue;
unsigned Reg = Use.getReg();
for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg())) {
InstrInfo &DefII = Instructions[&DefMI];
// Obviously skip if DefMI is already flagged as NeedWQM.
//
// The instruction might also be flagged as NeedExact. This happens when
// the result of an atomic is used in a WQM computation. In this case,
// the atomic must not run for helper pixels and the WQM result is
// undefined.
if (DefII.Needs != 0)
// Handle physical registers that we need to track; this is mostly relevant
// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.
if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
if (Reg == AMDGPU::EXEC)
continue;
DefII.Needs = StateWQM;
Worklist.push_back(&DefMI);
for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
LiveRange &LR = LIS->getRegUnit(*RegUnit);
const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
if (!Value)
continue;
// Since we're in machine SSA, we do not need to track physical
// registers across basic blocks.
if (Value->isPHIDef())
continue;
markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,
Worklist);
}
continue;
}
for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
markInstruction(DefMI, StateWQM, Worklist);
}
}
@ -468,6 +495,7 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
char GlobalFlags = analyzeFunction(MF);
if (!(GlobalFlags & StateWQM)) {

View File

@ -3857,7 +3857,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// Try to convert two saturating conditional selects into a single SSAT
SDValue SatValue;
uint64_t SatConstant;
if (isSaturatingConditional(Op, SatValue, SatConstant))
if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
isSaturatingConditional(Op, SatValue, SatConstant))
return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));

View File

@ -3650,7 +3650,8 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
def SSAT : AI<(outs GPRnopc:$Rd),
(ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsARM,HasV6]>{
bits<4> Rd;
bits<5> sat_imm;
bits<4> Rn;
@ -3666,7 +3667,8 @@ def SSAT : AI<(outs GPRnopc:$Rd),
def SSAT16 : AI<(outs GPRnopc:$Rd),
(ins imm1_16:$sat_imm, GPRnopc:$Rn), SatFrm,
NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []> {
NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsARM,HasV6]>{
bits<4> Rd;
bits<4> sat_imm;
bits<4> Rn;
@ -3679,7 +3681,8 @@ def SSAT16 : AI<(outs GPRnopc:$Rd),
def USAT : AI<(outs GPRnopc:$Rd),
(ins imm0_31:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsARM,HasV6]> {
bits<4> Rd;
bits<5> sat_imm;
bits<4> Rn;
@ -3695,7 +3698,8 @@ def USAT : AI<(outs GPRnopc:$Rd),
def USAT16 : AI<(outs GPRnopc:$Rd),
(ins imm0_15:$sat_imm, GPRnopc:$Rn), SatFrm,
NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []> {
NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsARM,HasV6]>{
bits<4> Rd;
bits<4> sat_imm;
bits<4> Rn;

View File

@ -2240,7 +2240,8 @@ class T2SatI<dag oops, dag iops, InstrItinClass itin,
def t2SSAT: T2SatI<
(outs rGPR:$Rd),
(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []> {
NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@ -2251,7 +2252,7 @@ def t2SSAT: T2SatI<
def t2SSAT16: T2SatI<
(outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
"ssat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasDSP]> {
Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@ -2265,7 +2266,8 @@ def t2SSAT16: T2SatI<
def t2USAT: T2SatI<
(outs rGPR:$Rd),
(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []> {
NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
@ -2275,7 +2277,7 @@ def t2USAT: T2SatI<
def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
NoItinerary,
"usat16", "\t$Rd, $sat_imm, $Rn", []>,
Requires<[IsThumb2, HasDSP]> {
Requires<[IsThumb2, HasDSP]> {
let Inst{31-22} = 0b1111001110;
let Inst{20} = 0;
let Inst{15} = 0;

View File

@ -518,6 +518,10 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
return true;
return false;
case ELF::R_MIPS_GOT_PAGE:
case ELF::R_MICROMIPS_GOT_PAGE:
case ELF::R_MIPS_GOT_OFST:
case ELF::R_MICROMIPS_GOT_OFST:
case ELF::R_MIPS_16:
case ELF::R_MIPS_32:
case ELF::R_MIPS_GPREL32:
@ -539,8 +543,6 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MIPS_SHIFT5:
case ELF::R_MIPS_SHIFT6:
case ELF::R_MIPS_GOT_DISP:
case ELF::R_MIPS_GOT_PAGE:
case ELF::R_MIPS_GOT_OFST:
case ELF::R_MIPS_GOT_HI16:
case ELF::R_MIPS_GOT_LO16:
case ELF::R_MIPS_INSERT_A:
@ -589,8 +591,6 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MICROMIPS_PC16_S1:
case ELF::R_MICROMIPS_CALL16:
case ELF::R_MICROMIPS_GOT_DISP:
case ELF::R_MICROMIPS_GOT_PAGE:
case ELF::R_MICROMIPS_GOT_OFST:
case ELF::R_MICROMIPS_GOT_HI16:
case ELF::R_MICROMIPS_GOT_LO16:
case ELF::R_MICROMIPS_SUB:

View File

@ -28,12 +28,19 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
// FIXME: This condition isn't quite right but it's the best we can do until
// this object can identify the ABI. It will misbehave when using O32
// on a mips64*-* triple.
if ((TheTriple.getArch() == Triple::mipsel) ||
(TheTriple.getArch() == Triple::mips)) {
PrivateGlobalPrefix = "$";
PrivateLabelPrefix = "$";
}
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
Data64bitsDirective = "\t.8byte\t";
PrivateGlobalPrefix = "$";
PrivateLabelPrefix = "$";
CommentString = "#";
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";

View File

@ -57,7 +57,10 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
else
Ret += "E";
Ret += "-m:m";
if (ABI.IsO32())
Ret += "-m:m";
else
Ret += "-m:e";
// Pointers are 32 bit on some ABIs.
if (!ABI.IsN64())

View File

@ -1187,6 +1187,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
@ -13373,6 +13381,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT,
@ -13380,6 +13389,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
@ -13694,6 +13706,15 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
MVT SVT = N0.getSimpleValueType();
SDLoc dl(Op);
if (SVT.getVectorElementType() == MVT::i1) {
if (SVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
}
switch (SVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");

View File

@ -2661,7 +2661,8 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, unsigned &NewSrc,
bool &isKill, bool &isUndef,
MachineOperand &ImplicitOp) const {
MachineOperand &ImplicitOp,
LiveVariables *LV) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
if (AllowSP) {
@ -2715,13 +2716,17 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
NewSrc = MF.getRegInfo().createVirtualRegister(RC);
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
get(TargetOpcode::COPY))
.addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
.addOperand(Src);
// Which is obviously going to be dead after we're done with it.
isKill = true;
isUndef = false;
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
}
// We've set all the parameters without issue.
@ -2900,7 +2905,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
@ -2943,7 +2948,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB =
@ -2977,7 +2982,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@ -3016,7 +3021,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
const MachineOperand &Src2 = MI.getOperand(2);
@ -3024,7 +3029,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
SrcReg2, isKill2, isUndef2, ImplicitOp2))
SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
return nullptr;
MachineInstrBuilder MIB =
@ -3087,7 +3092,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
SrcReg, isKill, isUndef, ImplicitOp, LV))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))

View File

@ -230,7 +230,7 @@ public:
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned LEAOpcode, bool AllowSP, unsigned &NewSrc,
bool &isKill, bool &isUndef,
MachineOperand &ImplicitOp) const;
MachineOperand &ImplicitOp, LiveVariables *LV) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target

View File

@ -1820,7 +1820,7 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg,
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
@ -1836,7 +1836,7 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg,
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
@ -2009,24 +2009,35 @@ def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(loadv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
[(set VR256:$dst,
(int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
[(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
(loadv8f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
Sched<[WriteCvtF2ILd]>;
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
[(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst,
(int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
@ -2096,10 +2107,14 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
[(set VR128:$dst,
(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;

View File

@ -332,6 +332,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
namespace llvm {
template <> struct GraphTraits<ArgumentGraphNode *> {
typedef ArgumentGraphNode NodeType;
typedef ArgumentGraphNode *NodeRef;
typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
static inline NodeType *getEntryNode(NodeType *A) { return A; }

View File

@ -44,6 +44,7 @@
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@ -779,7 +780,8 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
// Instructions could multiply use V.
while (UI != E && *UI == I)
++UI;
I->eraseFromParent();
if (isInstructionTriviallyDead(I, TLI))
I->eraseFromParent();
}
}

View File

@ -134,6 +134,10 @@ static cl::opt<int> PreInlineThreshold(
cl::desc("Control the amount of inlining in pre-instrumentation inliner "
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
"enable-gvn-hoist", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental GVN Hoisting pass"));
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@ -232,7 +236,8 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createCFGSimplificationPass());
FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
FPM.add(createGVNHoistPass());
if(EnableGVNHoist)
FPM.add(createGVNHoistPass());
FPM.add(createLowerExpectIntrinsicPass());
}

View File

@ -553,8 +553,11 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
// FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
// decomposeBitTestICmp() might help.
{
unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType());
unsigned BitWidth =
DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
APInt MinSignedValue = APInt::getSignBit(BitWidth);
Value *X;
const APInt *Y, *C;

View File

@ -2830,7 +2830,8 @@ bool InstCombiner::run() {
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
eraseInstFromFunction(*I);
if (isInstructionTriviallyDead(I, TLI))
eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
@ -2851,7 +2852,8 @@ bool InstCombiner::run() {
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
eraseInstFromFunction(*I);
if (isInstructionTriviallyDead(I, TLI))
eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
}
@ -3007,7 +3009,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
++NumConstProp;
Inst->eraseFromParent();
if (isInstructionTriviallyDead(Inst, TLI))
Inst->eraseFromParent();
continue;
}

Some files were not shown because too many files have changed in this diff Show More