Vendor import of llvm release_80 branch r355313:

https://llvm.org/svn/llvm-project/llvm/branches/release_80@355313
This commit is contained in:
Dimitry Andric 2019-03-04 18:25:41 +00:00
parent bd7f07563c
commit 1d6bb9f417
17 changed files with 322 additions and 127 deletions

2
.gitignore vendored
View File

@ -72,6 +72,8 @@ docs/_build
# VS2017 and VSCode config files.
.vscode
.vs
# clangd index
.clangd
#==============================================================================#
# Files created in tree by the Go bindings.

View File

@ -1263,7 +1263,7 @@ func (v Value) Indices() []uint32 {
num := C.LLVMGetNumIndices(v.C)
indicesPtr := C.LLVMGetIndices(v.C)
// https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
rawIndices := (*[1 << 30]C.uint)(unsafe.Pointer(indicesPtr))[:num:num]
rawIndices := (*[1 << 20]C.uint)(unsafe.Pointer(indicesPtr))[:num:num]
indices := make([]uint32, num)
for i := range indices {
indices[i] = uint32(rawIndices[i])

View File

@ -12,7 +12,7 @@ This document contains the release notes for the LLVM Compiler Infrastructure,
release 8.0.0. Here we describe the status of LLVM, including major improvements
from the previous release, improvements in various subprojects of LLVM, and
some of the current users of the code. All LLVM releases may be downloaded
from the `LLVM releases web site <https://llvm.org/releases/>`_.
from the `LLVM releases web site <https://releases.llvm.org/>`_.
For more information about LLVM, including information about the latest
release, please check out the `main LLVM web site <https://llvm.org/>`_. If you
@ -39,14 +39,19 @@ setting the ``LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN`` CMake variable to
``ON``.
Known Issues
============
These are issues that couldn't be fixed before the release. See the bug reports
for the latest status.
* `PR40547 <https://llvm.org/pr40547>`_ Clang gets miscompiled by trunk GCC.
* `PR40761 <https://llvm.org/pr40761>`_ "asan-dynamic" doesn't work on FreeBSD.
Non-comprehensive list of changes in this release
=================================================
.. NOTE
For small 1-3 sentence descriptions, just add an entry at the end of
this list. If your description won't fit comfortably in one bullet
point (e.g. maybe you would like to give an example of the
functionality, or simply have a lot to talk about), see the `NOTE` below
for adding a new subsection.
* The **llvm-cov** tool can now export lcov trace files using the
`-format=lcov` option of the `export` command.
@ -80,15 +85,7 @@ Non-comprehensive list of changes in this release
available in the `RFC
<https://lists.llvm.org/pipermail/llvm-dev/2018-November/127461.html>`_.
.. NOTE
If you would like to document a larger change, then you can add a
subsection about it right here. You can copy the following boilerplate
and un-indent it (the indentation causes it to be inside this comment).
Special New Feature
-------------------
Makes programs 10x faster by doing Special New Thing.
* Windows support for libFuzzer (x86_64).
Changes to the LLVM IR
----------------------
@ -110,17 +107,12 @@ Changes to the AArch64 Target
on ARM.
Changes to the ARM Backend
--------------------------
During this release ...
Changes to the Hexagon Target
-----------------------------
* Added support for Hexagon/HVX V66 ISA.
Changes to the MIPS Target
--------------------------
@ -142,6 +134,7 @@ Changes to the MIPS Target
* Numerous bug fixes and code cleanups.
Changes to the PowerPC Target
-----------------------------
@ -153,7 +146,7 @@ Changes to the PowerPC Target
* Better overload rules for compatible vector type parameter
* Support constraint wi, modifier x and VSX registers in inline asm
* Support constraint 'wi', modifier 'x' and VSX registers in inline asm
* More ``__float128`` support
@ -198,15 +191,6 @@ Changes to the X86 Target
* ADCX instruction will no longer be emitted. This instruction is rarely better
than the legacy ADC instruction and just increased code size.
Changes to the AMDGPU Target
-----------------------------
During this release ...
Changes to the AVR Target
-----------------------------
During this release ...
Changes to the WebAssembly Target
---------------------------------
@ -220,25 +204,16 @@ use for it will be to add support for returning small structs as multiple
return values, once the underlying WebAssembly platform itself supports it.
Additionally, multithreading support is not yet included in the stable ABI.
Changes to the Nios2 Target
---------------------------
* The Nios2 target was removed from this release.
Changes to the OCaml bindings
-----------------------------
Changes to the C API
--------------------
Changes to the DAG infrastructure
---------------------------------
Changes to LLDB
===============
* Printed source code is now syntax highlighted in the terminal (only for C
languages).

View File

@ -471,9 +471,18 @@ void AArch64AsmPrinter::EmitJumpTableInfo() {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
const Function &F = MF->getFunction();
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
OutStreamer->SwitchSection(ReadOnlySec);
bool JTInDiffSection =
!STI->isTargetCOFF() ||
!TLOF.shouldPutJumpTableInFunctionSection(
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
F);
if (JTInDiffSection) {
// Drop it in the readonly section.
MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(F, TM);
OutStreamer->SwitchSection(ReadOnlySec);
}
auto AFI = MF->getInfo<AArch64FunctionInfo>();
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {

View File

@ -2108,9 +2108,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
++MBBI;
if (MBBI->isTerminator())
return;
// Create an UnwindHelp object.
int UnwindHelpFI =
MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
@ -2118,8 +2115,10 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
// We need to store -2 into the UnwindHelp object at the start of the
// function.
DebugLoc DL;
RS->enterBasicBlock(MBB);
unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0);
RS->enterBasicBlockEnd(MBB);
RS->backward(std::prev(MBBI));
unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
assert(DstReg && "There must be a free register after frame setup");
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
.addReg(DstReg, getKillRegState(true))

View File

@ -209,8 +209,8 @@ static std::string computeDataLayout(const Triple &TT,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
// AArch64 Darwin is always PIC.
if (TT.isOSDarwin())
// AArch64 Darwin and Windows are always PIC.
if (TT.isOSDarwin() || TT.isOSWindows())
return Reloc::PIC_;
// On ELF platforms the default static relocation model has a smart enough
// linker to cope with referencing external symbols defined in a shared

View File

@ -122,10 +122,3 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
(SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>;
def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
(SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>;
// The legalizer inserts an unnecessary `and 1` to make input conform
// to getBooleanContents, which we can lower away.
def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
(SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
(SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;

View File

@ -1138,15 +1138,23 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
if (AM.hasSymbolicDisplacement())
return true;
bool IsRIPRelTLS = false;
bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
if (IsRIPRel) {
SDValue Val = N.getOperand(0);
if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
IsRIPRelTLS = true;
}
// We can't use an addressing mode in the 64-bit large code model. In the
// medium code model, we use can use an mode when RIP wrappers are present.
// That signifies access to globals that are known to be "near", such as the
// GOT itself.
// We can't use an addressing mode in the 64-bit large code model.
// Global TLS addressing is an exception. In the medium code model,
// we can use an addressing mode when RIP wrappers are present.
// That signifies access to globals that are known to be "near",
// such as the GOT itself.
CodeModel::Model M = TM.getCodeModel();
if (Subtarget->is64Bit() &&
(M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
((M == CodeModel::Large && !IsRIPRelTLS) ||
(M == CodeModel::Medium && !IsRIPRel)))
return true;
// Base and index reg must be 0 in order to use %rip as base.

View File

@ -38134,8 +38134,11 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
return true;
// See if this is a single use constant which can be constant folded.
SDValue BC = peekThroughOneUseBitcasts(Op);
return ISD::isBuildVectorOfConstantSDNodes(BC.getNode());
// NOTE: We don't peek through bitcasts here because there is currently
// no support for constant folding truncate+bitcast+vector_of_constants. So
// we'll just end up with a truncate on both operands which will
// get turned back into (truncate (binop)) causing an infinite loop.
return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
};
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {

View File

@ -0,0 +1,48 @@
; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-enable-compress-jump-tables=0 | FileCheck %s
define void @f(i32 %x) {
entry:
switch i32 %x, label %sw.epilog [
i32 0, label %sw.bb
i32 1, label %sw.bb1
i32 2, label %sw.bb2
i32 3, label %sw.bb3
]
sw.bb: ; preds = %entry
tail call void @g(i32 0) #2
br label %sw.epilog
sw.bb1: ; preds = %entry
tail call void @g(i32 1) #2
br label %sw.epilog
sw.bb2: ; preds = %entry
tail call void @g(i32 2) #2
br label %sw.epilog
sw.bb3: ; preds = %entry
tail call void @g(i32 3) #2
br label %sw.epilog
sw.epilog: ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
tail call void @g(i32 10) #2
ret void
}
declare void @g(i32)
; CHECK: .text
; CHECK: f:
; CHECK: .seh_proc f
; CHECK: b g
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LJTI0_0:
; CHECK: .word .LBB0_2-.LJTI0_0
; CHECK: .word .LBB0_3-.LJTI0_0
; CHECK: .word .LBB0_4-.LJTI0_0
; CHECK: .word .LBB0_5-.LJTI0_0
; CHECK: .section .xdata,"dr"
; CHECK: .seh_handlerdata
; CHECK: .text
; CHECK: .seh_endproc

View File

@ -0,0 +1,40 @@
; RUN: llc < %s | FileCheck %s
; Make sure the prologue is sane. (Doesn't need to exactly match this,
; but the original issue only reproduced if the cbz was immediately
; after the frame setup.)
; CHECK: sub sp, sp, #32
; CHECK-NEXT: stp x29, x30, [sp, #16]
; CHECK-NEXT: add x29, sp, #16
; CHECK-NEXT: orr x1, xzr, #0xfffffffffffffffe
; CHECK-NEXT: stur x1, [x29, #-16]
; CHECK-NEXT: cbz w0, .LBB0_2
target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-windows-msvc19.11.0"
; Function Attrs: uwtable
define dso_local void @"?f@@YAXH@Z"(i32 %x) local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
entry:
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %try.cont, label %if.then
if.then: ; preds = %entry
invoke void @"?g@@YAXXZ"()
to label %try.cont unwind label %catch.dispatch
catch.dispatch: ; preds = %if.then
%0 = catchswitch within none [label %catch] unwind to caller
catch: ; preds = %catch.dispatch
%1 = catchpad within %0 [i8* null, i32 64, i8* null]
catchret from %1 to label %try.cont
try.cont: ; preds = %entry, %if.then, %catch
ret void
}
declare dso_local void @"?g@@YAXXZ"() local_unnamed_addr #1
declare dso_local i32 @__CxxFrameHandler3(...)

View File

@ -22,8 +22,8 @@
; CHECK: add x29, sp, #32
; CHECK: sub sp, sp, #624
; CHECK: mov x19, sp
; CHECK: orr x1, xzr, #0xfffffffffffffffe
; CHECK: stur x1, [x19]
; CHECK: orr x0, xzr, #0xfffffffffffffffe
; CHECK: stur x0, [x19]
; Now check that x is stored at fp - 20. We check that this is the same
; location accessed from the funclet to retrieve x.

View File

@ -17,8 +17,10 @@ define i32 @select_i32_bool(i1 zeroext %a, i32 %b, i32 %c) {
; CHECK-LABEL: select_i32_bool_nozext:
; CHECK-NEXT: .functype select_i32_bool_nozext (i32, i32, i32) -> (i32){{$}}
; SLOW-NEXT: i32.select $push0=, $1, $2, $0{{$}}
; SLOW-NEXT: return $pop0{{$}}
; SLOW-NEXT: i32.const $push0=, 1{{$}}
; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}}
; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}}
; SLOW-NEXT: return $pop2{{$}}
define i32 @select_i32_bool_nozext(i1 %a, i32 %b, i32 %c) {
%cond = select i1 %a, i32 %b, i32 %c
ret i32 %cond
@ -55,8 +57,10 @@ define i64 @select_i64_bool(i1 zeroext %a, i64 %b, i64 %c) {
; CHECK-LABEL: select_i64_bool_nozext:
; CHECK-NEXT: .functype select_i64_bool_nozext (i32, i64, i64) -> (i64){{$}}
; SLOW-NEXT: i64.select $push0=, $1, $2, $0{{$}}
; SLOW-NEXT: return $pop0{{$}}
; SLOW-NEXT: i32.const $push0=, 1{{$}}
; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}}
; SLOW-NEXT: i64.select $push2=, $1, $2, $pop1{{$}}
; SLOW-NEXT: return $pop2{{$}}
define i64 @select_i64_bool_nozext(i1 %a, i64 %b, i64 %c) {
%cond = select i1 %a, i64 %b, i64 %c
ret i64 %cond
@ -157,3 +161,16 @@ define double @select_f64_ne(i32 %a, double %b, double %c) {
%cond = select i1 %cmp, double %b, double %c
ret double %cond
}
; CHECK-LABEL: pr40805:
; CHECK-NEXT: .functype pr40805 (i32, i32, i32) -> (i32){{$}}
; SLOW-NEXT: i32.const $push0=, 1{{$}}
; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}}
; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}}
; SLOW-NEXT: return $pop2{{$}}
define i32 @pr40805(i32 %x, i32 %y, i32 %z) {
%a = and i32 %x, 1
%b = icmp ne i32 %a, 0
%c = select i1 %b, i32 %y, i32 %z
ret i32 %c
}

View File

@ -29,7 +29,7 @@ define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) {
; CHECK-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <16 x i8> @select_v16i8(i1 %c, <16 x i8> %x, <16 x i8> %y) {
define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %x, <16 x i8> %y) {
%res = select i1 %c, <16 x i8> %x, <16 x i8> %y
ret <16 x i8> %res
}
@ -99,7 +99,7 @@ define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) {
; CHECK-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <8 x i16> @select_v8i16(i1 %c, <8 x i16> %x, <8 x i16> %y) {
define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %x, <8 x i16> %y) {
%res = select i1 %c, <8 x i16> %x, <8 x i16> %y
ret <8 x i16> %res
}
@ -170,7 +170,7 @@ define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) {
; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <4 x i32> @select_v4i32(i1 %c, <4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %x, <4 x i32> %y) {
%res = select i1 %c, <4 x i32> %x, <4 x i32> %y
ret <4 x i32> %res
}
@ -240,7 +240,7 @@ define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) {
; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <2 x i64> @select_v2i64(i1 %c, <2 x i64> %x, <2 x i64> %y) {
define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %x, <2 x i64> %y) {
%res = select i1 %c, <2 x i64> %x, <2 x i64> %y
ret <2 x i64> %res
}
@ -313,7 +313,7 @@ define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) {
; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <4 x float> @select_v4f32(i1 %c, <4 x float> %x, <4 x float> %y) {
define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %x, <4 x float> %y) {
%res = select i1 %c, <4 x float> %x, <4 x float> %y
ret <4 x float> %res
}
@ -383,7 +383,7 @@ define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y
; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
; CHECK-NEXT: return $pop[[R]]{{$}}
define <2 x double> @select_v2f64(i1 %c, <2 x double> %x, <2 x double> %y) {
define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %x, <2 x double> %y) {
%res = select i1 %c, <2 x double> %x, <2 x double> %y
ret <2 x double> %res
}

View File

@ -37,6 +37,8 @@ target triple = "x86_64--linux"
@global_data = dso_local global [10 x i32] [i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
@static_data = internal global [10 x i32] zeroinitializer, align 16
@extern_data = external global [10 x i32], align 16
@thread_data = external thread_local global i32, align 4
define dso_local i32* @lea_static_data() #0 {
; SMALL-STATIC-LABEL: lea_static_data:
@ -373,6 +375,70 @@ define dso_local void ()* @lea_extern_fn() #0 {
ret void ()* @extern_fn
}
; FIXME: The result is the same for the small, medium and large models because
; we specify the pie option in the test case, and the TLS model used is
; initial-exec. For pic code, the large model code for pic tls should be
; emitted as below.
; .L3:
; leaq .L3(%rip), %rbx
; movabsq $_GLOBAL_OFFSET_TABLE_-.L3, %r11
; addq %r11, %rbx
; leaq thread_data@TLSGD(%rip), %rdi
; movabsq $__tls_get_addr@PLTOFF, %rax
; addq %rbx, %rax
; call *%rax
; movl (%rax), %eax
; The medium and small model code for pic tls should be emitted as below.
; data16
; leaq thread_data@TLSGD(%rip), %rdi
; data16
; data16
; rex64
; callq __tls_get_addr@PLT
; movl (%rax), %eax
define dso_local i32 @load_thread_data() #0 {
; SMALL-STATIC-LABEL: load_thread_data:
; SMALL-STATIC: # %bb.0:
; SMALL-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
; SMALL-STATIC-NEXT: movl %fs:(%rax), %eax
; SMALL-STATIC-NEXT: retq
;
; MEDIUM-STATIC-LABEL: load_thread_data:
; MEDIUM-STATIC: # %bb.0:
; MEDIUM-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
; MEDIUM-STATIC-NEXT: movl %fs:(%rax), %eax
; MEDIUM-STATIC-NEXT: retq
;
; LARGE-STATIC-LABEL: load_thread_data:
; LARGE-STATIC: # %bb.0:
; LARGE-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
; LARGE-STATIC-NEXT: movl %fs:(%rax), %eax
; LARGE-STATIC-NEXT: retq
;
; SMALL-PIC-LABEL: load_thread_data:
; SMALL-PIC: # %bb.0:
; SMALL-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
; SMALL-PIC-NEXT: movl %fs:(%rax), %eax
; SMALL-PIC-NEXT: retq
;
; MEDIUM-PIC-LABEL: load_thread_data:
; MEDIUM-PIC: # %bb.0:
; MEDIUM-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
; MEDIUM-PIC-NEXT: movl %fs:(%rax), %eax
; MEDIUM-PIC-NEXT: retq
;
; LARGE-PIC-LABEL: load_thread_data:
; LARGE-PIC: # %bb.0:
; LARGE-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
; LARGE-PIC-NEXT: movl %fs:(%rax), %eax
; LARGE-PIC-NEXT: retq
;
%1 = load i32, i32* @thread_data, align 4
ret i32 %1
}
attributes #0 = { noinline nounwind uwtable }
!llvm.module.flags = !{!0, !1, !2}

View File

@ -0,0 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx2 | FileCheck %s
; Make sure this sequence doesn't hang in DAG combine.
define <8 x i32> @foo(<8 x i64> %x, <4 x i64> %y) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vandps {{\.LCPI.*}}, %ymm1, %ymm1
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT: retl
%a = shufflevector <4 x i64> %y, <4 x i64> <i64 12345, i64 67890, i64 13579, i64 24680>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%b = and <8 x i64> %x, %a
%c = trunc <8 x i64> %b to <8 x i32>
ret <8 x i32> %c
}

View File

@ -18,7 +18,6 @@
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
@ -242,6 +241,31 @@ StackTrieNode *findOrCreateStackNode(
return CurrentStack;
}
// Writes a single trace-viewer event record to OS as one JSON-style object
// with the keys "name", "ph", "tid", "pid", "ts" and "sf".
//
//   Version            - trace format version; selects the record layout.
//   OS                 - stream the record is appended to.
//   FuncId             - function id used for the "name" field.
//   TId, PId           - thread id and process id of the event.
//   Symbolize          - when true, the name comes from
//                        FuncIdHelper.SymbolOrNumber(FuncId); otherwise the
//                        numeric id is printed as a string.
//   EventTimestampUs   - value written to the "ts" field.
//   StackCursor        - node whose ExtraData.id is written to "sf".
//   FunctionPhenotype  - value written to the "ph" field (callers in this
//                        file pass "B" or "E").
//
// Only the leading indent and the object itself are written; no trailing
// comma or newline is emitted here, so separators are left to the caller.
void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId,
                            uint32_t TId, uint32_t PId, bool Symbolize,
                            const FuncIdConversionHelper &FuncIdHelper,
                            double EventTimestampUs,
                            const StackTrieNode &StackCursor,
                            StringRef FunctionPhenotype) {
  // Leading indent for the record.
  OS << " ";

  // Resolve the display name once; both layouts print it the same way.
  const std::string EventName = Symbolize
                                    ? FuncIdHelper.SymbolOrNumber(FuncId)
                                    : llvm::to_string(FuncId);

  if (Version < 3) {
    // Older layout: PId is not used, "pid" is the literal "1", and the
    // timestamp uses the `f3` format style.
    // NOTE(review): the newer layout below uses `f4` -- confirm the
    // difference in timestamp style is intentional.
    OS << llvm::formatv(
        R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
        R"("ts" : "{3:f3}", "sf" : "{4}" })",
        EventName, FunctionPhenotype, TId, EventTimestampUs,
        StackCursor.ExtraData.id);
    return;
  }

  // Version 3 and later: the record carries the real process id.
  OS << llvm::formatv(
      R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )"
      R"("ts" : "{4:f4}", "sf" : "{5}" })",
      EventName, FunctionPhenotype, TId, PId, EventTimestampUs,
      StackCursor.ExtraData.id);
}
} // namespace
void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
@ -252,14 +276,18 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
unsigned id_counter = 0;
OS << "{\n \"traceEvents\": [";
DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
std::forward_list<StackTrieNode> NodeStore{};
// Create a JSON Array which will hold all trace events.
json::Array TraceEvents;
int loop_count = 0;
for (const auto &R : Records) {
if (loop_count++ == 0)
OS << "\n";
else
OS << ",\n";
// Chrome trace event format always wants data in micros.
// CyclesPerMicro = CycleHertz / 10^6
// TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
@ -284,15 +312,8 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
// type of B for begin or E for end, thread id, process id,
// timestamp in microseconds, and a stack frame id. The ids are logged
// in an id dictionary after the events.
TraceEvents.push_back(json::Object({
{"name", Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
: llvm::to_string(R.FuncId)},
{"ph", "B"},
{"tid", llvm::to_string(R.TId)},
{"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
{"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
{"sf", llvm::to_string(StackCursor->ExtraData.id)},
}));
writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize,
FuncIdHelper, EventTimestampUs, *StackCursor, "B");
break;
case RecordTypes::EXIT:
case RecordTypes::TAIL_EXIT:
@ -303,51 +324,43 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
// (And/Or in loop termination below)
StackTrieNode *PreviousCursor = nullptr;
do {
TraceEvents.push_back(json::Object({
{"name", Symbolize
? FuncIdHelper.SymbolOrNumber(StackCursor->FuncId)
: llvm::to_string(StackCursor->FuncId)},
{"ph", "E"},
{"tid", llvm::to_string(R.TId)},
{"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
{"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
{"sf", llvm::to_string(StackCursor->ExtraData.id)},
}));
if (PreviousCursor != nullptr) {
OS << ",\n";
}
writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId,
Symbolize, FuncIdHelper, EventTimestampUs,
*StackCursor, "E");
PreviousCursor = StackCursor;
StackCursor = StackCursor->Parent;
} while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
break;
}
}
OS << "\n ],\n"; // Close the Trace Events array.
OS << " "
<< "\"displayTimeUnit\": \"ns\",\n";
// The stackFrames dictionary substantially reduces size of the output file by
// avoiding repeating the entire call stack of function names for each entry.
json::Object StackFrames;
for (const auto &Stack : StacksByStackId) {
const auto &StackId = Stack.first;
const auto &StackFunctionNode = Stack.second;
json::Object::iterator It;
std::tie(It, std::ignore) = StackFrames.insert({
llvm::to_string(StackId),
json::Object{
{"name",
Symbolize ? FuncIdHelper.SymbolOrNumber(StackFunctionNode->FuncId)
: llvm::to_string(StackFunctionNode->FuncId)}},
});
if (StackFunctionNode->Parent != nullptr)
It->second.getAsObject()->insert(
{"parent", llvm::to_string(StackFunctionNode->Parent->ExtraData.id)});
OS << R"( "stackFrames": {)";
int stack_frame_count = 0;
for (auto map_iter : StacksByStackId) {
if (stack_frame_count++ == 0)
OS << "\n";
else
OS << ",\n";
OS << " ";
OS << llvm::formatv(
R"("{0}" : { "name" : "{1}")", map_iter.first,
(Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
: llvm::to_string(map_iter.second->FuncId)));
if (map_iter.second->Parent != nullptr)
OS << llvm::formatv(R"(, "parent": "{0}")",
map_iter.second->Parent->ExtraData.id);
OS << " }";
}
json::Object TraceJSON{
{"displayTimeUnit", "ns"},
{"traceEvents", std::move(TraceEvents)},
{"stackFrames", std::move(StackFrames)},
};
// Pretty-print the JSON using two spaces for indentations.
OS << formatv("{0:2}", json::Value(std::move(TraceJSON)));
OS << "\n }\n"; // Close the stack frames map.
OS << "}\n"; // Close the JSON entry.
}
namespace llvm {