Vendor import of llvm release_80 branch r355313:
https://llvm.org/svn/llvm-project/llvm/branches/release_80@355313
This commit is contained in:
parent
bd7f07563c
commit
1d6bb9f417
2
.gitignore
vendored
2
.gitignore
vendored
@ -72,6 +72,8 @@ docs/_build
|
||||
# VS2017 and VSCode config files.
|
||||
.vscode
|
||||
.vs
|
||||
# clangd index
|
||||
.clangd
|
||||
|
||||
#==============================================================================#
|
||||
# Files created in tree by the Go bindings.
|
||||
|
@ -1263,7 +1263,7 @@ func (v Value) Indices() []uint32 {
|
||||
num := C.LLVMGetNumIndices(v.C)
|
||||
indicesPtr := C.LLVMGetIndices(v.C)
|
||||
// https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices
|
||||
rawIndices := (*[1 << 30]C.uint)(unsafe.Pointer(indicesPtr))[:num:num]
|
||||
rawIndices := (*[1 << 20]C.uint)(unsafe.Pointer(indicesPtr))[:num:num]
|
||||
indices := make([]uint32, num)
|
||||
for i := range indices {
|
||||
indices[i] = uint32(rawIndices[i])
|
||||
|
@ -12,7 +12,7 @@ This document contains the release notes for the LLVM Compiler Infrastructure,
|
||||
release 8.0.0. Here we describe the status of LLVM, including major improvements
|
||||
from the previous release, improvements in various subprojects of LLVM, and
|
||||
some of the current users of the code. All LLVM releases may be downloaded
|
||||
from the `LLVM releases web site <https://llvm.org/releases/>`_.
|
||||
from the `LLVM releases web site <https://releases.llvm.org/>`_.
|
||||
|
||||
For more information about LLVM, including information about the latest
|
||||
release, please check out the `main LLVM web site <https://llvm.org/>`_. If you
|
||||
@ -39,14 +39,19 @@ setting the ``LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN`` CMake variable to
|
||||
``ON``.
|
||||
|
||||
|
||||
Known Issues
|
||||
============
|
||||
|
||||
These are issues that couldn't be fixed before the release. See the bug reports
|
||||
for the latest status.
|
||||
|
||||
* `PR40547 <https://llvm.org/pr40547>`_ Clang gets miscompiled by trunk GCC.
|
||||
|
||||
* `PR40761 <https://llvm.org/pr40761>`_ "asan-dynamic" doesn't work on FreeBSD.
|
||||
|
||||
|
||||
Non-comprehensive list of changes in this release
|
||||
=================================================
|
||||
.. NOTE
|
||||
For small 1-3 sentence descriptions, just add an entry at the end of
|
||||
this list. If your description won't fit comfortably in one bullet
|
||||
point (e.g. maybe you would like to give an example of the
|
||||
functionality, or simply have a lot to talk about), see the `NOTE` below
|
||||
for adding a new subsection.
|
||||
|
||||
* The **llvm-cov** tool can now export lcov trace files using the
|
||||
`-format=lcov` option of the `export` command.
|
||||
@ -80,15 +85,7 @@ Non-comprehensive list of changes in this release
|
||||
available in the `RFC
|
||||
<https://lists.llvm.org/pipermail/llvm-dev/2018-November/127461.html>`_.
|
||||
|
||||
.. NOTE
|
||||
If you would like to document a larger change, then you can add a
|
||||
subsection about it right here. You can copy the following boilerplate
|
||||
and un-indent it (the indentation causes it to be inside this comment).
|
||||
|
||||
Special New Feature
|
||||
-------------------
|
||||
|
||||
Makes programs 10x faster by doing Special New Thing.
|
||||
* Windows support for libFuzzer (x86_64).
|
||||
|
||||
Changes to the LLVM IR
|
||||
----------------------
|
||||
@ -110,17 +107,12 @@ Changes to the AArch64 Target
|
||||
on ARM.
|
||||
|
||||
|
||||
Changes to the ARM Backend
|
||||
--------------------------
|
||||
|
||||
During this release ...
|
||||
|
||||
|
||||
Changes to the Hexagon Target
|
||||
-----------------------------
|
||||
|
||||
* Added support for Hexagon/HVX V66 ISA.
|
||||
|
||||
|
||||
Changes to the MIPS Target
|
||||
--------------------------
|
||||
|
||||
@ -142,6 +134,7 @@ Changes to the MIPS Target
|
||||
|
||||
* Numerous bug fixes and code cleanups.
|
||||
|
||||
|
||||
Changes to the PowerPC Target
|
||||
-----------------------------
|
||||
|
||||
@ -153,7 +146,7 @@ Changes to the PowerPC Target
|
||||
|
||||
* Better overload rules for compatible vector type parameter
|
||||
|
||||
* Support constraint ‘wi’, modifier ‘x’ and VSX registers in inline asm
|
||||
* Support constraint 'wi', modifier 'x' and VSX registers in inline asm
|
||||
|
||||
* More ``__float128`` support
|
||||
|
||||
@ -198,15 +191,6 @@ Changes to the X86 Target
|
||||
* ADCX instruction will no longer be emitted. This instruction is rarely better
|
||||
than the legacy ADC instruction and just increased code size.
|
||||
|
||||
Changes to the AMDGPU Target
|
||||
-----------------------------
|
||||
|
||||
During this release ...
|
||||
|
||||
Changes to the AVR Target
|
||||
-----------------------------
|
||||
|
||||
During this release ...
|
||||
|
||||
Changes to the WebAssembly Target
|
||||
---------------------------------
|
||||
@ -220,25 +204,16 @@ use for it will be to add support for returning small structs as multiple
|
||||
return values, once the underlying WebAssembly platform itself supports it.
|
||||
Additionally, multithreading support is not yet included in the stable ABI.
|
||||
|
||||
|
||||
Changes to the Nios2 Target
|
||||
---------------------------
|
||||
|
||||
* The Nios2 target was removed from this release.
|
||||
|
||||
Changes to the OCaml bindings
|
||||
-----------------------------
|
||||
|
||||
|
||||
|
||||
Changes to the C API
|
||||
--------------------
|
||||
|
||||
|
||||
Changes to the DAG infrastructure
|
||||
---------------------------------
|
||||
|
||||
Changes to LLDB
|
||||
===============
|
||||
|
||||
* Printed source code is now syntax highlighted in the terminal (only for C
|
||||
languages).
|
||||
|
||||
|
@ -471,9 +471,18 @@ void AArch64AsmPrinter::EmitJumpTableInfo() {
|
||||
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
|
||||
if (JT.empty()) return;
|
||||
|
||||
const Function &F = MF->getFunction();
|
||||
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
|
||||
MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
|
||||
OutStreamer->SwitchSection(ReadOnlySec);
|
||||
bool JTInDiffSection =
|
||||
!STI->isTargetCOFF() ||
|
||||
!TLOF.shouldPutJumpTableInFunctionSection(
|
||||
MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
|
||||
F);
|
||||
if (JTInDiffSection) {
|
||||
// Drop it in the readonly section.
|
||||
MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(F, TM);
|
||||
OutStreamer->SwitchSection(ReadOnlySec);
|
||||
}
|
||||
|
||||
auto AFI = MF->getInfo<AArch64FunctionInfo>();
|
||||
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
|
||||
|
@ -2108,9 +2108,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
|
||||
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
|
||||
++MBBI;
|
||||
|
||||
if (MBBI->isTerminator())
|
||||
return;
|
||||
|
||||
// Create an UnwindHelp object.
|
||||
int UnwindHelpFI =
|
||||
MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
|
||||
@ -2118,8 +2115,10 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
|
||||
// We need to store -2 into the UnwindHelp object at the start of the
|
||||
// function.
|
||||
DebugLoc DL;
|
||||
RS->enterBasicBlock(MBB);
|
||||
unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0);
|
||||
RS->enterBasicBlockEnd(MBB);
|
||||
RS->backward(std::prev(MBBI));
|
||||
unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
|
||||
assert(DstReg && "There must be a free register after frame setup");
|
||||
BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
|
||||
BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
|
||||
.addReg(DstReg, getKillRegState(true))
|
||||
|
@ -209,8 +209,8 @@ static std::string computeDataLayout(const Triple &TT,
|
||||
|
||||
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
|
||||
Optional<Reloc::Model> RM) {
|
||||
// AArch64 Darwin is always PIC.
|
||||
if (TT.isOSDarwin())
|
||||
// AArch64 Darwin and Windows are always PIC.
|
||||
if (TT.isOSDarwin() || TT.isOSWindows())
|
||||
return Reloc::PIC_;
|
||||
// On ELF platforms the default static relocation model has a smart enough
|
||||
// linker to cope with referencing external symbols defined in a shared
|
||||
|
@ -122,10 +122,3 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
|
||||
(SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>;
|
||||
def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
|
||||
(SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>;
|
||||
|
||||
// The legalizer inserts an unnecessary `and 1` to make input conform
|
||||
// to getBooleanContents, which we can lower away.
|
||||
def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
|
||||
(SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
|
||||
def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
|
||||
(SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;
|
||||
|
@ -1138,15 +1138,23 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
|
||||
if (AM.hasSymbolicDisplacement())
|
||||
return true;
|
||||
|
||||
bool IsRIPRelTLS = false;
|
||||
bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
|
||||
if (IsRIPRel) {
|
||||
SDValue Val = N.getOperand(0);
|
||||
if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
|
||||
IsRIPRelTLS = true;
|
||||
}
|
||||
|
||||
// We can't use an addressing mode in the 64-bit large code model. In the
|
||||
// medium code model, we use can use an mode when RIP wrappers are present.
|
||||
// That signifies access to globals that are known to be "near", such as the
|
||||
// GOT itself.
|
||||
// We can't use an addressing mode in the 64-bit large code model.
|
||||
// Global TLS addressing is an exception. In the medium code model,
|
||||
// we can use a mode when RIP wrappers are present.
|
||||
// That signifies access to globals that are known to be "near",
|
||||
// such as the GOT itself.
|
||||
CodeModel::Model M = TM.getCodeModel();
|
||||
if (Subtarget->is64Bit() &&
|
||||
(M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
|
||||
((M == CodeModel::Large && !IsRIPRelTLS) ||
|
||||
(M == CodeModel::Medium && !IsRIPRel)))
|
||||
return true;
|
||||
|
||||
// Base and index reg must be 0 in order to use %rip as base.
|
||||
|
@ -38134,8 +38134,11 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
|
||||
return true;
|
||||
|
||||
// See if this is a single use constant which can be constant folded.
|
||||
SDValue BC = peekThroughOneUseBitcasts(Op);
|
||||
return ISD::isBuildVectorOfConstantSDNodes(BC.getNode());
|
||||
// NOTE: We don't peek through bitcasts here because there is currently
|
||||
// no support for constant folding truncate+bitcast+vector_of_constants. So
|
||||
// we'll just end up with a truncate on both operands which will
|
||||
// get turned back into (truncate (binop)) causing an infinite loop.
|
||||
return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
|
||||
};
|
||||
|
||||
auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
|
||||
|
48
test/CodeGen/AArch64/win64-jumptable.ll
Normal file
48
test/CodeGen/AArch64/win64-jumptable.ll
Normal file
@ -0,0 +1,48 @@
|
||||
; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-enable-compress-jump-tables=0 | FileCheck %s
|
||||
|
||||
define void @f(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %sw.epilog [
|
||||
i32 0, label %sw.bb
|
||||
i32 1, label %sw.bb1
|
||||
i32 2, label %sw.bb2
|
||||
i32 3, label %sw.bb3
|
||||
]
|
||||
|
||||
sw.bb: ; preds = %entry
|
||||
tail call void @g(i32 0) #2
|
||||
br label %sw.epilog
|
||||
|
||||
sw.bb1: ; preds = %entry
|
||||
tail call void @g(i32 1) #2
|
||||
br label %sw.epilog
|
||||
|
||||
sw.bb2: ; preds = %entry
|
||||
tail call void @g(i32 2) #2
|
||||
br label %sw.epilog
|
||||
|
||||
sw.bb3: ; preds = %entry
|
||||
tail call void @g(i32 3) #2
|
||||
br label %sw.epilog
|
||||
|
||||
sw.epilog: ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
|
||||
tail call void @g(i32 10) #2
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @g(i32)
|
||||
|
||||
; CHECK: .text
|
||||
; CHECK: f:
|
||||
; CHECK: .seh_proc f
|
||||
; CHECK: b g
|
||||
; CHECK-NEXT: .p2align 2
|
||||
; CHECK-NEXT: .LJTI0_0:
|
||||
; CHECK: .word .LBB0_2-.LJTI0_0
|
||||
; CHECK: .word .LBB0_3-.LJTI0_0
|
||||
; CHECK: .word .LBB0_4-.LJTI0_0
|
||||
; CHECK: .word .LBB0_5-.LJTI0_0
|
||||
; CHECK: .section .xdata,"dr"
|
||||
; CHECK: .seh_handlerdata
|
||||
; CHECK: .text
|
||||
; CHECK: .seh_endproc
|
40
test/CodeGen/AArch64/wineh-try-catch-cbz.ll
Normal file
40
test/CodeGen/AArch64/wineh-try-catch-cbz.ll
Normal file
@ -0,0 +1,40 @@
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
|
||||
; Make sure the prologue is sane. (Doesn't need to exactly match this,
|
||||
; but the original issue only reproduced if the cbz was immediately
|
||||
; after the frame setup.)
|
||||
|
||||
; CHECK: sub sp, sp, #32
|
||||
; CHECK-NEXT: stp x29, x30, [sp, #16]
|
||||
; CHECK-NEXT: add x29, sp, #16
|
||||
; CHECK-NEXT: orr x1, xzr, #0xfffffffffffffffe
|
||||
; CHECK-NEXT: stur x1, [x29, #-16]
|
||||
; CHECK-NEXT: cbz w0, .LBB0_2
|
||||
|
||||
target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-windows-msvc19.11.0"
|
||||
|
||||
; Function Attrs: uwtable
|
||||
define dso_local void @"?f@@YAXH@Z"(i32 %x) local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
|
||||
entry:
|
||||
%cmp = icmp eq i32 %x, 0
|
||||
br i1 %cmp, label %try.cont, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
invoke void @"?g@@YAXXZ"()
|
||||
to label %try.cont unwind label %catch.dispatch
|
||||
|
||||
catch.dispatch: ; preds = %if.then
|
||||
%0 = catchswitch within none [label %catch] unwind to caller
|
||||
|
||||
catch: ; preds = %catch.dispatch
|
||||
%1 = catchpad within %0 [i8* null, i32 64, i8* null]
|
||||
catchret from %1 to label %try.cont
|
||||
|
||||
try.cont: ; preds = %entry, %if.then, %catch
|
||||
ret void
|
||||
}
|
||||
|
||||
declare dso_local void @"?g@@YAXXZ"() local_unnamed_addr #1
|
||||
|
||||
declare dso_local i32 @__CxxFrameHandler3(...)
|
@ -22,8 +22,8 @@
|
||||
; CHECK: add x29, sp, #32
|
||||
; CHECK: sub sp, sp, #624
|
||||
; CHECK: mov x19, sp
|
||||
; CHECK: orr x1, xzr, #0xfffffffffffffffe
|
||||
; CHECK: stur x1, [x19]
|
||||
; CHECK: orr x0, xzr, #0xfffffffffffffffe
|
||||
; CHECK: stur x0, [x19]
|
||||
|
||||
; Now check that x is stored at fp - 20. We check that this is the same
|
||||
; location accessed from the funclet to retrieve x.
|
||||
|
@ -17,8 +17,10 @@ define i32 @select_i32_bool(i1 zeroext %a, i32 %b, i32 %c) {
|
||||
|
||||
; CHECK-LABEL: select_i32_bool_nozext:
|
||||
; CHECK-NEXT: .functype select_i32_bool_nozext (i32, i32, i32) -> (i32){{$}}
|
||||
; SLOW-NEXT: i32.select $push0=, $1, $2, $0{{$}}
|
||||
; SLOW-NEXT: return $pop0{{$}}
|
||||
; SLOW-NEXT: i32.const $push0=, 1{{$}}
|
||||
; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}}
|
||||
; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}}
|
||||
; SLOW-NEXT: return $pop2{{$}}
|
||||
define i32 @select_i32_bool_nozext(i1 %a, i32 %b, i32 %c) {
|
||||
%cond = select i1 %a, i32 %b, i32 %c
|
||||
ret i32 %cond
|
||||
@ -55,8 +57,10 @@ define i64 @select_i64_bool(i1 zeroext %a, i64 %b, i64 %c) {
|
||||
|
||||
; CHECK-LABEL: select_i64_bool_nozext:
|
||||
; CHECK-NEXT: .functype select_i64_bool_nozext (i32, i64, i64) -> (i64){{$}}
|
||||
; SLOW-NEXT: i64.select $push0=, $1, $2, $0{{$}}
|
||||
; SLOW-NEXT: return $pop0{{$}}
|
||||
; SLOW-NEXT: i32.const $push0=, 1{{$}}
|
||||
; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}}
|
||||
; SLOW-NEXT: i64.select $push2=, $1, $2, $pop1{{$}}
|
||||
; SLOW-NEXT: return $pop2{{$}}
|
||||
define i64 @select_i64_bool_nozext(i1 %a, i64 %b, i64 %c) {
|
||||
%cond = select i1 %a, i64 %b, i64 %c
|
||||
ret i64 %cond
|
||||
@ -157,3 +161,16 @@ define double @select_f64_ne(i32 %a, double %b, double %c) {
|
||||
%cond = select i1 %cmp, double %b, double %c
|
||||
ret double %cond
|
||||
}
|
||||
|
||||
; CHECK-LABEL: pr40805:
|
||||
; CHECK-NEXT: .functype pr40805 (i32, i32, i32) -> (i32){{$}}
|
||||
; SLOW-NEXT: i32.const $push0=, 1{{$}}
|
||||
; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}}
|
||||
; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}}
|
||||
; SLOW-NEXT: return $pop2{{$}}
|
||||
define i32 @pr40805(i32 %x, i32 %y, i32 %z) {
|
||||
%a = and i32 %x, 1
|
||||
%b = icmp ne i32 %a, 0
|
||||
%c = select i1 %b, i32 %y, i32 %z
|
||||
ret i32 %c
|
||||
}
|
||||
|
@ -29,7 +29,7 @@ define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) {
|
||||
; CHECK-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <16 x i8> @select_v16i8(i1 %c, <16 x i8> %x, <16 x i8> %y) {
|
||||
define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %x, <16 x i8> %y) {
|
||||
%res = select i1 %c, <16 x i8> %x, <16 x i8> %y
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
@ -99,7 +99,7 @@ define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) {
|
||||
; CHECK-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <8 x i16> @select_v8i16(i1 %c, <8 x i16> %x, <8 x i16> %y) {
|
||||
define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %x, <8 x i16> %y) {
|
||||
%res = select i1 %c, <8 x i16> %x, <8 x i16> %y
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
@ -170,7 +170,7 @@ define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) {
|
||||
; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <4 x i32> @select_v4i32(i1 %c, <4 x i32> %x, <4 x i32> %y) {
|
||||
define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %x, <4 x i32> %y) {
|
||||
%res = select i1 %c, <4 x i32> %x, <4 x i32> %y
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
@ -240,7 +240,7 @@ define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) {
|
||||
; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <2 x i64> @select_v2i64(i1 %c, <2 x i64> %x, <2 x i64> %y) {
|
||||
define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %x, <2 x i64> %y) {
|
||||
%res = select i1 %c, <2 x i64> %x, <2 x i64> %y
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
@ -313,7 +313,7 @@ define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) {
|
||||
; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <4 x float> @select_v4f32(i1 %c, <4 x float> %x, <4 x float> %y) {
|
||||
define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %x, <4 x float> %y) {
|
||||
%res = select i1 %c, <4 x float> %x, <4 x float> %y
|
||||
ret <4 x float> %res
|
||||
}
|
||||
@ -383,7 +383,7 @@ define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y
|
||||
; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}}
|
||||
; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}}
|
||||
; CHECK-NEXT: return $pop[[R]]{{$}}
|
||||
define <2 x double> @select_v2f64(i1 %c, <2 x double> %x, <2 x double> %y) {
|
||||
define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %x, <2 x double> %y) {
|
||||
%res = select i1 %c, <2 x double> %x, <2 x double> %y
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
@ -37,6 +37,8 @@ target triple = "x86_64--linux"
|
||||
@global_data = dso_local global [10 x i32] [i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
|
||||
@static_data = internal global [10 x i32] zeroinitializer, align 16
|
||||
@extern_data = external global [10 x i32], align 16
|
||||
@thread_data = external thread_local global i32, align 4
|
||||
|
||||
|
||||
define dso_local i32* @lea_static_data() #0 {
|
||||
; SMALL-STATIC-LABEL: lea_static_data:
|
||||
@ -373,6 +375,70 @@ define dso_local void ()* @lea_extern_fn() #0 {
|
||||
ret void ()* @extern_fn
|
||||
}
|
||||
|
||||
; FIXME: The result is the same for the small, medium and large models because
|
||||
; the test case specifies the pie option and the TLS type is initial exec.
|
||||
; For PIC code, the large model code for PIC TLS should be emitted as below.
|
||||
|
||||
; .L3:
|
||||
; leaq .L3(%rip), %rbx
|
||||
; movabsq $_GLOBAL_OFFSET_TABLE_-.L3, %r11
|
||||
; addq %r11, %rbx
|
||||
; leaq thread_data@TLSGD(%rip), %rdi
|
||||
; movabsq $__tls_get_addr@PLTOFF, %rax
|
||||
; addq %rbx, %rax
|
||||
; call *%rax
|
||||
; movl (%rax), %eax
|
||||
|
||||
; The medium and small model code for pic tls should be emitted as below.
|
||||
; data16
|
||||
; leaq thread_data@TLSGD(%rip), %rdi
|
||||
; data16
|
||||
; data16
|
||||
; rex64
|
||||
; callq __tls_get_addr@PLT
|
||||
; movl (%rax), %eax
|
||||
|
||||
define dso_local i32 @load_thread_data() #0 {
|
||||
; SMALL-STATIC-LABEL: load_thread_data:
|
||||
; SMALL-STATIC: # %bb.0:
|
||||
; SMALL-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
|
||||
; SMALL-STATIC-NEXT: movl %fs:(%rax), %eax
|
||||
; SMALL-STATIC-NEXT: retq
|
||||
;
|
||||
; MEDIUM-STATIC-LABEL: load_thread_data:
|
||||
; MEDIUM-STATIC: # %bb.0:
|
||||
; MEDIUM-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
|
||||
; MEDIUM-STATIC-NEXT: movl %fs:(%rax), %eax
|
||||
; MEDIUM-STATIC-NEXT: retq
|
||||
;
|
||||
; LARGE-STATIC-LABEL: load_thread_data:
|
||||
; LARGE-STATIC: # %bb.0:
|
||||
; LARGE-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
|
||||
; LARGE-STATIC-NEXT: movl %fs:(%rax), %eax
|
||||
; LARGE-STATIC-NEXT: retq
|
||||
;
|
||||
; SMALL-PIC-LABEL: load_thread_data:
|
||||
; SMALL-PIC: # %bb.0:
|
||||
; SMALL-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
|
||||
; SMALL-PIC-NEXT: movl %fs:(%rax), %eax
|
||||
; SMALL-PIC-NEXT: retq
|
||||
;
|
||||
; MEDIUM-PIC-LABEL: load_thread_data:
|
||||
; MEDIUM-PIC: # %bb.0:
|
||||
; MEDIUM-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
|
||||
; MEDIUM-PIC-NEXT: movl %fs:(%rax), %eax
|
||||
; MEDIUM-PIC-NEXT: retq
|
||||
;
|
||||
; LARGE-PIC-LABEL: load_thread_data:
|
||||
; LARGE-PIC: # %bb.0:
|
||||
; LARGE-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax
|
||||
; LARGE-PIC-NEXT: movl %fs:(%rax), %eax
|
||||
; LARGE-PIC-NEXT: retq
|
||||
;
|
||||
%1 = load i32, i32* @thread_data, align 4
|
||||
ret i32 %1
|
||||
}
|
||||
|
||||
attributes #0 = { noinline nounwind uwtable }
|
||||
|
||||
!llvm.module.flags = !{!0, !1, !2}
|
||||
|
22
test/CodeGen/X86/pr40891.ll
Normal file
22
test/CodeGen/X86/pr40891.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx2 | FileCheck %s
|
||||
|
||||
; Make sure this sequence doesn't hang in DAG combine.
|
||||
|
||||
define <8 x i32> @foo(<8 x i64> %x, <4 x i64> %y) {
|
||||
; CHECK-LABEL: foo:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vandps {{\.LCPI.*}}, %ymm1, %ymm1
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
|
||||
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; CHECK-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
|
||||
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
|
||||
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retl
|
||||
%a = shufflevector <4 x i64> %y, <4 x i64> <i64 12345, i64 67890, i64 13579, i64 24680>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%b = and <8 x i64> %x, %a
|
||||
%c = trunc <8 x i64> %b to <8 x i32>
|
||||
ret <8 x i32> %c
|
||||
}
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "llvm/Support/EndianStream.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include "llvm/Support/JSON.h"
|
||||
#include "llvm/Support/ScopedPrinter.h"
|
||||
#include "llvm/Support/YAMLTraits.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
@ -242,6 +241,31 @@ StackTrieNode *findOrCreateStackNode(
|
||||
return CurrentStack;
|
||||
}
|
||||
|
||||
void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId,
|
||||
uint32_t TId, uint32_t PId, bool Symbolize,
|
||||
const FuncIdConversionHelper &FuncIdHelper,
|
||||
double EventTimestampUs,
|
||||
const StackTrieNode &StackCursor,
|
||||
StringRef FunctionPhenotype) {
|
||||
OS << " ";
|
||||
if (Version >= 3) {
|
||||
OS << llvm::formatv(
|
||||
R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )"
|
||||
R"("ts" : "{4:f4}", "sf" : "{5}" })",
|
||||
(Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
|
||||
: llvm::to_string(FuncId)),
|
||||
FunctionPhenotype, TId, PId, EventTimestampUs,
|
||||
StackCursor.ExtraData.id);
|
||||
} else {
|
||||
OS << llvm::formatv(
|
||||
R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )"
|
||||
R"("ts" : "{3:f3}", "sf" : "{4}" })",
|
||||
(Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId)
|
||||
: llvm::to_string(FuncId)),
|
||||
FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
|
||||
@ -252,14 +276,18 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
|
||||
|
||||
unsigned id_counter = 0;
|
||||
|
||||
OS << "{\n \"traceEvents\": [";
|
||||
DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{};
|
||||
DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{};
|
||||
DenseMap<unsigned, StackTrieNode *> StacksByStackId{};
|
||||
std::forward_list<StackTrieNode> NodeStore{};
|
||||
|
||||
// Create a JSON Array which will hold all trace events.
|
||||
json::Array TraceEvents;
|
||||
int loop_count = 0;
|
||||
for (const auto &R : Records) {
|
||||
if (loop_count++ == 0)
|
||||
OS << "\n";
|
||||
else
|
||||
OS << ",\n";
|
||||
|
||||
// Chrome trace event format always wants data in micros.
|
||||
// CyclesPerMicro = CycleHertz / 10^6
|
||||
// TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp
|
||||
@ -284,15 +312,8 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
|
||||
// type of B for begin or E for end, thread id, process id,
|
||||
// timestamp in microseconds, and a stack frame id. The ids are logged
|
||||
// in an id dictionary after the events.
|
||||
TraceEvents.push_back(json::Object({
|
||||
{"name", Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId)
|
||||
: llvm::to_string(R.FuncId)},
|
||||
{"ph", "B"},
|
||||
{"tid", llvm::to_string(R.TId)},
|
||||
{"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
|
||||
{"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
|
||||
{"sf", llvm::to_string(StackCursor->ExtraData.id)},
|
||||
}));
|
||||
writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize,
|
||||
FuncIdHelper, EventTimestampUs, *StackCursor, "B");
|
||||
break;
|
||||
case RecordTypes::EXIT:
|
||||
case RecordTypes::TAIL_EXIT:
|
||||
@ -303,51 +324,43 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records,
|
||||
// (And/Or in loop termination below)
|
||||
StackTrieNode *PreviousCursor = nullptr;
|
||||
do {
|
||||
TraceEvents.push_back(json::Object({
|
||||
{"name", Symbolize
|
||||
? FuncIdHelper.SymbolOrNumber(StackCursor->FuncId)
|
||||
: llvm::to_string(StackCursor->FuncId)},
|
||||
{"ph", "E"},
|
||||
{"tid", llvm::to_string(R.TId)},
|
||||
{"pid", llvm::to_string(Version >= 3 ? R.PId : 1)},
|
||||
{"ts", llvm::formatv("{0:f4}", EventTimestampUs)},
|
||||
{"sf", llvm::to_string(StackCursor->ExtraData.id)},
|
||||
}));
|
||||
if (PreviousCursor != nullptr) {
|
||||
OS << ",\n";
|
||||
}
|
||||
writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId,
|
||||
Symbolize, FuncIdHelper, EventTimestampUs,
|
||||
*StackCursor, "E");
|
||||
PreviousCursor = StackCursor;
|
||||
StackCursor = StackCursor->Parent;
|
||||
} while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
OS << "\n ],\n"; // Close the Trace Events array.
|
||||
OS << " "
|
||||
<< "\"displayTimeUnit\": \"ns\",\n";
|
||||
|
||||
// The stackFrames dictionary substantially reduces size of the output file by
|
||||
// avoiding repeating the entire call stack of function names for each entry.
|
||||
json::Object StackFrames;
|
||||
for (const auto &Stack : StacksByStackId) {
|
||||
const auto &StackId = Stack.first;
|
||||
const auto &StackFunctionNode = Stack.second;
|
||||
json::Object::iterator It;
|
||||
std::tie(It, std::ignore) = StackFrames.insert({
|
||||
llvm::to_string(StackId),
|
||||
json::Object{
|
||||
{"name",
|
||||
Symbolize ? FuncIdHelper.SymbolOrNumber(StackFunctionNode->FuncId)
|
||||
: llvm::to_string(StackFunctionNode->FuncId)}},
|
||||
});
|
||||
|
||||
if (StackFunctionNode->Parent != nullptr)
|
||||
It->second.getAsObject()->insert(
|
||||
{"parent", llvm::to_string(StackFunctionNode->Parent->ExtraData.id)});
|
||||
OS << R"( "stackFrames": {)";
|
||||
int stack_frame_count = 0;
|
||||
for (auto map_iter : StacksByStackId) {
|
||||
if (stack_frame_count++ == 0)
|
||||
OS << "\n";
|
||||
else
|
||||
OS << ",\n";
|
||||
OS << " ";
|
||||
OS << llvm::formatv(
|
||||
R"("{0}" : { "name" : "{1}")", map_iter.first,
|
||||
(Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId)
|
||||
: llvm::to_string(map_iter.second->FuncId)));
|
||||
if (map_iter.second->Parent != nullptr)
|
||||
OS << llvm::formatv(R"(, "parent": "{0}")",
|
||||
map_iter.second->Parent->ExtraData.id);
|
||||
OS << " }";
|
||||
}
|
||||
|
||||
json::Object TraceJSON{
|
||||
{"displayTimeUnit", "ns"},
|
||||
{"traceEvents", std::move(TraceEvents)},
|
||||
{"stackFrames", std::move(StackFrames)},
|
||||
};
|
||||
|
||||
// Pretty-print the JSON using two spaces for indentations.
|
||||
OS << formatv("{0:2}", json::Value(std::move(TraceJSON)));
|
||||
OS << "\n }\n"; // Close the stack frames map.
|
||||
OS << "}\n"; // Close the JSON entry.
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
|
Loading…
Reference in New Issue
Block a user