From 1d6bb9f4171028bd0eb3dfcb59f42b28c279d4be Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Mon, 4 Mar 2019 18:25:41 +0000 Subject: [PATCH] Vendor import of llvm release_80 branch r355313: https://llvm.org/svn/llvm-project/llvm/branches/release_80@355313 --- .gitignore | 2 + bindings/go/llvm/ir.go | 2 +- docs/ReleaseNotes.rst | 61 +++------- lib/Target/AArch64/AArch64AsmPrinter.cpp | 13 ++- lib/Target/AArch64/AArch64FrameLowering.cpp | 9 +- lib/Target/AArch64/AArch64TargetMachine.cpp | 4 +- .../WebAssembly/WebAssemblyInstrInteger.td | 7 -- lib/Target/X86/X86ISelDAGToDAG.cpp | 18 ++- lib/Target/X86/X86ISelLowering.cpp | 7 +- test/CodeGen/AArch64/win64-jumptable.ll | 48 ++++++++ test/CodeGen/AArch64/wineh-try-catch-cbz.ll | 40 +++++++ test/CodeGen/AArch64/wineh-try-catch.ll | 4 +- test/CodeGen/WebAssembly/select.ll | 25 +++- test/CodeGen/WebAssembly/simd-select.ll | 12 +- test/CodeGen/X86/code-model-elf.ll | 66 +++++++++++ test/CodeGen/X86/pr40891.ll | 22 ++++ tools/llvm-xray/xray-converter.cpp | 109 ++++++++++-------- 17 files changed, 322 insertions(+), 127 deletions(-) create mode 100644 test/CodeGen/AArch64/win64-jumptable.ll create mode 100644 test/CodeGen/AArch64/wineh-try-catch-cbz.ll create mode 100644 test/CodeGen/X86/pr40891.ll diff --git a/.gitignore b/.gitignore index 0aa0a8a80a96..be58944c9b3a 100644 --- a/.gitignore +++ b/.gitignore @@ -72,6 +72,8 @@ docs/_build # VS2017 and VSCode config files. .vscode .vs +# clangd index +.clangd #==============================================================================# # Files created in tree by the Go bindings. diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go index 1872a2ffe510..3e85137a751a 100644 --- a/bindings/go/llvm/ir.go +++ b/bindings/go/llvm/ir.go @@ -1263,7 +1263,7 @@ func (v Value) Indices() []uint32 { num := C.LLVMGetNumIndices(v.C) indicesPtr := C.LLVMGetIndices(v.C) // https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices - rawIndices := (*[1 << 30]C.uint)(unsafe.Pointer(indicesPtr))[:num:num] + rawIndices := (*[1 << 20]C.uint)(unsafe.Pointer(indicesPtr))[:num:num] indices := make([]uint32, num) for i := range indices { indices[i] = uint32(rawIndices[i]) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index eafe1f17f760..3007a1bcf78b 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -12,7 +12,7 @@ This document contains the release notes for the LLVM Compiler Infrastructure, release 8.0.0. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various subprojects of LLVM, and some of the current users of the code. All LLVM releases may be downloaded -from the `LLVM releases web site `_. +from the `LLVM releases web site `_. For more information about LLVM, including information about the latest release, please check out the `main LLVM web site `_. If you @@ -39,14 +39,19 @@ setting the ``LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN`` CMake variable to ``ON``. +Known Issues +============ + +These are issues that couldn't be fixed before the release. See the bug reports +for the latest status. + +* `PR40547 `_ Clang gets miscompiled by trunk GCC. + +* `PR40761 `_ "asan-dynamic" doesn't work on FreeBSD. + + Non-comprehensive list of changes in this release ================================================= -.. NOTE - For small 1-3 sentence descriptions, just add an entry at the end of - this list. If your description won't fit comfortably in one bullet - point (e.g. 
maybe you would like to give an example of the - functionality, or simply have a lot to talk about), see the `NOTE` below - for adding a new subsection. * The **llvm-cov** tool can now export lcov trace files using the `-format=lcov` option of the `export` command. @@ -80,15 +85,7 @@ Non-comprehensive list of changes in this release available in the `RFC `_. -.. NOTE - If you would like to document a larger change, then you can add a - subsection about it right here. You can copy the following boilerplate - and un-indent it (the indentation causes it to be inside this comment). - - Special New Feature - ------------------- - - Makes programs 10x faster by doing Special New Thing. +* Windows support for libFuzzer (x86_64). Changes to the LLVM IR ---------------------- @@ -110,17 +107,12 @@ Changes to the AArch64 Target on ARM. -Changes to the ARM Backend --------------------------- - - During this release ... - - Changes to the Hexagon Target ----------------------------- * Added support for Hexagon/HVX V66 ISA. + Changes to the MIPS Target -------------------------- @@ -142,6 +134,7 @@ Changes to the MIPS Target * Numerous bug fixes and code cleanups. + Changes to the PowerPC Target ----------------------------- @@ -153,7 +146,7 @@ Changes to the PowerPC Target * Better overload rules for compatible vector type parameter -* Support constraint ‘wi’, modifier ‘x’ and VSX registers in inline asm +* Support constraint 'wi', modifier 'x' and VSX registers in inline asm * More ``__float128`` support @@ -198,15 +191,6 @@ Changes to the X86 Target * ADCX instruction will no longer be emitted. This instruction is rarely better than the legacy ADC instruction and just increased code size. -Changes to the AMDGPU Target ------------------------------ - - During this release ... - -Changes to the AVR Target ------------------------------ - - During this release ... Changes to the WebAssembly Target --------------------------------- @@ -220,25 +204,16 @@ use for it will be to add support for returning small structs as multiple return values, once the underlying WebAssembly platform itself supports it. Additionally, multithreading support is not yet included in the stable ABI. + Changes to the Nios2 Target --------------------------- * The Nios2 target was removed from this release. -Changes to the OCaml bindings ------------------------------ - - - -Changes to the C API --------------------- - - -Changes to the DAG infrastructure ---------------------------------- Changes to LLDB =============== + * Printed source code is now syntax highlighted in the terminal (only for C languages). diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index 0254a572434f..2e1d1f1130a9 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -471,9 +471,18 @@ void AArch64AsmPrinter::EmitJumpTableInfo() { const std::vector &JT = MJTI->getJumpTables(); if (JT.empty()) return; + const Function &F = MF->getFunction(); const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM); - OutStreamer->SwitchSection(ReadOnlySec); + bool JTInDiffSection = + !STI->isTargetCOFF() || + !TLOF.shouldPutJumpTableInFunctionSection( + MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, + F); + if (JTInDiffSection) { + // Drop it in the readonly section. 
+ MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(F, TM); + OutStreamer->SwitchSection(ReadOnlySec); + } auto AFI = MF->getInfo(); for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 621aa8bc783a..57f630a18c8a 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2108,9 +2108,6 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) ++MBBI; - if (MBBI->isTerminator()) - return; - // Create an UnwindHelp object. int UnwindHelpFI = MFI.CreateStackObject(/*size*/8, /*alignment*/16, false); @@ -2118,8 +2115,10 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( // We need to store -2 into the UnwindHelp object at the start of the // function. DebugLoc DL; - RS->enterBasicBlock(MBB); - unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0); + RS->enterBasicBlockEnd(MBB); + RS->backward(std::prev(MBBI)); + unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass); + assert(DstReg && "There must be a free register after frame setup"); BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2); BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi)) .addReg(DstReg, getKillRegState(true)) diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 4e016525f7e4..219c33ef73c5 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -209,8 +209,8 @@ static std::string computeDataLayout(const Triple &TT, static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional RM) { - // AArch64 Darwin is always PIC. - if (TT.isOSDarwin()) + // AArch64 Darwin and Windows are always PIC. + if (TT.isOSDarwin() || TT.isOSWindows()) return Reloc::PIC_; // On ELF platforms the default static relocation model has a smart enough // linker to cope with referencing external symbols defined in a shared diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td index d5b63d643697..bd41f46214a3 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -122,10 +122,3 @@ def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs), (SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>; def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs), (SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>; - -// The legalizer inserts an unnecessary `and 1` to make input conform -// to getBooleanContents, which we can lower away. 
-def : Pat<(select (i32 (and I32:$cond, 1)), I32:$lhs, I32:$rhs),
-          (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>;
-def : Pat<(select (i32 (and I32:$cond, 1)), I64:$lhs, I64:$rhs),
-          (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5ac153244df9..fe75dbd8eff4 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1138,15 +1138,23 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
   if (AM.hasSymbolicDisplacement())
     return true;

+  bool IsRIPRelTLS = false;
   bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
+  if (IsRIPRel) {
+    SDValue Val = N.getOperand(0);
+    if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
+      IsRIPRelTLS = true;
+  }

-  // We can't use an addressing mode in the 64-bit large code model. In the
-  // medium code model, we use can use an mode when RIP wrappers are present.
-  // That signifies access to globals that are known to be "near", such as the
-  // GOT itself.
+  // We can't use an addressing mode in the 64-bit large code model.
+  // Global TLS addressing is an exception. In the medium code model,
+  // we can use an addressing mode when RIP wrappers are present.
+  // That signifies access to globals that are known to be "near",
+  // such as the GOT itself.
   CodeModel::Model M = TM.getCodeModel();
   if (Subtarget->is64Bit() &&
-      (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
+      ((M == CodeModel::Large && !IsRIPRelTLS) ||
+       (M == CodeModel::Medium && !IsRIPRel)))
     return true;

   // Base and index reg must be 0 in order to use %rip as base.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e1a6d22f0616..2dfee3a4701e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -38134,8 +38134,11 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
       return true;

     // See if this is a single use constant which can be constant folded.
-    SDValue BC = peekThroughOneUseBitcasts(Op);
-    return ISD::isBuildVectorOfConstantSDNodes(BC.getNode());
+    // NOTE: We don't peek through bitcasts here because there is currently
+    // no support for constant folding truncate+bitcast+vector_of_constants. So
+    // we'll just end up with a truncate on both operands, which will
+    // get turned back into (truncate (binop)), causing an infinite loop.
+ return ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); }; auto TruncateArithmetic = [&](SDValue N0, SDValue N1) { diff --git a/test/CodeGen/AArch64/win64-jumptable.ll b/test/CodeGen/AArch64/win64-jumptable.ll new file mode 100644 index 000000000000..8148a593c91b --- /dev/null +++ b/test/CodeGen/AArch64/win64-jumptable.ll @@ -0,0 +1,48 @@ +; RUN: llc -o - %s -mtriple=aarch64-windows -aarch64-enable-compress-jump-tables=0 | FileCheck %s + +define void @f(i32 %x) { +entry: + switch i32 %x, label %sw.epilog [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: ; preds = %entry + tail call void @g(i32 0) #2 + br label %sw.epilog + +sw.bb1: ; preds = %entry + tail call void @g(i32 1) #2 + br label %sw.epilog + +sw.bb2: ; preds = %entry + tail call void @g(i32 2) #2 + br label %sw.epilog + +sw.bb3: ; preds = %entry + tail call void @g(i32 3) #2 + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb + tail call void @g(i32 10) #2 + ret void +} + +declare void @g(i32) + +; CHECK: .text +; CHECK: f: +; CHECK: .seh_proc f +; CHECK: b g +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: .LJTI0_0: +; CHECK: .word .LBB0_2-.LJTI0_0 +; CHECK: .word .LBB0_3-.LJTI0_0 +; CHECK: .word .LBB0_4-.LJTI0_0 +; CHECK: .word .LBB0_5-.LJTI0_0 +; CHECK: .section .xdata,"dr" +; CHECK: .seh_handlerdata +; CHECK: .text +; CHECK: .seh_endproc diff --git a/test/CodeGen/AArch64/wineh-try-catch-cbz.ll b/test/CodeGen/AArch64/wineh-try-catch-cbz.ll new file mode 100644 index 000000000000..7c64328f0a7d --- /dev/null +++ b/test/CodeGen/AArch64/wineh-try-catch-cbz.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s | FileCheck %s + +; Make sure the prologue is sane. (Doesn't need to exactly match this, +; but the original issue only reproduced if the cbz was immediately +; after the frame setup.) + +; CHECK: sub sp, sp, #32 +; CHECK-NEXT: stp x29, x30, [sp, #16] +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: orr x1, xzr, #0xfffffffffffffffe +; CHECK-NEXT: stur x1, [x29, #-16] +; CHECK-NEXT: cbz w0, .LBB0_2 + +target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-windows-msvc19.11.0" + +; Function Attrs: uwtable +define dso_local void @"?f@@YAXH@Z"(i32 %x) local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %try.cont, label %if.then + +if.then: ; preds = %entry + invoke void @"?g@@YAXXZ"() + to label %try.cont unwind label %catch.dispatch + +catch.dispatch: ; preds = %if.then + %0 = catchswitch within none [label %catch] unwind to caller + +catch: ; preds = %catch.dispatch + %1 = catchpad within %0 [i8* null, i32 64, i8* null] + catchret from %1 to label %try.cont + +try.cont: ; preds = %entry, %if.then, %catch + ret void +} + +declare dso_local void @"?g@@YAXXZ"() local_unnamed_addr #1 + +declare dso_local i32 @__CxxFrameHandler3(...) diff --git a/test/CodeGen/AArch64/wineh-try-catch.ll b/test/CodeGen/AArch64/wineh-try-catch.ll index 940a86282d39..f4bb9d50a434 100644 --- a/test/CodeGen/AArch64/wineh-try-catch.ll +++ b/test/CodeGen/AArch64/wineh-try-catch.ll @@ -22,8 +22,8 @@ ; CHECK: add x29, sp, #32 ; CHECK: sub sp, sp, #624 ; CHECK: mov x19, sp -; CHECK: orr x1, xzr, #0xfffffffffffffffe -; CHECK: stur x1, [x19] +; CHECK: orr x0, xzr, #0xfffffffffffffffe +; CHECK: stur x0, [x19] ; Now check that x is stored at fp - 20. We check that this is the same ; location accessed from the funclet to retrieve x. 
diff --git a/test/CodeGen/WebAssembly/select.ll b/test/CodeGen/WebAssembly/select.ll index daa934f44844..ef18d9183e50 100644 --- a/test/CodeGen/WebAssembly/select.ll +++ b/test/CodeGen/WebAssembly/select.ll @@ -17,8 +17,10 @@ define i32 @select_i32_bool(i1 zeroext %a, i32 %b, i32 %c) { ; CHECK-LABEL: select_i32_bool_nozext: ; CHECK-NEXT: .functype select_i32_bool_nozext (i32, i32, i32) -> (i32){{$}} -; SLOW-NEXT: i32.select $push0=, $1, $2, $0{{$}} -; SLOW-NEXT: return $pop0{{$}} +; SLOW-NEXT: i32.const $push0=, 1{{$}} +; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}} +; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}} +; SLOW-NEXT: return $pop2{{$}} define i32 @select_i32_bool_nozext(i1 %a, i32 %b, i32 %c) { %cond = select i1 %a, i32 %b, i32 %c ret i32 %cond @@ -55,8 +57,10 @@ define i64 @select_i64_bool(i1 zeroext %a, i64 %b, i64 %c) { ; CHECK-LABEL: select_i64_bool_nozext: ; CHECK-NEXT: .functype select_i64_bool_nozext (i32, i64, i64) -> (i64){{$}} -; SLOW-NEXT: i64.select $push0=, $1, $2, $0{{$}} -; SLOW-NEXT: return $pop0{{$}} +; SLOW-NEXT: i32.const $push0=, 1{{$}} +; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}} +; SLOW-NEXT: i64.select $push2=, $1, $2, $pop1{{$}} +; SLOW-NEXT: return $pop2{{$}} define i64 @select_i64_bool_nozext(i1 %a, i64 %b, i64 %c) { %cond = select i1 %a, i64 %b, i64 %c ret i64 %cond @@ -157,3 +161,16 @@ define double @select_f64_ne(i32 %a, double %b, double %c) { %cond = select i1 %cmp, double %b, double %c ret double %cond } + +; CHECK-LABEL: pr40805: +; CHECK-NEXT: .functype pr40805 (i32, i32, i32) -> (i32){{$}} +; SLOW-NEXT: i32.const $push0=, 1{{$}} +; SLOW-NEXT: i32.and $push1=, $0, $pop0{{$}} +; SLOW-NEXT: i32.select $push2=, $1, $2, $pop1{{$}} +; SLOW-NEXT: return $pop2{{$}} +define i32 @pr40805(i32 %x, i32 %y, i32 %z) { + %a = and i32 %x, 1 + %b = icmp ne i32 %a, 0 + %c = select i1 %b, i32 %y, i32 %z + ret i32 %c +} diff --git a/test/CodeGen/WebAssembly/simd-select.ll b/test/CodeGen/WebAssembly/simd-select.ll index c871f60e6454..c3af6f9abe60 100644 --- a/test/CodeGen/WebAssembly/simd-select.ll +++ b/test/CodeGen/WebAssembly/simd-select.ll @@ -29,7 +29,7 @@ define <16 x i8> @vselect_v16i8(<16 x i1> %c, <16 x i8> %x, <16 x i8> %y) { ; CHECK-NEXT: i8x16.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -define <16 x i8> @select_v16i8(i1 %c, <16 x i8> %x, <16 x i8> %y) { +define <16 x i8> @select_v16i8(i1 zeroext %c, <16 x i8> %x, <16 x i8> %y) { %res = select i1 %c, <16 x i8> %x, <16 x i8> %y ret <16 x i8> %res } @@ -99,7 +99,7 @@ define <8 x i16> @vselect_v8i16(<8 x i1> %c, <8 x i16> %x, <8 x i16> %y) { ; CHECK-NEXT: i16x8.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -define <8 x i16> @select_v8i16(i1 %c, <8 x i16> %x, <8 x i16> %y) { +define <8 x i16> @select_v8i16(i1 zeroext %c, <8 x i16> %x, <8 x i16> %y) { %res = select i1 %c, <8 x i16> %x, <8 x i16> %y ret <8 x i16> %res } @@ -170,7 +170,7 @@ define <4 x i32> @vselect_v4i32(<4 x i1> %c, <4 x i32> %x, <4 x i32> %y) { ; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -define <4 x i32> @select_v4i32(i1 %c, <4 x i32> %x, <4 x i32> %y) { +define <4 x i32> @select_v4i32(i1 zeroext %c, <4 x i32> %x, <4 x i32> %y) { %res = select i1 %c, <4 x i32> %x, <4 x i32> %y ret <4 x i32> %res } @@ 
-240,7 +240,7 @@ define <2 x i64> @vselect_v2i64(<2 x i1> %c, <2 x i64> %x, <2 x i64> %y) { ; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -define <2 x i64> @select_v2i64(i1 %c, <2 x i64> %x, <2 x i64> %y) { +define <2 x i64> @select_v2i64(i1 zeroext %c, <2 x i64> %x, <2 x i64> %y) { %res = select i1 %c, <2 x i64> %x, <2 x i64> %y ret <2 x i64> %res } @@ -313,7 +313,7 @@ define <4 x float> @vselect_v4f32(<4 x i1> %c, <4 x float> %x, <4 x float> %y) { ; CHECK-NEXT: i32x4.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -define <4 x float> @select_v4f32(i1 %c, <4 x float> %x, <4 x float> %y) { +define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %x, <4 x float> %y) { %res = select i1 %c, <4 x float> %x, <4 x float> %y ret <4 x float> %res } @@ -383,7 +383,7 @@ define <2 x double> @vselect_v2f64(<2 x i1> %c, <2 x double> %x, <2 x double> %y ; CHECK-NEXT: i64x2.splat $push[[L3:[0-9]+]]=, $pop[[L2]]{{$}} ; CHECK-NEXT: v128.bitselect $push[[R:[0-9]+]]=, $1, $2, $pop[[L3]]{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -define <2 x double> @select_v2f64(i1 %c, <2 x double> %x, <2 x double> %y) { +define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %x, <2 x double> %y) { %res = select i1 %c, <2 x double> %x, <2 x double> %y ret <2 x double> %res } diff --git a/test/CodeGen/X86/code-model-elf.ll b/test/CodeGen/X86/code-model-elf.ll index 56d3f4c102f0..f7ffd6ea1eb7 100644 --- a/test/CodeGen/X86/code-model-elf.ll +++ b/test/CodeGen/X86/code-model-elf.ll @@ -37,6 +37,8 @@ target triple = "x86_64--linux" @global_data = dso_local global [10 x i32] [i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16 @static_data = internal global [10 x i32] zeroinitializer, align 16 @extern_data = external global [10 x i32], align 16 +@thread_data = external thread_local global i32, align 4 + define dso_local i32* @lea_static_data() #0 { ; SMALL-STATIC-LABEL: lea_static_data: @@ -373,6 +375,70 @@ define dso_local void ()* @lea_extern_fn() #0 { ret void ()* @extern_fn } +; FIXME: The result is same for small, medium and large model, because we +; specify pie option in the test case. And the type of tls is initial exec tls. +; For pic code. The large model code for pic tls should be emitted as below. + +; .L3: +; leaq .L3(%rip), %rbx +; movabsq $_GLOBAL_OFFSET_TABLE_-.L3, %r11 +; addq %r11, %rbx +; leaq thread_data@TLSGD(%rip), %rdi +; movabsq $__tls_get_addr@PLTOFF, %rax +; addq %rbx, %rax +; call *%rax +; movl (%rax), %eax + +; The medium and small model code for pic tls should be emitted as below. 
+; data16 +; leaq thread_data@TLSGD(%rip), %rdi +; data16 +; data16 +; rex64 +; callq __tls_get_addr@PLT +; movl (%rax), %eax + +define dso_local i32 @load_thread_data() #0 { +; SMALL-STATIC-LABEL: load_thread_data: +; SMALL-STATIC: # %bb.0: +; SMALL-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; SMALL-STATIC-NEXT: movl %fs:(%rax), %eax +; SMALL-STATIC-NEXT: retq +; +; MEDIUM-STATIC-LABEL: load_thread_data: +; MEDIUM-STATIC: # %bb.0: +; MEDIUM-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; MEDIUM-STATIC-NEXT: movl %fs:(%rax), %eax +; MEDIUM-STATIC-NEXT: retq +; +; LARGE-STATIC-LABEL: load_thread_data: +; LARGE-STATIC: # %bb.0: +; LARGE-STATIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; LARGE-STATIC-NEXT: movl %fs:(%rax), %eax +; LARGE-STATIC-NEXT: retq +; +; SMALL-PIC-LABEL: load_thread_data: +; SMALL-PIC: # %bb.0: +; SMALL-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; SMALL-PIC-NEXT: movl %fs:(%rax), %eax +; SMALL-PIC-NEXT: retq +; +; MEDIUM-PIC-LABEL: load_thread_data: +; MEDIUM-PIC: # %bb.0: +; MEDIUM-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; MEDIUM-PIC-NEXT: movl %fs:(%rax), %eax +; MEDIUM-PIC-NEXT: retq +; +; LARGE-PIC-LABEL: load_thread_data: +; LARGE-PIC: # %bb.0: +; LARGE-PIC-NEXT: movq thread_data@GOTTPOFF(%rip), %rax +; LARGE-PIC-NEXT: movl %fs:(%rax), %eax +; LARGE-PIC-NEXT: retq +; + %1 = load i32, i32* @thread_data, align 4 + ret i32 %1 +} + attributes #0 = { noinline nounwind uwtable } !llvm.module.flags = !{!0, !1, !2} diff --git a/test/CodeGen/X86/pr40891.ll b/test/CodeGen/X86/pr40891.ll new file mode 100644 index 000000000000..3f3cfb23d03d --- /dev/null +++ b/test/CodeGen/X86/pr40891.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx2 | FileCheck %s + +; Make sure this sequence doesn't hang in DAG combine. + +define <8 x i32> @foo(<8 x i64> %x, <4 x i64> %y) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0 +; CHECK-NEXT: vandps {{\.LCPI.*}}, %ymm1, %ymm1 +; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,3] +; CHECK-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7] +; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3] +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: retl + %a = shufflevector <4 x i64> %y, <4 x i64> , <8 x i32> + %b = and <8 x i64> %x, %a + %c = trunc <8 x i64> %b to <8 x i32> + ret <8 x i32> %c +} + diff --git a/tools/llvm-xray/xray-converter.cpp b/tools/llvm-xray/xray-converter.cpp index 3f153b99bc93..a682dbe53e3b 100644 --- a/tools/llvm-xray/xray-converter.cpp +++ b/tools/llvm-xray/xray-converter.cpp @@ -18,7 +18,6 @@ #include "llvm/Support/EndianStream.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/JSON.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" @@ -242,6 +241,31 @@ StackTrieNode *findOrCreateStackNode( return CurrentStack; } +void writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId, + uint32_t TId, uint32_t PId, bool Symbolize, + const FuncIdConversionHelper &FuncIdHelper, + double EventTimestampUs, + const StackTrieNode &StackCursor, + StringRef FunctionPhenotype) { + OS << " "; + if (Version >= 3) { + OS << llvm::formatv( + R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )" + R"("ts" : "{4:f4}", "sf" : "{5}" })", + (Symbolize ? 
FuncIdHelper.SymbolOrNumber(FuncId) + : llvm::to_string(FuncId)), + FunctionPhenotype, TId, PId, EventTimestampUs, + StackCursor.ExtraData.id); + } else { + OS << llvm::formatv( + R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )" + R"("ts" : "{3:f3}", "sf" : "{4}" })", + (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId) + : llvm::to_string(FuncId)), + FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id); + } +} + } // namespace void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, @@ -252,14 +276,18 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, unsigned id_counter = 0; + OS << "{\n \"traceEvents\": ["; DenseMap StackCursorByThreadId{}; DenseMap> StackRootsByThreadId{}; DenseMap StacksByStackId{}; std::forward_list NodeStore{}; - - // Create a JSON Array which will hold all trace events. - json::Array TraceEvents; + int loop_count = 0; for (const auto &R : Records) { + if (loop_count++ == 0) + OS << "\n"; + else + OS << ",\n"; + // Chrome trace event format always wants data in micros. // CyclesPerMicro = CycleHertz / 10^6 // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp @@ -284,15 +312,8 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, // type of B for begin or E for end, thread id, process id, // timestamp in microseconds, and a stack frame id. The ids are logged // in an id dictionary after the events. - TraceEvents.push_back(json::Object({ - {"name", Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId) - : llvm::to_string(R.FuncId)}, - {"ph", "B"}, - {"tid", llvm::to_string(R.TId)}, - {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)}, - {"ts", llvm::formatv("{0:f4}", EventTimestampUs)}, - {"sf", llvm::to_string(StackCursor->ExtraData.id)}, - })); + writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize, + FuncIdHelper, EventTimestampUs, *StackCursor, "B"); break; case RecordTypes::EXIT: case RecordTypes::TAIL_EXIT: @@ -303,51 +324,43 @@ void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, // (And/Or in loop termination below) StackTrieNode *PreviousCursor = nullptr; do { - TraceEvents.push_back(json::Object({ - {"name", Symbolize - ? FuncIdHelper.SymbolOrNumber(StackCursor->FuncId) - : llvm::to_string(StackCursor->FuncId)}, - {"ph", "E"}, - {"tid", llvm::to_string(R.TId)}, - {"pid", llvm::to_string(Version >= 3 ? R.PId : 1)}, - {"ts", llvm::formatv("{0:f4}", EventTimestampUs)}, - {"sf", llvm::to_string(StackCursor->ExtraData.id)}, - })); + if (PreviousCursor != nullptr) { + OS << ",\n"; + } + writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId, + Symbolize, FuncIdHelper, EventTimestampUs, + *StackCursor, "E"); PreviousCursor = StackCursor; StackCursor = StackCursor->Parent; } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr); break; } } + OS << "\n ],\n"; // Close the Trace Events array. + OS << " " + << "\"displayTimeUnit\": \"ns\",\n"; // The stackFrames dictionary substantially reduces size of the output file by // avoiding repeating the entire call stack of function names for each entry. - json::Object StackFrames; - for (const auto &Stack : StacksByStackId) { - const auto &StackId = Stack.first; - const auto &StackFunctionNode = Stack.second; - json::Object::iterator It; - std::tie(It, std::ignore) = StackFrames.insert({ - llvm::to_string(StackId), - json::Object{ - {"name", - Symbolize ? 
FuncIdHelper.SymbolOrNumber(StackFunctionNode->FuncId) - : llvm::to_string(StackFunctionNode->FuncId)}}, - }); - - if (StackFunctionNode->Parent != nullptr) - It->second.getAsObject()->insert( - {"parent", llvm::to_string(StackFunctionNode->Parent->ExtraData.id)}); + OS << R"( "stackFrames": {)"; + int stack_frame_count = 0; + for (auto map_iter : StacksByStackId) { + if (stack_frame_count++ == 0) + OS << "\n"; + else + OS << ",\n"; + OS << " "; + OS << llvm::formatv( + R"("{0}" : { "name" : "{1}")", map_iter.first, + (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId) + : llvm::to_string(map_iter.second->FuncId))); + if (map_iter.second->Parent != nullptr) + OS << llvm::formatv(R"(, "parent": "{0}")", + map_iter.second->Parent->ExtraData.id); + OS << " }"; } - - json::Object TraceJSON{ - {"displayTimeUnit", "ns"}, - {"traceEvents", std::move(TraceEvents)}, - {"stackFrames", std::move(StackFrames)}, - }; - - // Pretty-print the JSON using two spaces for indentations. - OS << formatv("{0:2}", json::Value(std::move(TraceJSON))); + OS << "\n }\n"; // Close the stack frames map. + OS << "}\n"; // Close the JSON entry. } namespace llvm {
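
Note on the llvm-xray change above: the converter now drops the llvm/Support/JSON.h dependency and streams the Chrome Trace Event output directly through raw_ostream and llvm::formatv. For orientation only, the emitted document has roughly the shape sketched below; the function names, ids, pids, tids and timestamps are invented for illustration and are not taken from this patch:

    {
     "traceEvents": [
     { "name" : "foo", "ph" : "B", "tid" : "100", "pid" : "42", "ts" : "0.1000", "sf" : "1" },
     { "name" : "foo", "ph" : "E", "tid" : "100", "pid" : "42", "ts" : "3.5000", "sf" : "1" }
     ],
     "displayTimeUnit": "ns",
     "stackFrames": {
      "1" : { "name" : "foo" },
      "2" : { "name" : "bar", "parent": "1" }
     }
    }

Because the records are written as they are visited rather than collected into a json::Array first, the loops above track whether they are emitting the first element before writing the ",\n" separator; that is what the loop_count and stack_frame_count counters are for.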