From 5c03f3e190559347c835382d61bb1b590e74aa4c Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 4 Aug 2018 13:15:05 +0000 Subject: [PATCH] Vendor import of llvm release_70 branch r338892: https://llvm.org/svn/llvm-project/llvm/branches/release_70@338892 --- CMakeLists.txt | 4 +- docs/ReleaseNotes.rst | 8 +- include/llvm/Support/DebugCounter.h | 28 +- lib/Support/DebugCounter.cpp | 2 + lib/Target/AArch64/AArch64InstrFormats.td | 4 +- lib/Target/PowerPC/PPCISelLowering.cpp | 22 +- lib/Target/PowerPC/PPCInstrVSX.td | 86 +++++ lib/Target/X86/X86FastISel.cpp | 4 + test/CodeGen/AArch64/f16-instructions.ll | 30 ++ test/CodeGen/PowerPC/build-vector-tests.ll | 357 ++++++++---------- .../CodeGen/X86/absolute-bit-mask-fastisel.ll | 28 ++ test/tools/llvm-ar/invalid-command-line.test | 2 +- tools/llvm-ar/llvm-ar.cpp | 54 ++- tools/llvm-shlib/CMakeLists.txt | 4 - utils/release/tag.sh | 2 +- 15 files changed, 384 insertions(+), 251 deletions(-) create mode 100644 test/CodeGen/X86/absolute-bit-mask-fastisel.ll diff --git a/CMakeLists.txt b/CMakeLists.txt index d31112c093ce..32134e250197 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH) set(LLVM_VERSION_PATCH 0) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) - set(LLVM_VERSION_SUFFIX svn) + set(LLVM_VERSION_SUFFIX "") endif() if (NOT PACKAGE_VERSION) @@ -557,8 +557,6 @@ if(LLVM_LINK_LLVM_DYLIB OR LLVM_BUILD_LLVM_C_DYLIB) endif() option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default}) -option(LLVM_DYLIB_SYMBOL_VERSIONING OFF) - option(LLVM_OPTIMIZED_TABLEGEN "Force TableGen to be built with optimization" OFF) if(CMAKE_CROSSCOMPILING OR (LLVM_OPTIMIZED_TABLEGEN AND (LLVM_ENABLE_ASSERTIONS OR CMAKE_CONFIGURATION_TYPES))) set(LLVM_USE_HOST_TOOLS ON) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index a6942c019141..58fb8828484c 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -65,7 +65,7 @@ Non-comprehensive list of changes in this release results for code that is relying on the undefined behavior of overflowing casts. The optimization can be disabled by specifying a function attribute: "strict-float-cast-overflow"="false". This attribute may be created by the - clang option :option:`-fno-strict-float-cast-overflow`. + clang option ``-fno-strict-float-cast-overflow``. Code sanitizers can be used to detect affected patterns. The option for detecting this problem alone is "-fsanitize=float-cast-overflow": @@ -109,6 +109,12 @@ Non-comprehensive list of changes in this release it's now a better choice even on the heap (although when TinyPtrVector works, it's even smaller). +* Preliminary/experimental support for DWARF v5 debugging information, + including the new .debug_names accelerator table. DWARF emitted at ``-O0`` + should be fully DWARF v5 compliant. Type units and split DWARF are known + not to be compliant, and higher optimization levels will still emit some + information in v4 format. + * Note.. .. NOTE diff --git a/include/llvm/Support/DebugCounter.h b/include/llvm/Support/DebugCounter.h index 250fc6bb1f5c..83bd5a06c94a 100644 --- a/include/llvm/Support/DebugCounter.h +++ b/include/llvm/Support/DebugCounter.h @@ -70,10 +70,9 @@ class DebugCounter { return instance().addCounter(Name, Desc); } inline static bool shouldExecute(unsigned CounterName) { -// Compile to nothing when debugging is off -#ifdef NDEBUG - return true; -#else + if (!isCountingEnabled()) + return true; + auto &Us = instance(); auto Result = Us.Counters.find(CounterName); if (Result != Us.Counters.end()) { @@ -93,7 +92,6 @@ class DebugCounter { } // Didn't find the counter, should we warn? return true; -#endif // NDEBUG } // Return true if a given counter had values set (either programatically or on @@ -142,7 +140,23 @@ class DebugCounter { } CounterVector::const_iterator end() const { return RegisteredCounters.end(); } + // Force-enables counting all DebugCounters. + // + // Since DebugCounters are incompatible with threading (not only do they not + // make sense, but we'll also see data races), this should only be used in + // contexts where we're certain we won't spawn threads. + static void enableAllCounters() { instance().Enabled = true; } + private: + static bool isCountingEnabled() { +// Compile to nothing when debugging is off +#ifdef NDEBUG + return false; +#else + return instance().Enabled; +#endif + } + unsigned addCounter(const std::string &Name, const std::string &Desc) { unsigned Result = RegisteredCounters.insert(Name); Counters[Result] = {}; @@ -159,6 +173,10 @@ class DebugCounter { }; DenseMap Counters; CounterVector RegisteredCounters; + + // Whether we should do DebugCounting at all. DebugCounters aren't + // thread-safe, so this should always be false in multithreaded scenarios. + bool Enabled = false; }; #define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC) \ diff --git a/lib/Support/DebugCounter.cpp b/lib/Support/DebugCounter.cpp index 5a9cecfc56d4..9c12de0776ad 100644 --- a/lib/Support/DebugCounter.cpp +++ b/lib/Support/DebugCounter.cpp @@ -82,6 +82,7 @@ void DebugCounter::push_back(const std::string &Val) { << " is not a registered counter\n"; return; } + enableAllCounters(); Counters[CounterID].Skip = CounterVal; Counters[CounterID].IsSet = true; } else if (CounterPair.first.endswith("-count")) { @@ -92,6 +93,7 @@ void DebugCounter::push_back(const std::string &Val) { << " is not a registered counter\n"; return; } + enableAllCounters(); Counters[CounterID].StopAfter = CounterVal; Counters[CounterID].IsSet = true; } else { diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 15d61cd1ad26..7caf32dbde2a 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -4639,7 +4639,9 @@ class BaseFPCondComparison { - def Hrr : BaseFPCondComparison { + def Hrr : BaseFPCondComparison { let Inst{23-22} = 0b11; let Predicates = [HasFullFP16]; } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 51ff8a5cf77e..f6e13aee968a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11761,6 +11761,14 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, ShiftCst); } +// Is this an extending load from an f32 to an f64? +static bool isFPExtLoad(SDValue Op) { + if (LoadSDNode *LD = dyn_cast(Op.getNode())) + return LD->getExtensionType() == ISD::EXTLOAD && + Op.getValueType() == MVT::f64; + return false; +} + /// Reduces the number of fp-to-int conversion when building a vector. /// /// If this vector is built out of floating to integer conversions, @@ -11795,11 +11803,18 @@ combineElementTruncationToVectorTruncation(SDNode *N, SmallVector Ops; EVT TargetVT = N->getValueType(0); for (int i = 0, e = N->getNumOperands(); i < e; ++i) { - if (N->getOperand(i).getOpcode() != PPCISD::MFVSR) + SDValue NextOp = N->getOperand(i); + if (NextOp.getOpcode() != PPCISD::MFVSR) return SDValue(); - unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode(); + unsigned NextConversion = NextOp.getOperand(0).getOpcode(); if (NextConversion != FirstConversion) return SDValue(); + // If we are converting to 32-bit integers, we need to add an FP_ROUND. + // This is not valid if the input was originally double precision. It is + // also not profitable to do unless this is an extending load in which + // case doing this combine will allow us to combine consecutive loads. + if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0))) + return SDValue(); if (N->getOperand(i) != FirstInput) IsSplat = false; } @@ -11813,8 +11828,9 @@ combineElementTruncationToVectorTruncation(SDNode *N, // Now that we know we have the right type of node, get its operands for (int i = 0, e = N->getNumOperands(); i < e; ++i) { SDValue In = N->getOperand(i).getOperand(0); - // For 32-bit values, we need to add an FP_ROUND node. if (Is32Bit) { + // For 32-bit values, we need to add an FP_ROUND node (if we made it + // here, we know that all inputs are extending loads so this is safe). if (In.isUndef()) Ops.push_back(DAG.getUNDEF(SrcVT)); else { diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index ffba0e5aadb5..183512acaf9e 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -3494,6 +3494,17 @@ def DblToFlt { dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1)))); } +def ExtDbl { + dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0)))))); + dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1)))))); + dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0)))))); + dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1)))))); + dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0)))))); + dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1)))))); + dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0)))))); + dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1)))))); +} + def ByteToWord { dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); @@ -3571,9 +3582,15 @@ def FltToULong { } def DblToInt { dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A)))); + dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B)))); + dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C)))); + dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D)))); } def DblToUInt { dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A)))); + dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B)))); + dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C)))); + dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D)))); } def DblToLong { dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A)))); @@ -3612,6 +3629,47 @@ def MrgFP { dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3)); } +// Word-element merge dags - conversions from f64 to i32 merged into vectors. +def MrgWords { + // For big endian, we merge low and hi doublewords (A, B). + dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0)); + dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3)); + dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1)); + dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0)); + dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1)); + dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0)); + + // For little endian, we merge low and hi doublewords (B, A). + dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0)); + dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3)); + dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1)); + dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0)); + dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1)); + dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0)); + + // For big endian, we merge hi doublewords of (A, C) and (B, D), convert + // then merge. + dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC), + (COPY_TO_REGCLASS f64:$C, VSRC), 0)); + dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC), + (COPY_TO_REGCLASS f64:$D, VSRC), 0)); + dag CVACS = (v4i32 (XVCVDPSXWS AC)); + dag CVBDS = (v4i32 (XVCVDPSXWS BD)); + dag CVACU = (v4i32 (XVCVDPUXWS AC)); + dag CVBDU = (v4i32 (XVCVDPUXWS BD)); + + // For little endian, we merge hi doublewords of (D, B) and (C, A), convert + // then merge. + dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC), + (COPY_TO_REGCLASS f64:$B, VSRC), 0)); + dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC), + (COPY_TO_REGCLASS f64:$A, VSRC), 0)); + dag CVDBS = (v4i32 (XVCVDPSXWS DB)); + dag CVCAS = (v4i32 (XVCVDPSXWS CA)); + dag CVDBU = (v4i32 (XVCVDPUXWS DB)); + dag CVCAU = (v4i32 (XVCVDPUXWS CA)); +} + // Patterns for BUILD_VECTOR nodes. let AddedComplexity = 400 in { @@ -3679,6 +3737,20 @@ let AddedComplexity = 400 in { def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, DblToFlt.B0, DblToFlt.B1)), (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>; + + // Convert 4 doubles to a vector of ints. + def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, + DblToInt.C, DblToInt.D)), + (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>; + def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, + DblToUInt.C, DblToUInt.D)), + (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>; + def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, + ExtDbl.B0S, ExtDbl.B1S)), + (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>; + def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, + ExtDbl.B0U, ExtDbl.B1U)), + (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>; } let Predicates = [IsLittleEndian, HasVSX] in { @@ -3693,6 +3765,20 @@ let AddedComplexity = 400 in { def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1, DblToFlt.B0, DblToFlt.B1)), (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>; + + // Convert 4 doubles to a vector of ints. + def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B, + DblToInt.C, DblToInt.D)), + (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>; + def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B, + DblToUInt.C, DblToUInt.D)), + (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>; + def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S, + ExtDbl.B0S, ExtDbl.B1S)), + (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>; + def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U, + ExtDbl.B0U, ExtDbl.B1U)), + (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>; } let Predicates = [HasDirectMove] in { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 35a15577fe09..d082b42eefa9 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -738,6 +738,10 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { if (GV->isThreadLocal()) return false; + // Can't handle !absolute_symbol references yet. + if (GV->isAbsoluteSymbolRef()) + return false; + // RIP-relative addresses can't have additional register operands, so if // we've already folded stuff into the addressing mode, just force the // global value into its own register, which we can use as the basereg. diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll index c6c279d7d213..352a27539032 100644 --- a/test/CodeGen/AArch64/f16-instructions.ll +++ b/test/CodeGen/AArch64/f16-instructions.ll @@ -456,6 +456,36 @@ define i1 @test_fcmp_ord(half %a, half %b) #0 { ret i1 %r } +; CHECK-COMMON-LABEL: test_fccmp: +; CHECK-CVT: fcvt s0, h0 +; CHECK-CVT-NEXT: fmov s1, #8.00000000 +; CHECK-CVT-NEXT: fmov s2, #5.00000000 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w8, gt +; CHECK-CVT-NEXT: fcmp s0, s2 +; CHECK-CVT-NEXT: cset w9, mi +; CHECK-CVT-NEXT: tst w8, w9 +; CHECK-CVT-NEXT: fcsel s0, s0, s2, ne +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: str h0, [x0] +; CHECK-CVT-NEXT: ret +; CHECK-FP16: fmov h1, #5.00000000 +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: fmov h2, #8.00000000 +; CHECK-FP16-NEXT: fccmp h0, h2, #4, mi +; CHECK-FP16-NEXT: fcsel h0, h0, h1, gt +; CHECK-FP16-NEXT: str h0, [x0] +; CHECK-FP16-NEXT: ret + +define void @test_fccmp(half %in, half* %out) { + %cmp1 = fcmp ogt half %in, 0xH4800 + %cmp2 = fcmp olt half %in, 0xH4500 + %cond = and i1 %cmp1, %cmp2 + %result = select i1 %cond, half %in, half 0xH4500 + store half %result, half* %out + ret void +} + ; CHECK-CVT-LABEL: test_br_cc: ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 diff --git a/test/CodeGen/PowerPC/build-vector-tests.ll b/test/CodeGen/PowerPC/build-vector-tests.ll index eecb2d4045e3..f074e2a0c0d0 100644 --- a/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/test/CodeGen/PowerPC/build-vector-tests.ll @@ -119,8 +119,8 @@ ;vector int spltCnstConvftoi() { // ; return (vector int) 4.74f; // ;} // -;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvdpsxws // +;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // +;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromRegsConvftoi(float a, float b, float c, float d) { // ; return (vector int) { a, b, c, d }; // ;} // @@ -139,15 +139,15 @@ ;vector int fromDiffMemConsDConvftoi(float *ptr) { // ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // ;} // -;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // +;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // ;// sldi 2, load, xvcvspuxws // ;vector int fromDiffMemVarAConvftoi(float *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // ;} // -;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // +;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // ;// sldi 2, 2 x load, vperm, xvcvspuxws // ;vector int fromDiffMemVarDConvftoi(float *arr, int elem) { // @@ -168,8 +168,8 @@ ;vector int spltCnstConvdtoi() { // ; return (vector int) 4.74; // ;} // -;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // +;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromRegsConvdtoi(double a, double b, double c, double d) { // ; return (vector int) { a, b, c, d }; // ;} // @@ -178,25 +178,23 @@ ;vector int fromDiffConstsConvdtoi() { // ; return (vector int) { 24.46, 234., 988.19, 422.39 }; // ;} // -;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // -;// xvcvspsxws // -;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // -;// xvcvspsxws // +;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew // +;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromDiffMemConsAConvdtoi(double *ptr) { // ; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // ;} // -;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // +;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromDiffMemConsDConvdtoi(double *ptr) { // ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // ;} // -;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // +;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromDiffMemVarAConvdtoi(double *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; // ;} // -;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // -;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws // +;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // +;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew // ;vector int fromDiffMemVarDConvdtoi(double *arr, int elem) { // ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; // ;} // @@ -296,8 +294,8 @@ ;vector unsigned int spltCnstConvftoui() { // ; return (vector unsigned int) 4.74f; // ;} // -;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // +;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromRegsConvftoui(float a, float b, float c, float d) { // ; return (vector unsigned int) { a, b, c, d }; // ;} // @@ -316,16 +314,16 @@ ;vector unsigned int fromDiffMemConsDConvftoui(float *ptr) { // ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // ;} // -;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // +;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // ;// sldi 2, load, xvcvspuxws // ;vector unsigned int fromDiffMemVarAConvftoui(float *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem+1], // ; arr[elem+2], arr[elem+3] }; // ;} // -;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // +;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;// Note: if the consecutive loads learns to handle pre-inc, this can be: // ;// sldi 2, 2 x load, vperm, xvcvspuxws // ;vector unsigned int fromDiffMemVarDConvftoui(float *arr, int elem) { // @@ -347,8 +345,8 @@ ;vector unsigned int spltCnstConvdtoui() { // ; return (vector unsigned int) 4.74; // ;} // -;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // +;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromRegsConvdtoui(double a, double b, // ; double c, double d) { // ; return (vector unsigned int) { a, b, c, d }; // @@ -358,25 +356,24 @@ ;vector unsigned int fromDiffConstsConvdtoui() { // ; return (vector unsigned int) { 24.46, 234., 988.19, 422.39 }; // ;} // -;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, // -;// xvcvspuxws // -;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew // +;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromDiffMemConsAConvdtoui(double *ptr) { // ; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; // ;} // -;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // +;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromDiffMemConsDConvdtoui(double *ptr) { // ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; // ;} // -;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // +;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromDiffMemVarAConvdtoui(double *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem+1], // ; arr[elem+2], arr[elem+3] }; // ;} // -;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // -;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws // +;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // +;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew // ;vector unsigned int fromDiffMemVarDConvdtoui(double *arr, int elem) { // ; return (vector unsigned int) { arr[elem], arr[elem-1], // ; arr[elem-2], arr[elem-3] }; // @@ -1253,28 +1250,24 @@ entry: ; P8LE-LABEL: fromRegsConvftoi ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9BE: vmrgew v2, [[REG3]], [[REG4]] -; P9BE: xvcvspsxws v2, v2 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9LE: vmrgew v2, [[REG4]], [[REG3]] -; P9LE: xvcvspsxws v2, v2 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8BE: vmrgew v2, [[REG3]], [[REG4]] -; P8BE: xvcvspsxws v2, v2 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8LE: vmrgew v2, [[REG4]], [[REG3]] -; P8LE: xvcvspsxws v2, v2 } ; Function Attrs: norecurse nounwind readnone @@ -1529,28 +1522,24 @@ entry: ; P8LE-LABEL: fromRegsConvdtoi ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9BE: vmrgew v2, [[REG3]], [[REG4]] -; P9BE: xvcvspsxws v2, v2 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9LE: vmrgew v2, [[REG4]], [[REG3]] -; P9LE: xvcvspsxws v2, v2 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8BE: vmrgew v2, [[REG3]], [[REG4]] -; P8BE: xvcvspsxws v2, v2 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8LE: vmrgew v2, [[REG4]], [[REG3]] -; P8LE: xvcvspsxws v2, v2 } ; Function Attrs: norecurse nounwind readnone @@ -1592,36 +1581,32 @@ entry: ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] -; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] -; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P9BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] +; P9BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] ; P9BE: vmrgew v2, [[REG6]], [[REG5]] -; P9BE: xvcvspsxws v2, v2 ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] -; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] -; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P9LE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] +; P9LE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] ; P9LE: vmrgew v2, [[REG6]], [[REG5]] -; P9LE: xvcvspsxws v2, v2 ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] -; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] -; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P8BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]] +; P8BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]] ; P8BE: vmrgew v2, [[REG6]], [[REG5]] -; P8BE: xvcvspsxws v2, v2 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] -; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]] -; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]] +; P8LE-DAG: xvcvdpsxws [[REG7:[vs0-9]+]], [[REG5]] +; P8LE-DAG: xvcvdpsxws [[REG8:[vs0-9]+]], [[REG6]] ; P8LE: vmrgew v2, [[REG8]], [[REG7]] -; P8LE: xvcvspsxws v2, v2 } ; Function Attrs: norecurse nounwind readonly @@ -1653,40 +1638,36 @@ entry: ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd -; P9BE: xvcvdpsp -; P9BE: xvcvdpsp -; P9BE: vmrgew -; P9BE: xvcvspsxws v2 +; P9BE: xvcvdpsxws +; P9BE: xvcvdpsxws +; P9BE: vmrgew v2 ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd -; P9LE: xvcvdpsp -; P9LE: xvcvdpsp -; P9LE: vmrgew -; P9LE: xvcvspsxws v2 +; P9LE: xvcvdpsxws +; P9LE: xvcvdpsxws +; P9LE: vmrgew v2 ; P8BE: lfdx ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd -; P8BE: xvcvdpsp -; P8BE: xvcvdpsp -; P8BE: vmrgew -; P8BE: xvcvspsxws v2 +; P8BE: xvcvdpsxws +; P8BE: xvcvdpsxws +; P8BE: vmrgew v2 ; P8LE: lfdx ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd -; P8LE: xvcvdpsp -; P8LE: xvcvdpsp -; P8LE: vmrgew -; P8LE: xvcvspsxws v2 +; P8LE: xvcvdpsxws +; P8LE: xvcvdpsxws +; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly @@ -1726,40 +1707,36 @@ entry: ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd -; P9BE: xvcvdpsp -; P9BE: xvcvdpsp -; P9BE: vmrgew -; P9BE: xvcvspsxws v2 +; P9BE: xvcvdpsxws +; P9BE: xvcvdpsxws +; P9BE: vmrgew v2 ; P9LE: lfdux ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd -; P9LE: xvcvdpsp -; P9LE: xvcvdpsp -; P9LE: vmrgew -; P9LE: xvcvspsxws v2 +; P9LE: xvcvdpsxws +; P9LE: xvcvdpsxws +; P9LE: vmrgew v2 ; P8BE: lfdux ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd -; P8BE: xvcvdpsp -; P8BE: xvcvdpsp -; P8BE: vmrgew -; P8BE: xvcvspsxws v2 +; P8BE: xvcvdpsxws +; P8BE: xvcvdpsxws +; P8BE: vmrgew v2 ; P8LE: lfdux ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd -; P8LE: xvcvdpsp -; P8LE: xvcvdpsp -; P8LE: vmrgew -; P8LE: xvcvspsxws v2 +; P8LE: xvcvdpsxws +; P8LE: xvcvdpsxws +; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly @@ -1799,40 +1776,36 @@ entry: ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd -; P9BE: xvcvdpsp -; P9BE: xvcvdpsp -; P9BE: vmrgew -; P9BE: xvcvspsxws v2 +; P9BE: xvcvdpsxws +; P9BE: xvcvdpsxws +; P9BE: vmrgew v2 ; P9LE: lfdux ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd -; P9LE: xvcvdpsp -; P9LE: xvcvdpsp -; P9LE: vmrgew -; P9LE: xvcvspsxws v2 +; P9LE: xvcvdpsxws +; P9LE: xvcvdpsxws +; P9LE: vmrgew v2 ; P8BE: lfdux ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd -; P8BE: xvcvdpsp -; P8BE: xvcvdpsp -; P8BE: vmrgew -; P8BE: xvcvspsxws v2 +; P8BE: xvcvdpsxws +; P8BE: xvcvdpsxws +; P8BE: vmrgew v2 ; P8LE: lfdux ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd -; P8LE: xvcvdpsp -; P8LE: xvcvdpsp -; P8LE: vmrgew -; P8LE: xvcvspsxws v2 +; P8LE: xvcvdpsxws +; P8LE: xvcvdpsxws +; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readnone @@ -2413,28 +2386,24 @@ entry: ; P8LE-LABEL: fromRegsConvftoui ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9BE: vmrgew v2, [[REG3]], [[REG4]] -; P9BE: xvcvspuxws v2, v2 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9LE: vmrgew v2, [[REG4]], [[REG3]] -; P9LE: xvcvspuxws v2, v2 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8BE: vmrgew v2, [[REG3]], [[REG4]] -; P8BE: xvcvspuxws v2, v2 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8LE: vmrgew v2, [[REG4]], [[REG3]] -; P8LE: xvcvspuxws v2, v2 } ; Function Attrs: norecurse nounwind readnone @@ -2689,28 +2658,24 @@ entry: ; P8LE-LABEL: fromRegsConvdtoui ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9BE: vmrgew v2, [[REG3]], [[REG4]] -; P9BE: xvcvspuxws v2, v2 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P9LE: vmrgew v2, [[REG4]], [[REG3]] -; P9LE: xvcvspuxws v2, v2 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4 -; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8BE: vmrgew v2, [[REG3]], [[REG4]] -; P8BE: xvcvspuxws v2, v2 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2 -; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] -; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] +; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]] +; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]] ; P8LE: vmrgew v2, [[REG4]], [[REG3]] -; P8LE: xvcvspuxws v2, v2 } ; Function Attrs: norecurse nounwind readnone @@ -2752,36 +2717,32 @@ entry: ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] -; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] -; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P9BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] +; P9BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] ; P9BE: vmrgew v2, [[REG6]], [[REG5]] -; P9BE: xvcvspuxws v2, v2 ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3) ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3) -; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]] -; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] -; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]] +; P9LE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] +; P9LE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] ; P9LE: vmrgew v2, [[REG6]], [[REG5]] -; P9LE: xvcvspuxws v2, v2 ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]] ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]] -; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]] -; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]] +; P8BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]] +; P8BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]] ; P8BE: vmrgew v2, [[REG6]], [[REG5]] -; P8BE: xvcvspuxws v2, v2 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3 ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4 ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]] ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]] ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]] ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]] -; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]] -; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]] +; P8LE-DAG: xvcvdpuxws [[REG7:[vs0-9]+]], [[REG5]] +; P8LE-DAG: xvcvdpuxws [[REG8:[vs0-9]+]], [[REG6]] ; P8LE: vmrgew v2, [[REG8]], [[REG7]] -; P8LE: xvcvspuxws v2, v2 } ; Function Attrs: norecurse nounwind readonly @@ -2813,40 +2774,36 @@ entry: ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd -; P9BE: xvcvdpsp -; P9BE: xvcvdpsp -; P9BE: vmrgew -; P9BE: xvcvspuxws v2 +; P9BE: xvcvdpuxws +; P9BE: xvcvdpuxws +; P9BE: vmrgew v2 ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd -; P9LE: xvcvdpsp -; P9LE: xvcvdpsp -; P9LE: vmrgew -; P9LE: xvcvspuxws v2 +; P9LE: xvcvdpuxws +; P9LE: xvcvdpuxws +; P9LE: vmrgew v2 ; P8BE: lfdx ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd -; P8BE: xvcvdpsp -; P8BE: xvcvdpsp -; P8BE: vmrgew -; P8BE: xvcvspuxws v2 +; P8BE: xvcvdpuxws +; P8BE: xvcvdpuxws +; P8BE: vmrgew v2 ; P8LE: lfdx ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd -; P8LE: xvcvdpsp -; P8LE: xvcvdpsp -; P8LE: vmrgew -; P8LE: xvcvspuxws v2 +; P8LE: xvcvdpuxws +; P8LE: xvcvdpuxws +; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly @@ -2886,40 +2843,36 @@ entry: ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd -; P9BE: xvcvdpsp -; P9BE: xvcvdpsp -; P9BE: vmrgew -; P9BE: xvcvspuxws v2 +; P9BE: xvcvdpuxws +; P9BE: xvcvdpuxws +; P9BE: vmrgew v2 ; P9LE: lfdux ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd -; P9LE: xvcvdpsp -; P9LE: xvcvdpsp -; P9LE: vmrgew -; P9LE: xvcvspuxws v2 +; P9LE: xvcvdpuxws +; P9LE: xvcvdpuxws +; P9LE: vmrgew v2 ; P8BE: lfdux ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd -; P8BE: xvcvdpsp -; P8BE: xvcvdpsp -; P8BE: vmrgew -; P8BE: xvcvspuxws v2 +; P8BE: xvcvdpuxws +; P8BE: xvcvdpuxws +; P8BE: vmrgew v2 ; P8LE: lfdux ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd -; P8LE: xvcvdpsp -; P8LE: xvcvdpsp -; P8LE: vmrgew -; P8LE: xvcvspuxws v2 +; P8LE: xvcvdpuxws +; P8LE: xvcvdpuxws +; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readonly @@ -2959,40 +2912,36 @@ entry: ; P9BE: lfd ; P9BE: xxmrghd ; P9BE: xxmrghd -; P9BE: xvcvdpsp -; P9BE: xvcvdpsp -; P9BE: vmrgew -; P9BE: xvcvspuxws v2 +; P9BE: xvcvdpuxws +; P9BE: xvcvdpuxws +; P9BE: vmrgew v2 ; P9LE: lfdux ; P9LE: lfd ; P9LE: lfd ; P9LE: lfd ; P9LE: xxmrghd ; P9LE: xxmrghd -; P9LE: xvcvdpsp -; P9LE: xvcvdpsp -; P9LE: vmrgew -; P9LE: xvcvspuxws v2 +; P9LE: xvcvdpuxws +; P9LE: xvcvdpuxws +; P9LE: vmrgew v2 ; P8BE: lfdux ; P8BE: lfd ; P8BE: lfd ; P8BE: lfd ; P8BE: xxmrghd ; P8BE: xxmrghd -; P8BE: xvcvdpsp -; P8BE: xvcvdpsp -; P8BE: vmrgew -; P8BE: xvcvspuxws v2 +; P8BE: xvcvdpuxws +; P8BE: xvcvdpuxws +; P8BE: vmrgew v2 ; P8LE: lfdux ; P8LE: lfd ; P8LE: lfd ; P8LE: lfd ; P8LE: xxmrghd ; P8LE: xxmrghd -; P8LE: xvcvdpsp -; P8LE: xvcvdpsp -; P8LE: vmrgew -; P8LE: xvcvspuxws v2 +; P8LE: xvcvdpuxws +; P8LE: xvcvdpuxws +; P8LE: vmrgew v2 } ; Function Attrs: norecurse nounwind readnone diff --git a/test/CodeGen/X86/absolute-bit-mask-fastisel.ll b/test/CodeGen/X86/absolute-bit-mask-fastisel.ll new file mode 100644 index 000000000000..34e92e804572 --- /dev/null +++ b/test/CodeGen/X86/absolute-bit-mask-fastisel.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s | FileCheck %s +; RUN: llc -relocation-model=pic < %s | FileCheck %s + +; Regression test for PR38200 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@bit_mask8 = external hidden global i8, !absolute_symbol !0 + +declare void @f() + +define void @foo8(i8* %ptr) noinline optnone { + %load = load i8, i8* %ptr + ; CHECK: movl $bit_mask8, %ecx + %and = and i8 %load, ptrtoint (i8* @bit_mask8 to i8) + %icmp = icmp eq i8 %and, 0 + br i1 %icmp, label %t, label %f + +t: + call void @f() + ret void + +f: + ret void +} + +!0 = !{i64 0, i64 256} diff --git a/test/tools/llvm-ar/invalid-command-line.test b/test/tools/llvm-ar/invalid-command-line.test index e13f54c07bd0..2bfcc1a457db 100644 --- a/test/tools/llvm-ar/invalid-command-line.test +++ b/test/tools/llvm-ar/invalid-command-line.test @@ -2,4 +2,4 @@ Test that llvm-ar exits with 1 when there is an error. RUN: not llvm-ar e 2>&1 | FileCheck %s CHECK: unknown option e. -CHECK: OVERVIEW: LLVM Archiver (llvm-ar) +CHECK: OVERVIEW: LLVM Archiver diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp index 9023bdd1a0d6..64be08ff946a 100644 --- a/tools/llvm-ar/llvm-ar.cpp +++ b/tools/llvm-ar/llvm-ar.cpp @@ -63,46 +63,44 @@ USAGE: llvm-ranlib )"; const char ArHelp[] = R"( -OVERVIEW: LLVM Archiver (llvm-ar) +OVERVIEW: LLVM Archiver - This program archives bitcode files into single libraries - -USAGE: llvm-ar [options] [relpos] [count] [members]... +USAGE: llvm-ar [options] [-][modifiers] [relpos] [files] + llvm-ar -M [ - plugin (ignored for compatibility - -help - Display available options - -version - Display the version of this program + --format - Archive format to create + =default - default + =gnu - gnu + =darwin - darwin + =bsd - bsd + --plugin= - Ignored for compatibility + --help - Display available options + --version - Display the version of this program OPERATIONS: - d[NsS] - delete file(s) from the archive - m[abiSs] - move file(s) in the archive - p[kN] - print file(s) found in the archive - q[ufsS] - quick append file(s) to the archive - r[abfiuRsS] - replace or insert file(s) into the archive - t - display contents of archive - x[No] - extract file(s) from the archive + d - delete [files] from the archive + m - move [files] in the archive + p - print [files] found in the archive + q - quick append [files] to the archive + r - replace or insert [files] into the archive + s - act as ranlib + t - display contents of archive + x - extract [files] from the archive -MODIFIERS (operation specific): - [a] - put file(s) after [relpos] - [b] - put file(s) before [relpos] (same as [i]) +MODIFIERS: + [a] - put [files] after [relpos] + [b] - put [files] before [relpos] (same as [i]) + [c] - do not warn if archive had to be created [D] - use zero for timestamps and uids/gids (default) - [i] - put file(s) before [relpos] (same as [b]) + [i] - put [files] before [relpos] (same as [b]) + [l] - ignored for compatibility [o] - preserve original dates [s] - create an archive index (cf. ranlib) [S] - do not build a symbol table [T] - create a thin archive - [u] - update only files newer than archive contents + [u] - update only [files] newer than archive contents [U] - use actual timestamps and uids/gids - -MODIFIERS (generic): - [c] - do not warn if the library had to be created [v] - be verbose about actions taken )"; diff --git a/tools/llvm-shlib/CMakeLists.txt b/tools/llvm-shlib/CMakeLists.txt index 836024eb8101..13d2c3d2c51f 100644 --- a/tools/llvm-shlib/CMakeLists.txt +++ b/tools/llvm-shlib/CMakeLists.txt @@ -59,10 +59,6 @@ endif() target_link_libraries(LLVM PRIVATE ${LIB_NAMES}) -if (LLVM_DYLIB_SYMBOL_VERSIONING) - set_property(TARGET LLVM APPEND_STRING PROPERTY LINK_FLAGS " -Wl,--default-symver") -endif() - if (APPLE) set_property(TARGET LLVM APPEND_STRING PROPERTY LINK_FLAGS diff --git a/utils/release/tag.sh b/utils/release/tag.sh index c3e839d93238..89aab6b1e7b4 100755 --- a/utils/release/tag.sh +++ b/utils/release/tag.sh @@ -17,7 +17,7 @@ set -e release="" rc="" rebranch="no" -projects="llvm cfe test-suite compiler-rt libcxx libcxxabi clang-tools-extra polly lldb lld openmp libunwind" +projects="llvm cfe test-suite compiler-rt libcxx libcxxabi clang-tools-extra polly lldb lld openmp libunwind debuginfo-tests" dryrun="" revision="HEAD"