From 5e529592b17880abebd71c233b1cb848c32abeb6 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 24 Aug 2017 16:35:02 +0000 Subject: [PATCH 1/4] Vendor import of llvm release_50 branch r311606: https://llvm.org/svn/llvm-project/llvm/branches/release_50@311606 --- docs/ReleaseNotes.rst | 50 +- include/llvm/CodeGen/SelectionDAGNodes.h | 5 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 33 + lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 +- lib/ExecutionEngine/CMakeLists.txt | 4 + lib/IR/AutoUpgrade.cpp | 28 +- lib/Object/COFFModuleDefinition.cpp | 8 +- lib/Target/ARM/ARMISelLowering.cpp | 24 +- lib/Target/X86/X86ISelLowering.cpp | 17 +- lib/Target/X86/X86InstrAVX512.td | 4 +- lib/Target/X86/X86SchedSandyBridge.td | 2478 +---------------- .../llvm-dlltool/DlltoolDriver.cpp | 16 + lib/ToolDrivers/llvm-dlltool/Options.td | 6 +- lib/Transforms/Scalar/LowerAtomic.cpp | 3 +- lib/Transforms/Scalar/Reassociate.cpp | 6 + lib/Transforms/Utils/CloneFunction.cpp | 5 +- test/Bitcode/upgrade-module-flag.ll | 12 +- test/CodeGen/ARM/Windows/vla-cpsr.ll | 13 + test/CodeGen/ARM/vzip.ll | 19 + test/CodeGen/X86/avx-schedule.ll | 408 +-- test/CodeGen/X86/avx512-extract-subvector.ll | 2 +- ...ractelement-legalization-store-ordering.ll | 22 +- test/CodeGen/X86/f16c-schedule.ll | 144 - test/CodeGen/X86/fp128-i128.ll | 2 +- test/CodeGen/X86/gather-addresses.ll | 16 +- test/CodeGen/X86/lea32-schedule.ll | 653 ----- test/CodeGen/X86/lea64-schedule.ll | 534 ---- test/CodeGen/X86/popcnt-schedule.ll | 167 -- test/CodeGen/X86/pr34139.ll | 24 + test/CodeGen/X86/pr34177.ll | 52 + test/CodeGen/X86/pr34271-1.ll | 14 + test/CodeGen/X86/pr34271.ll | 14 + test/CodeGen/X86/recip-fastmath.ll | 46 +- test/CodeGen/X86/recip-fastmath2.ll | 70 +- test/CodeGen/X86/sse-schedule.ll | 248 +- test/CodeGen/X86/sse2-schedule.ll | 598 ++-- test/CodeGen/X86/sse3-schedule.ll | 48 +- test/CodeGen/X86/sse41-schedule.ll | 222 +- test/CodeGen/X86/sse42-schedule.ll | 38 +- 
test/CodeGen/X86/ssse3-schedule.ll | 74 +- test/DllTool/coff-decorated.def | 26 + test/Feature/optnone-opt.ll | 1 - test/Linker/module-flags-pic-1-a.ll | 4 +- test/Transforms/Inline/recursive.ll | 31 + test/Transforms/LowerAtomic/atomic-swap.ll | 11 + .../Reassociate/canonicalize-neg-const.ll | 22 + 47 files changed, 1355 insertions(+), 4879 deletions(-) create mode 100644 test/CodeGen/ARM/Windows/vla-cpsr.ll delete mode 100644 test/CodeGen/X86/f16c-schedule.ll delete mode 100644 test/CodeGen/X86/lea32-schedule.ll delete mode 100644 test/CodeGen/X86/lea64-schedule.ll delete mode 100644 test/CodeGen/X86/popcnt-schedule.ll create mode 100644 test/CodeGen/X86/pr34139.ll create mode 100644 test/CodeGen/X86/pr34177.ll create mode 100644 test/CodeGen/X86/pr34271-1.ll create mode 100644 test/CodeGen/X86/pr34271.ll create mode 100644 test/DllTool/coff-decorated.def diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 48af491f1214..f6ef4e0a3fa2 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -77,11 +77,33 @@ Changes to the LLVM IR * Added speculatable attribute indicating a function which does has no side-effects which could inhibit hoisting of calls. -Changes to the ARM Backend +Changes to the Arm Targets -------------------------- - During this release ... +During this release the AArch64 target has: +* A much improved Global ISel at O0. +* Support for ARMv8.1 8.2 and 8.3 instructions. +* New scheduler information for ThunderX2. +* Some SVE type changes but not much more than that. +* Made instruction fusion more aggressive, resulting in speedups + for code making use of AArch64 AES instructions. AES fusion has been + enabled for most Cortex-A cores and the AArch64MacroFusion pass was moved + to the generic MacroFusion pass. +* Added preferred function alignments for most Cortex-A cores. +* OpenMP "offload-to-self" base support. + +During this release the ARM target has: + +* Improved, but still mostly broken, Global ISel. 
+* Scheduling models update, new schedule for Cortex-A57. +* Hardware breakpoint support in LLDB. +* New assembler error handling, with spelling corrections and multiple + suggestions on how to fix problems. +* Improved mixed ARM/Thumb code generation. Some cases in which wrong + relocations were emitted have been fixed. +* Added initial support for mixed ARM/Thumb link-time optimization, using the + thumb-mode target feature. Changes to the MIPS Target -------------------------- @@ -92,7 +114,29 @@ Changes to the MIPS Target Changes to the PowerPC Target ----------------------------- - During this release ... +* Additional support and exploitation of POWER ISA 3.0: vabsdub, vabsduh, + vabsduw, modsw, moduw, modsd, modud, lxv, stxv, vextublx, vextubrx, vextuhlx, + vextuhrx, vextuwlx, vextuwrx, vextsb2w, vextsb2d, vextsh2w, vextsh2d, and + vextsw2d + +* Implemented Optimal Code Sequences from The PowerPC Compiler Writer's Guide. + +* Enable -fomit-frame-pointer by default. + +* Improved handling of bit reverse intrinsic. + +* Improved handling of memcpy and memcmp functions. + +* Improved handling of branches with static branch hints. + +* Improved codegen for atomic load_acquire. + +* Improved block placement during code layout + +* Many improvements to instruction selection and code generation + + + Changes to the X86 Target ------------------------- diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index db42fb6c170c..051c93601d3f 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -85,7 +85,10 @@ namespace ISD { /// If N is a BUILD_VECTOR node whose elements are all the same constant or /// undefined, return true and return the constant value in \p SplatValue. - bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); + /// This sets \p SplatValue to the smallest possible splat unless AllowShrink + /// is set to false. 
+ bool isConstantSplatVector(const SDNode *N, APInt &SplatValue, + bool AllowShrink = true); /// Return true if the specified node is a BUILD_VECTOR where all of the /// elements are ~0 or undef. diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e102df5e913d..c46d1b04804c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -627,6 +627,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); + SDValue ScalarizeVecOp_VSETCC(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ecb54e1e4b41..6aa3270883f0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -484,6 +484,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VSELECT: Res = ScalarizeVecOp_VSELECT(N); break; + case ISD::SETCC: + Res = ScalarizeVecOp_VSETCC(N); + break; case ISD::STORE: Res = ScalarizeVecOp_STORE(cast(N), OpNo); break; @@ -560,6 +563,36 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { N->getOperand(2)); } +/// If the operand is a vector that needs to be scalarized then the +/// result must be v1i1, so just convert to a scalar SETCC and wrap +/// with a scalar_to_vector since the res type is legal if we got here +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type"); + + EVT VT = N->getValueType(0); + SDValue LHS = 
GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + + EVT OpVT = N->getOperand(0).getValueType(); + EVT NVT = VT.getVectorElementType(); + SDLoc DL(N); + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, + N->getOperand(2)); + + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + + Res = DAG.getNode(ExtendCode, DL, NVT, Res); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res); +} + /// If the value to store is a vector that needs to be scalarized, it must be /// <1 x ty>. Just store the element. SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0ff154784f68..16f425dc7969 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -116,7 +116,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // ISD Namespace //===----------------------------------------------------------------------===// -bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { +bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal, + bool AllowShrink) { auto *BV = dyn_cast(N); if (!BV) return false; @@ -124,9 +125,11 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { APInt SplatUndef; unsigned SplatBitSize; bool HasUndefs; - EVT EltVT = N->getValueType(0).getVectorElementType(); - return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs) && - EltVT.getSizeInBits() >= SplatBitSize; + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + unsigned MinSplatBits = AllowShrink ? 
0 : EltSize; + return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, + MinSplatBits) && + EltSize >= SplatBitSize; } // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt index 2d9337bbefd2..84b34919e442 100644 --- a/lib/ExecutionEngine/CMakeLists.txt +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -14,6 +14,10 @@ add_llvm_library(LLVMExecutionEngine intrinsics_gen ) +if(BUILD_SHARED_LIBS) + target_link_libraries(LLVMExecutionEngine PUBLIC LLVMRuntimeDyld) +endif() + add_subdirectory(Interpreter) add_subdirectory(MCJIT) add_subdirectory(Orc) diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 6a4b8032ffd5..a501799b4799 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -2239,14 +2239,14 @@ bool llvm::UpgradeDebugInfo(Module &M) { } bool llvm::UpgradeModuleFlags(Module &M) { - const NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); + NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); if (!ModFlags) return false; - bool HasObjCFlag = false, HasClassProperties = false; + bool HasObjCFlag = false, HasClassProperties = false, Changed = false; for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) { MDNode *Op = ModFlags->getOperand(I); - if (Op->getNumOperands() < 2) + if (Op->getNumOperands() != 3) continue; MDString *ID = dyn_cast_or_null(Op->getOperand(1)); if (!ID) @@ -2255,7 +2255,24 @@ bool llvm::UpgradeModuleFlags(Module &M) { HasObjCFlag = true; if (ID->getString() == "Objective-C Class Properties") HasClassProperties = true; + // Upgrade PIC/PIE Module Flags. The module flag behavior for these two + // field was Error and now they are Max. 
+ if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") { + if (auto *Behavior = + mdconst::dyn_extract_or_null(Op->getOperand(0))) { + if (Behavior->getLimitedValue() == Module::Error) { + Type *Int32Ty = Type::getInt32Ty(M.getContext()); + Metadata *Ops[3] = { + ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)), + MDString::get(M.getContext(), ID->getString()), + Op->getOperand(2)}; + ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops)); + Changed = true; + } + } + } } + // "Objective-C Class Properties" is recently added for Objective-C. We // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module // flag of value 0, so we can correclty downgrade this flag when trying to @@ -2264,9 +2281,10 @@ bool llvm::UpgradeModuleFlags(Module &M) { if (HasObjCFlag && !HasClassProperties) { M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties", (uint32_t)0); - return true; + Changed = true; } - return false; + + return Changed; } static bool isOldLoopArgument(Metadata *MD) { diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp index ed9140d1fe08..510eac8b239b 100644 --- a/lib/Object/COFFModuleDefinition.cpp +++ b/lib/Object/COFFModuleDefinition.cpp @@ -232,7 +232,13 @@ class Parser { for (;;) { read(); if (Tok.K == Identifier && Tok.Value[0] == '@') { - Tok.Value.drop_front().getAsInteger(10, E.Ordinal); + if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { + // Not an ordinal modifier at all, but the next export (fastcall + // decorated) - complete the current one. 
+ unget(); + Info.Exports.push_back(E); + return Error::success(); + } read(); if (Tok.K == KwNoname) { E.Noname = true; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6ba7593543a9..27dda93387b6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -5901,7 +5901,10 @@ static bool isVUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; @@ -5932,7 +5935,10 @@ static bool isVUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { @@ -5972,7 +5978,10 @@ static bool isVZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -6005,7 +6014,10 @@ static bool isVZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult){ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - WhichResult = M[i] == 0 ? 0 : 1; + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 
0 : 1; unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -8793,6 +8805,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, .addReg(ARM::R4, RegState::Implicit | RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Define) .addReg(ARM::R12, + RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(ARM::CPSR, RegState::Implicit | RegState::Define | RegState::Dead); break; case CodeModel::Large: @@ -8808,6 +8822,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, .addReg(ARM::R4, RegState::Implicit | RegState::Kill) .addReg(ARM::R4, RegState::Implicit | RegState::Define) .addReg(ARM::R12, + RegState::Implicit | RegState::Define | RegState::Dead) + .addReg(ARM::CPSR, RegState::Implicit | RegState::Define | RegState::Dead); break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1e73122cdc38..193ee8de6192 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -29540,8 +29540,9 @@ static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0, // In SetLT case, The second operand of the comparison can be either 1 or 0. APInt SplatVal; if ((CC == ISD::SETLT) && - !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) && - SplatVal == 1) || + !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal, + /*AllowShrink*/false) && + SplatVal.isOneValue()) || (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode())))) return false; @@ -30628,6 +30629,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Byte blends are only available in AVX2 if (VT == MVT::v32i8 && !Subtarget.hasAVX2()) return SDValue(); + // There are no 512-bit blend instructions that use sign bits. 
+ if (VT.is512BitVector()) + return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); APInt DemandedMask(APInt::getSignMask(BitWidth)); @@ -32058,7 +32062,8 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, return SDValue(); APInt SplatVal; - if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) || + if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal, + /*AllowShrink*/false) || !SplatVal.isMask()) return SDValue(); @@ -32642,7 +32647,8 @@ static SDValue detectUSatPattern(SDValue In, EVT VT) { "Unexpected types for truncate operation"); APInt C; - if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) { + if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C, + /*AllowShrink*/false)) { // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according // the element size of the destination type. return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) : @@ -35346,7 +35352,8 @@ static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { SDNode *N1 = N->getOperand(1).getNode(); APInt SplatVal; - if (!ISD::isConstantSplatVector(N1, SplatVal) || !SplatVal.isOneValue()) + if (!ISD::isConstantSplatVector(N1, SplatVal, /*AllowShrink*/false) || + !SplatVal.isOneValue()) return SDValue(); SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N)); diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 0e654a380e7c..0ae960e7d566 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3619,8 +3619,8 @@ let Predicates = [HasVLX] in { def : Pat<(alignedstore256 (v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))), addr:$dst), (VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>; - def : Pat<(alignedstore (v8f32 (extract_subvector - (v16f32 VR512:$src), (iPTR 0))), addr:$dst), + def : Pat<(alignedstore256 (v8f32 (extract_subvector + (v16f32 VR512:$src), (iPTR 0))), addr:$dst), (VMOVAPSZ256mr addr:$dst, (v8f32 
(EXTRACT_SUBREG VR512:$src,sub_ymm)))>; def : Pat<(alignedstore256 (v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))), addr:$dst), diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index 6d85ca6cad64..b8ec5883152c 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -24,8 +24,8 @@ def SandyBridgeModel : SchedMachineModel { // Based on the LSD (loop-stream detector) queue size. let LoopMicroOpBufferSize = 28; - // This flag is set to allow the scheduler to assign - // a default model to unrecognized opcodes. + // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow + // the scheduler to assign a default model to unrecognized opcodes. let CompleteModel = 0; } @@ -48,7 +48,6 @@ def SBPort23 : ProcResource<2>; def SBPort4 : ProcResource<1>; // Many micro-ops are capable of issuing on multiple ports. -def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>; def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>; def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>; def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; @@ -116,10 +115,10 @@ def : WriteRes { // Scalar and vector floating point. defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; // 10-14 cycles. defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -135,11 +134,11 @@ def : WriteRes { } // Vector integer operations. 
-defm : SBWriteResPair; -defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; -defm : SBWriteResPair; +defm : SBWriteResPair; defm : SBWriteResPair; def : WriteRes { let Latency = 2; @@ -149,15 +148,13 @@ def : WriteRes { let Latency = 6; let ResourceCycles = [1, 1, 1]; } -def : WriteRes { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; +def : WriteRes { + let Latency = 6; + let ResourceCycles = [1, 1, 1]; } -def : WriteRes { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; +def : WriteRes { + let Latency = 6; + let ResourceCycles = [1, 1, 1, 1]; } //////////////////////////////////////////////////////////////////////////////// @@ -207,15 +204,13 @@ def : WriteRes { } // Packed Compare Implicit Length Strings, Return Index -def : WriteRes { - let Latency = 11; - let NumMicroOps = 3; +def : WriteRes { + let Latency = 3; let ResourceCycles = [3]; } -def : WriteRes { - let Latency = 17; - let NumMicroOps = 4; - let ResourceCycles = [3,1]; +def : WriteRes { + let Latency = 3; + let ResourceCycles = [3, 1]; } // Packed Compare Explicit Length Strings, Return Index @@ -229,26 +224,22 @@ def : WriteRes { } // AES Instructions. 
-def : WriteRes { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def : WriteRes { - let Latency = 13; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} - -def : WriteRes { - let Latency = 12; - let NumMicroOps = 2; +def : WriteRes { + let Latency = 8; let ResourceCycles = [2]; } -def : WriteRes { - let Latency = 18; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; +def : WriteRes { + let Latency = 8; + let ResourceCycles = [2, 1]; +} + +def : WriteRes { + let Latency = 8; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [2, 1]; } def : WriteRes { @@ -281,2407 +272,4 @@ def : WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; - -// Remaining SNB instrs. - -def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>; 
-def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>; - -def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup1], (instregex "COMP_FST0r")>; -def: InstRW<[SBWriteResGroup1], (instregex "COM_FST0r")>; -def: InstRW<[SBWriteResGroup1], (instregex "UCOM_FPr")>; -def: InstRW<[SBWriteResGroup1], (instregex "UCOM_Fr")>; - -def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>; -def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>; -def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>; -def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>; -def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>; -def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVDDUPrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>; 
-def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>; -def: InstRW<[SBWriteResGroup2], (instregex "SHUFPDrri")>; -def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>; -def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>; -def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>; -def: InstRW<[SBWriteResGroup2], 
(instregex "VMOVSDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>; -def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrm")>; -def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>; -def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrm")>; -def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>; -def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>; -def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>; -def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>; -def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VXORPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>; -def: InstRW<[SBWriteResGroup2], (instregex 
"XORPDrr")>; -def: InstRW<[SBWriteResGroup2], (instregex "XORPSrr")>; - -def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup3], (instregex "LEA64_32r")>; - -def SBWriteResGroup4 : SchedWriteRes<[SBPort0]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "BT32ri8")>; -def: InstRW<[SBWriteResGroup4], (instregex "BT32rr")>; -def: InstRW<[SBWriteResGroup4], (instregex "BTC32ri8")>; -def: InstRW<[SBWriteResGroup4], (instregex "BTC32rr")>; -def: InstRW<[SBWriteResGroup4], (instregex "BTR32ri8")>; -def: InstRW<[SBWriteResGroup4], (instregex "BTR32rr")>; -def: InstRW<[SBWriteResGroup4], (instregex "BTS32ri8")>; -def: InstRW<[SBWriteResGroup4], (instregex "BTS32rr")>; -def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>; -def: InstRW<[SBWriteResGroup4], (instregex "CQO")>; -def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>; -def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>; -def: InstRW<[SBWriteResGroup4], (instregex "SAR32ri")>; -def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>; -def: InstRW<[SBWriteResGroup4], (instregex 
"SETPr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>; -def: InstRW<[SBWriteResGroup4], (instregex "SHL32ri")>; -def: InstRW<[SBWriteResGroup4], (instregex "SHL64r1")>; -def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>; -def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>; -def: InstRW<[SBWriteResGroup4], (instregex "SHR32ri")>; -def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUrr")>; - -def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup5], (instregex "KORTESTBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSHUFBrr64")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>; -def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>; 
-def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>; -def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>; -def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>; -def: 
InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSIGNWrr128")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex 
"PUNPCKLQDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VMASKMOVPSYrm")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>; -def: 
InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>; -def: 
InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLWDrr")>; - -def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup6], (instregex "ADD32ri8")>; -def: InstRW<[SBWriteResGroup6], (instregex "ADD32rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "AND32ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "AND64ri8")>; -def: InstRW<[SBWriteResGroup6], (instregex "AND64rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "CBW")>; -def: InstRW<[SBWriteResGroup6], (instregex "CMC")>; -def: InstRW<[SBWriteResGroup6], (instregex "CMP16ri8")>; -def: InstRW<[SBWriteResGroup6], (instregex "CMP32i32")>; -def: InstRW<[SBWriteResGroup6], (instregex "CMP64rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>; -def: InstRW<[SBWriteResGroup6], (instregex "DEC64r")>; -def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>; -def: InstRW<[SBWriteResGroup6], (instregex "INC64r")>; -def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>; -def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOV32rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>; -def: InstRW<[SBWriteResGroup6], 
(instregex "MOVPQI2QIrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr16")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr8")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr16")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr8")>; -def: InstRW<[SBWriteResGroup6], (instregex "NEG64r")>; -def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>; -def: InstRW<[SBWriteResGroup6], (instregex "NOT64r")>; -def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>; -def: InstRW<[SBWriteResGroup6], (instregex "OR64ri8")>; -def: InstRW<[SBWriteResGroup6], (instregex "OR64rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "STC")>; -def: InstRW<[SBWriteResGroup6], (instregex "SUB64ri8")>; -def: InstRW<[SBWriteResGroup6], (instregex "SUB64rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "TEST64rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "XOR32rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "XOR64ri8")>; -def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>; -def: InstRW<[SBWriteResGroup6], (instregex "XOR8rr")>; - -def SBWriteResGroup7 : 
SchedWriteRes<[SBPort0]> { - let Latency = 2; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>; -def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>; -def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>; -def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>; -def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>; -def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>; -def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>; -def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>; -def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>; -def: InstRW<[SBWriteResGroup7], (instregex "VMOVPQIto64rr")>; - -def SBWriteResGroup9 : SchedWriteRes<[SBPort0]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0")>; -def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>; -def: InstRW<[SBWriteResGroup9], (instregex "ROL32ri")>; -def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>; -def: InstRW<[SBWriteResGroup9], (instregex "ROR32ri")>; -def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>; -def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>; -def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>; -def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>; -def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>; -def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>; -def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSrr")>; - -def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup10], (instregex "VPBLENDVBrr")>; - -def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup11], (instregex "SCASB")>; -def: InstRW<[SBWriteResGroup11], (instregex "SCASL")>; -def: 
InstRW<[SBWriteResGroup11], (instregex "SCASQ")>; -def: InstRW<[SBWriteResGroup11], (instregex "SCASW")>; - -def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup12], (instregex "COMISDrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "COMISSrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "UCOMISDrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "UCOMISSrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "VCOMISDrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "VCOMISSrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISDrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISSrr")>; - -def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup13], (instregex "CVTPS2PDrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "PTESTrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDYrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VPTESTYrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VPTESTrr")>; - -def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>; -def: InstRW<[SBWriteResGroup14], (instregex 
"VPSRLDrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>; -def: InstRW<[SBWriteResGroup14], (instregex "VPSRLWrr")>; - -def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup15], (instregex "FNSTSW16r")>; - -def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort0]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup16], (instregex "BSWAP32r")>; - -def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup17], (instregex "PINSRBrr")>; -def: InstRW<[SBWriteResGroup17], (instregex "PINSRDrr")>; -def: InstRW<[SBWriteResGroup17], (instregex "PINSRQrr")>; -def: InstRW<[SBWriteResGroup17], (instregex "PINSRWrri")>; -def: InstRW<[SBWriteResGroup17], (instregex "VPINSRBrr")>; -def: InstRW<[SBWriteResGroup17], (instregex "VPINSRDrr")>; -def: InstRW<[SBWriteResGroup17], (instregex "VPINSRQrr")>; -def: InstRW<[SBWriteResGroup17], (instregex "VPINSRWrri")>; - -def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>; - -def SBWriteResGroup19 : SchedWriteRes<[SBPort0,SBPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup19], (instregex "ADC64ri8")>; -def: InstRW<[SBWriteResGroup19], (instregex "ADC64rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>; -def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVB32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVE32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVG32rr")>; -def: InstRW<[SBWriteResGroup19], 
(instregex "CMOVGE32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVL32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVO32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVP32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMOVS32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "SBB32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "SBB64ri8")>; -def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>; -def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "SHLD32rri8")>; -def: InstRW<[SBWriteResGroup19], (instregex "SHRD32rri8")>; - -def SBWriteResGroup20 : SchedWriteRes<[SBPort0]> { - let Latency = 3; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>; -def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>; -def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VMOVMSKPSYrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>; -def: 
InstRW<[SBWriteResGroup20], (instregex "VPMADDWDrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VPSADBWrr")>; - -def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> { - let Latency = 3; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>; -def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>; -def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>; -def: InstRW<[SBWriteResGroup21], (instregex "BSF32rr")>; -def: InstRW<[SBWriteResGroup21], (instregex "BSR32rr")>; -def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>; -def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>; -def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r32")>; -def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r8")>; -def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>; -def: 
InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>; -def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>; -def: InstRW<[SBWriteResGroup21], (instregex "POPCNT32rr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>; -def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>; -def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>; -def: 
InstRW<[SBWriteResGroup21], (instregex "VBROADCASTF128")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>; - -def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup22], (instregex "EXTRACTPSrr")>; -def: InstRW<[SBWriteResGroup22], (instregex 
"VEXTRACTPSrr")>; - -def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup23], (instregex "PEXTRBrr")>; -def: InstRW<[SBWriteResGroup23], (instregex "PEXTRDrr")>; -def: InstRW<[SBWriteResGroup23], (instregex "PEXTRQrr")>; -def: InstRW<[SBWriteResGroup23], (instregex "PEXTRWri")>; -def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRBrr")>; -def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRDrr")>; -def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRQrr")>; -def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRWri")>; -def: InstRW<[SBWriteResGroup23], (instregex "SHL64rCL")>; -def: InstRW<[SBWriteResGroup23], (instregex "SHL8rCL")>; - -def SBWriteResGroup24 : SchedWriteRes<[SBPort15]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [3]; -} -def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDSWrr64")>; -def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDWrr64")>; -def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDrr64")>; -def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBDrr64")>; -def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBSWrr64")>; -def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBWrr64")>; -def: InstRW<[SBWriteResGroup24], (instregex "PHADDDrr")>; -def: InstRW<[SBWriteResGroup24], (instregex "PHADDSWrr128")>; -def: InstRW<[SBWriteResGroup24], (instregex "PHADDWrr")>; -def: InstRW<[SBWriteResGroup24], (instregex "PHSUBDrr")>; -def: InstRW<[SBWriteResGroup24], (instregex "PHSUBSWrr128")>; -def: InstRW<[SBWriteResGroup24], (instregex "PHSUBWrr")>; -def: InstRW<[SBWriteResGroup24], (instregex "VPHADDDrr")>; -def: InstRW<[SBWriteResGroup24], (instregex "VPHADDSWrr128")>; -def: InstRW<[SBWriteResGroup24], (instregex "VPHADDWrr")>; -def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBDrr")>; -def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBSWrr128")>; -def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBWrr")>; - 
-def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [3]; -} -def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>; -def: InstRW<[SBWriteResGroup25], (instregex "XADD32rr")>; -def: InstRW<[SBWriteResGroup25], (instregex "XADD8rr")>; - -def SBWriteResGroup26 : SchedWriteRes<[SBPort0,SBPort015]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup26], (instregex "CMOVA32rr")>; -def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE32rr")>; - -def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup27], (instregex "MUL64r")>; - -def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup28], (instregex "CVTDQ2PDrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2DQrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2PSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "CVTSD2SSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SD64rr")>; -def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SDrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "CVTTPD2DQrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPD2PIirr")>; -def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr")>; -def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTTPD2PIirr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDYrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQYrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>; -def: InstRW<[SBWriteResGroup28], 
(instregex "VCVTSI2SDrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQrr")>; - -def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>; -def: InstRW<[SBWriteResGroup29], (instregex "PAUSE")>; - -def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup30], (instregex "MULPDrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "MULPSrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "MULSDrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "MULSSrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "MUL_FPrST0")>; -def: InstRW<[SBWriteResGroup30], (instregex "MUL_FST0r")>; -def: InstRW<[SBWriteResGroup30], (instregex "MUL_FrST0")>; -def: InstRW<[SBWriteResGroup30], (instregex "PCMPGTQrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "PHMINPOSUWrr128")>; -def: InstRW<[SBWriteResGroup30], (instregex "RCPPSr")>; -def: InstRW<[SBWriteResGroup30], (instregex "RCPSSr")>; -def: InstRW<[SBWriteResGroup30], (instregex "RSQRTPSr")>; -def: InstRW<[SBWriteResGroup30], (instregex "RSQRTSSr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VMULPDYrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VMULPDrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VMULPSYrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VMULPSrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VMULSDrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VMULSSrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VPCMPGTQrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VPHMINPOSUWrr128")>; -def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTPSr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTSSr")>; - -def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> { - let Latency = 5; - let 
NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup31], (instregex "MOV32rm")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm16")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm8")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm16")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm8")>; -def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>; - -def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort1]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>; -def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>; -def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>; -def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>; -def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>; -def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>; -def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>; -def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>; -def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>; -def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>; -def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SIrr")>; - -def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup33], (instregex "MOV64mr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOV8mr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVAPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVAPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVDQAmr")>; -def: 
InstRW<[SBWriteResGroup33], (instregex "MOVDQUmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVHPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVHPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVLPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVLPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVNTDQmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>; -def: InstRW<[SBWriteResGroup33], (instregex "PUSH64r")>; -def: InstRW<[SBWriteResGroup33], (instregex "VEXTRACTF128mr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQmr")>; -def: 
InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVPDI2DImr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQI2QImr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQIto64mr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVSDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVSSmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSYmr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSmr")>; - -def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup34], (instregex "MPSADBWrri")>; -def: InstRW<[SBWriteResGroup34], (instregex "VMPSADBWrri")>; - -def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup35], (instregex "CLI")>; -def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>; -def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>; -def: 
InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSrr")>; - -def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup36], (instregex "CALL64r")>; -def: InstRW<[SBWriteResGroup36], (instregex "EXTRACTPSmr")>; -def: InstRW<[SBWriteResGroup36], (instregex "VEXTRACTPSmr")>; - -def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYrm")>; -def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>; -def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>; - -def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup38], (instregex "SETAEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETBm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETGEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETGm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETLEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETLm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETNEm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETNOm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETNPm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETNSm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETOm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETPm")>; -def: InstRW<[SBWriteResGroup38], (instregex "SETSm")>; - -def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup39], (instregex "PEXTRBmr")>; -def: 
InstRW<[SBWriteResGroup39], (instregex "VPEXTRBmr")>; -def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRDmr")>; -def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRWmr")>; - -def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi")>; -def: InstRW<[SBWriteResGroup40], (instregex "STOSB")>; -def: InstRW<[SBWriteResGroup40], (instregex "STOSL")>; -def: InstRW<[SBWriteResGroup40], (instregex "STOSQ")>; -def: InstRW<[SBWriteResGroup40], (instregex "STOSW")>; - -def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup41], (instregex "FNINIT")>; - -def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort015]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG32rr")>; -def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG8rr")>; - -def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup43], (instregex "SETAm")>; -def: InstRW<[SBWriteResGroup43], (instregex "SETBEm")>; - -def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup44], (instregex "LDMXCSR")>; -def: InstRW<[SBWriteResGroup44], (instregex "STMXCSR")>; -def: InstRW<[SBWriteResGroup44], (instregex "VLDMXCSR")>; -def: InstRW<[SBWriteResGroup44], (instregex "VSTMXCSR")>; - -def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup45], (instregex "PEXTRDmr")>; -def: InstRW<[SBWriteResGroup45], (instregex "PEXTRQmr")>; -def: 
InstRW<[SBWriteResGroup45], (instregex "VPEXTRQmr")>; -def: InstRW<[SBWriteResGroup45], (instregex "PUSHF16")>; -def: InstRW<[SBWriteResGroup45], (instregex "PUSHF64")>; - -def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>; - -def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>; - -def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> { - let Latency = 6; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup48], (instregex "LDDQUrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOV64toPQIrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVAPDrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVAPSrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVDDUPrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVDI2PDIrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVDQArm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVDQUrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVNTDQArm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVSHDUPrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVSLDUPrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVUPDrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "MOVUPSrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "POP64r")>; -def: InstRW<[SBWriteResGroup48], (instregex "VBROADCASTSSrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUYrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOV64toPQIrm")>; -def: InstRW<[SBWriteResGroup48], (instregex 
"VMOVAPDrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPSrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVDDUPrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVDI2PDIrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQArm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQUrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVNTDQArm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVQI2PQIrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVSDrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVSHDUPrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVSLDUPrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVSSrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPDrm")>; -def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPSrm")>; - -def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup49], (instregex "JMP64m")>; -def: InstRW<[SBWriteResGroup49], (instregex "MOV64sm")>; - -def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup50], (instregex "BT64mi8")>; - -def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSBrm64")>; -def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSDrm64")>; -def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSWrm64")>; -def: InstRW<[SBWriteResGroup51], (instregex "MMX_PALIGNR64irm")>; -def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSHUFBrm64")>; -def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNBrm64")>; -def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNDrm64")>; -def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNWrm64")>; - -def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 6; - 
let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup52], (instregex "ADD64rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "AND64rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "CMP64mi8")>; -def: InstRW<[SBWriteResGroup52], (instregex "CMP64mr")>; -def: InstRW<[SBWriteResGroup52], (instregex "CMP64rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>; -def: InstRW<[SBWriteResGroup52], (instregex "CMP8mr")>; -def: InstRW<[SBWriteResGroup52], (instregex "CMP8rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "LODSL")>; -def: InstRW<[SBWriteResGroup52], (instregex "LODSQ")>; -def: InstRW<[SBWriteResGroup52], (instregex "OR64rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "OR8rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "SUB64rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "SUB8rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "XOR64rm")>; -def: InstRW<[SBWriteResGroup52], (instregex "XOR8rm")>; - -def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> { - let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup53], (instregex "POP64rmm")>; -def: InstRW<[SBWriteResGroup53], (instregex "PUSH64rmm")>; -def: InstRW<[SBWriteResGroup53], (instregex "ST_F32m")>; -def: InstRW<[SBWriteResGroup53], (instregex "ST_F64m")>; -def: InstRW<[SBWriteResGroup53], (instregex "ST_FP32m")>; -def: InstRW<[SBWriteResGroup53], (instregex "ST_FP64m")>; -def: InstRW<[SBWriteResGroup53], (instregex "ST_FP80m")>; - -def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> { - let Latency = 7; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPDYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex 
"VMOVAPSYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VMOVDDUPYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQAYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQUYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VMOVSHDUPYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VMOVSLDUPYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPDYrm")>; -def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPSYrm")>; - -def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup55], (instregex "CVTPS2PDrm")>; -def: InstRW<[SBWriteResGroup55], (instregex "CVTSS2SDrm")>; -def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDYrm")>; -def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDrm")>; -def: InstRW<[SBWriteResGroup55], (instregex "VCVTSS2SDrm")>; -def: InstRW<[SBWriteResGroup55], (instregex "VTESTPDrm")>; -def: InstRW<[SBWriteResGroup55], (instregex "VTESTPSrm")>; - -def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup56], (instregex "ANDNPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "ANDNPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "ANDPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "ANDPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "INSERTPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "MOVHPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "MOVHPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "MOVLPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "MOVLPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "ORPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "ORPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "SHUFPDrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "SHUFPSrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPDrm")>; -def: 
InstRW<[SBWriteResGroup56], (instregex "UNPCKHPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VANDNPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VANDNPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VANDPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VANDPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128")>; -def: InstRW<[SBWriteResGroup56], (instregex "VINSERTPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VORPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VORPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDri")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSri")>; -def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPDrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPSrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VXORPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VXORPSrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "XORPDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "XORPSrm")>; - -def SBWriteResGroup57 : SchedWriteRes<[SBPort5,SBPort015]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>; -def: InstRW<[SBWriteResGroup57], (instregex 
"AESDECrr")>; -def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>; -def: InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>; -def: InstRW<[SBWriteResGroup57], (instregex "KANDQrr")>; -def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>; -def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>; -def: InstRW<[SBWriteResGroup57], (instregex "VAESENCrr")>; - -def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup58], (instregex "BLENDPDrmi")>; -def: InstRW<[SBWriteResGroup58], (instregex "BLENDPSrmi")>; -def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPDrmi")>; -def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPSrmi")>; -def: InstRW<[SBWriteResGroup58], (instregex "VINSERTF128rm")>; - -def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PABSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PABSDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PABSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PACKSSDWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PACKSSWBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PACKUSDWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PACKUSWBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PADDBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PADDDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PADDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PADDSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PADDSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PADDUSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PADDUSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PADDWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PALIGNRrmi")>; -def: 
InstRW<[SBWriteResGroup59], (instregex "PAVGBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PAVGWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PBLENDWrmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PINSRBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PINSRDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PINSRQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PINSRWrmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMAXSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMAXSDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMAXSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMAXUBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMAXUDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMAXUWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMINSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMINSDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMINSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMINUBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMINUDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMINUWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBDrm")>; -def: InstRW<[SBWriteResGroup59], 
(instregex "PMOVZXBQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSHUFBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSHUFDmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSHUFHWmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSHUFLWmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSIGNBrm128")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSIGNDrm128")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSIGNWrm128")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSUBBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSUBDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSUBQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSUBSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSUBSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PSUBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHQDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHWDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLQDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLWDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPABSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPABSDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPABSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSDWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSWBrm")>; -def: InstRW<[SBWriteResGroup59], 
(instregex "VPACKUSDWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSWBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPADDBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPADDDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPADDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPADDSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPADDSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPADDWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPALIGNRrmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPAVGBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPAVGWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPBLENDWrmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPINSRBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPINSRDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPINSRQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPINSRWrmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMINSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMINSDrm")>; -def: InstRW<[SBWriteResGroup59], 
(instregex "VPMINSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMINUBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMINUDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMINUWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFDmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFHWmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFLWmi")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNBrm128")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNDrm128")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNWrm128")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSUBBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSUBDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSUBQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSBrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPSUBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHDQrm")>; -def: 
InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHQDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHWDrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLBWrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLQDQrm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLWDrm")>; - -def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup60], (instregex "PANDNrm")>; -def: InstRW<[SBWriteResGroup60], (instregex "PANDrm")>; -def: InstRW<[SBWriteResGroup60], (instregex "PORrm")>; -def: InstRW<[SBWriteResGroup60], (instregex "PXORrm")>; -def: InstRW<[SBWriteResGroup60], (instregex "VPANDNrm")>; -def: InstRW<[SBWriteResGroup60], (instregex "VPANDrm")>; -def: InstRW<[SBWriteResGroup60], (instregex "VPORrm")>; -def: InstRW<[SBWriteResGroup60], (instregex "VPXORrm")>; - -def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort0]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSr")>; -def: InstRW<[SBWriteResGroup61], (instregex "VRSQRTPSYr")>; - -def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup62], (instregex "VERRm")>; -def: InstRW<[SBWriteResGroup62], (instregex "VERWm")>; - -def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup63], (instregex "LODSB")>; -def: InstRW<[SBWriteResGroup63], (instregex "LODSW")>; - -def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup64], (instregex "FARJMP64")>; - -def SBWriteResGroup65 : 
SchedWriteRes<[SBPort23,SBPort0,SBPort015]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup65], (instregex "ADC64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVB64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVE64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVG64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVL64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVO64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVP64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "CMOVS64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "SBB64rm")>; -def: InstRW<[SBWriteResGroup65], (instregex "SBB8rm")>; - -def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup66], (instregex "FNSTSWm")>; - -def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup67], (instregex "SLDT32r")>; -def: InstRW<[SBWriteResGroup67], (instregex "STR32r")>; - -def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup68], (instregex "CALL64m")>; -def: InstRW<[SBWriteResGroup68], (instregex "FNSTCW16m")>; - -def SBWriteResGroup69 : 
SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup69], (instregex "BTC64mi8")>; -def: InstRW<[SBWriteResGroup69], (instregex "BTR64mi8")>; -def: InstRW<[SBWriteResGroup69], (instregex "BTS64mi8")>; -def: InstRW<[SBWriteResGroup69], (instregex "SAR64mi")>; -def: InstRW<[SBWriteResGroup69], (instregex "SAR8mi")>; -def: InstRW<[SBWriteResGroup69], (instregex "SHL64m1")>; -def: InstRW<[SBWriteResGroup69], (instregex "SHL64mi")>; -def: InstRW<[SBWriteResGroup69], (instregex "SHL8m1")>; -def: InstRW<[SBWriteResGroup69], (instregex "SHL8mi")>; -def: InstRW<[SBWriteResGroup69], (instregex "SHR64mi")>; -def: InstRW<[SBWriteResGroup69], (instregex "SHR8mi")>; - -def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup70], (instregex "ADD64mi8")>; -def: InstRW<[SBWriteResGroup70], (instregex "ADD64mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>; -def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "AND64mi8")>; -def: InstRW<[SBWriteResGroup70], (instregex "AND64mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>; -def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "DEC64m")>; -def: InstRW<[SBWriteResGroup70], (instregex "DEC8m")>; -def: InstRW<[SBWriteResGroup70], (instregex "INC64m")>; -def: InstRW<[SBWriteResGroup70], (instregex "INC8m")>; -def: InstRW<[SBWriteResGroup70], (instregex "NEG64m")>; -def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>; -def: InstRW<[SBWriteResGroup70], (instregex "NOT64m")>; -def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>; -def: InstRW<[SBWriteResGroup70], (instregex "OR64mi8")>; -def: InstRW<[SBWriteResGroup70], (instregex "OR64mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>; -def: 
InstRW<[SBWriteResGroup70], (instregex "OR8mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "SUB64mi8")>; -def: InstRW<[SBWriteResGroup70], (instregex "SUB64mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>; -def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "TEST64rm")>; -def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>; -def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>; -def: InstRW<[SBWriteResGroup70], (instregex "XOR64mi8")>; -def: InstRW<[SBWriteResGroup70], (instregex "XOR64mr")>; -def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>; -def: InstRW<[SBWriteResGroup70], (instregex "XOR8mr")>; - -def SBWriteResGroup71 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMADDUBSWrm64")>; -def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMULHRSWrm64")>; -def: InstRW<[SBWriteResGroup71], (instregex "VTESTPDYrm")>; -def: InstRW<[SBWriteResGroup71], (instregex "VTESTPSYrm")>; - -def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup72], (instregex "BSF64rm")>; -def: InstRW<[SBWriteResGroup72], (instregex "BSR64rm")>; -def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m16")>; -def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m8")>; -def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>; -def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>; -def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>; -def: InstRW<[SBWriteResGroup72], (instregex "FCOMP64m")>; -def: InstRW<[SBWriteResGroup72], (instregex "MUL8m")>; - -def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>; -def: InstRW<[SBWriteResGroup73], (instregex 
"VANDNPSYrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VANDPDrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VANDPSrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYri")>; -def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDmi")>; -def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYri")>; -def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSmi")>; -def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>; -def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>; -def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VXORPDrm")>; -def: InstRW<[SBWriteResGroup73], (instregex "VXORPSrm")>; - -def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi")>; -def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPSYrmi")>; - -def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPDrm0")>; -def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPSrm0")>; -def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPDrm")>; -def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPSrm")>; -def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm")>; -def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPSrm")>; - -def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: 
InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrr0")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPBLENDVBrm")>; - -def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup77], (instregex "COMISDrm")>; -def: InstRW<[SBWriteResGroup77], (instregex "COMISSrm")>; -def: InstRW<[SBWriteResGroup77], (instregex "UCOMISDrm")>; -def: InstRW<[SBWriteResGroup77], (instregex "UCOMISSrm")>; -def: InstRW<[SBWriteResGroup77], (instregex "VCOMISDrm")>; -def: InstRW<[SBWriteResGroup77], (instregex "VCOMISSrm")>; -def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISDrm")>; -def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISSrm")>; - -def SBWriteResGroup78 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup78], (instregex "PTESTrm")>; -def: InstRW<[SBWriteResGroup78], (instregex "VPTESTrm")>; - -def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDri")>; -def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQri")>; -def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWri")>; -def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>; -def: 
InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>; -def: InstRW<[SBWriteResGroup79], (instregex "VPSRLWrm")>; - -def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDSWrm64")>; -def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDWrm64")>; -def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDrm64")>; -def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBDrm64")>; -def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBSWrm64")>; -def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBWrm64")>; - -def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG64rm")>; -def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG8rm")>; - -def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup82], (instregex "CMOVA64rm")>; -def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE64rm")>; - -def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [2,3]; -} -def: InstRW<[SBWriteResGroup83], (instregex "CMPSB")>; -def: InstRW<[SBWriteResGroup83], (instregex "CMPSL")>; -def: InstRW<[SBWriteResGroup83], (instregex "CMPSQ")>; -def: InstRW<[SBWriteResGroup83], (instregex "CMPSW")>; - -def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,2]; -} -def: InstRW<[SBWriteResGroup84], (instregex "FLDCW16m")>; - -def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,2]; -} -def: InstRW<[SBWriteResGroup85], (instregex "ROL64mi")>; -def: InstRW<[SBWriteResGroup85], (instregex 
"ROL8mi")>; -def: InstRW<[SBWriteResGroup85], (instregex "ROR64mi")>; -def: InstRW<[SBWriteResGroup85], (instregex "ROR8mi")>; - -def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,2]; -} -def: InstRW<[SBWriteResGroup86], (instregex "MOVSB")>; -def: InstRW<[SBWriteResGroup86], (instregex "MOVSL")>; -def: InstRW<[SBWriteResGroup86], (instregex "MOVSQ")>; -def: InstRW<[SBWriteResGroup86], (instregex "MOVSW")>; -def: InstRW<[SBWriteResGroup86], (instregex "XADD64rm")>; -def: InstRW<[SBWriteResGroup86], (instregex "XADD8rm")>; - -def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[SBWriteResGroup87], (instregex "FARCALL64")>; - -def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup88], (instregex "SHLD64mri8")>; -def: InstRW<[SBWriteResGroup88], (instregex "SHRD64mri8")>; - -def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 9; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup89], (instregex "MMX_PMULUDQirm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMADDUBSWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMADDWDrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMULDQrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMULHRSWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMULHUWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMULHWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMULLDrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMULLWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PMULUDQrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "PSADBWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPMADDUBSWrm")>; -def: 
InstRW<[SBWriteResGroup89], (instregex "VPMADDWDrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPMULDQrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPMULHRSWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPMULHUWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPMULHWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPMULLDrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPMULLWrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPMULUDQrm")>; -def: InstRW<[SBWriteResGroup89], (instregex "VPSADBWrm")>; - -def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> { - let Latency = 9; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup90], (instregex "ADDPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ADDPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ADDSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ADDSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CMPPDrmi")>; -def: InstRW<[SBWriteResGroup90], (instregex "CMPPSrmi")>; -def: InstRW<[SBWriteResGroup90], (instregex "CMPSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTDQ2PSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTPS2DQrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SD64rm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTTPS2DQrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MAXPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MAXPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MAXSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MAXSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MINPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MINPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MINSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MINSSrm")>; -def: 
InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>; -def: InstRW<[SBWriteResGroup90], (instregex "POPCNT64rm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPDm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPSm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSDm")>; -def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSSm")>; -def: InstRW<[SBWriteResGroup90], (instregex "SUBPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "SUBPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "SUBSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "SUBSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VADDPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VADDPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VADDSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VADDSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCMPPDrmi")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCMPPSrmi")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCMPSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCMPSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCVTDQ2PSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCVTPS2DQrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SD64rm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMAXPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMAXPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMAXSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMAXSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMINPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMINPSrm")>; -def: 
InstRW<[SBWriteResGroup90], (instregex "VMINSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VMINSSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPDm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPSm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSDm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSSm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VSUBPDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VSUBPSrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VSUBSDrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "VSUBSSrm")>; - -def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>; -def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>; -def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDrm")>; -def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSrm")>; - -def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup92], (instregex "DPPDrri")>; -def: InstRW<[SBWriteResGroup92], (instregex "VDPPDrri")>; - -def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm")>; -def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SIrm")>; -def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SI64rm")>; -def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SIrm")>; -def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SI64rm")>; -def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SIrm")>; -def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SI64rm")>; -def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SIrm")>; -def: InstRW<[SBWriteResGroup93], (instregex "MUL64m")>; - -def SBWriteResGroup94 : 
SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup94], (instregex "VPTESTYrm")>; - -def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup95], (instregex "LD_F32m")>; -def: InstRW<[SBWriteResGroup95], (instregex "LD_F64m")>; -def: InstRW<[SBWriteResGroup95], (instregex "LD_F80m")>; - -def SBWriteResGroup96 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup96], (instregex "PHADDDrm")>; -def: InstRW<[SBWriteResGroup96], (instregex "PHADDSWrm128")>; -def: InstRW<[SBWriteResGroup96], (instregex "PHADDWrm")>; -def: InstRW<[SBWriteResGroup96], (instregex "PHSUBDrm")>; -def: InstRW<[SBWriteResGroup96], (instregex "PHSUBSWrm128")>; -def: InstRW<[SBWriteResGroup96], (instregex "PHSUBWrm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VPHADDDrm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VPHADDSWrm128")>; -def: InstRW<[SBWriteResGroup96], (instregex "VPHADDWrm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBDrm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBSWrm128")>; -def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBWrm")>; - -def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup97], (instregex "IST_F16m")>; -def: InstRW<[SBWriteResGroup97], (instregex "IST_F32m")>; -def: InstRW<[SBWriteResGroup97], (instregex "IST_FP16m")>; -def: InstRW<[SBWriteResGroup97], (instregex "IST_FP32m")>; -def: InstRW<[SBWriteResGroup97], (instregex "IST_FP64m")>; -def: InstRW<[SBWriteResGroup97], (instregex "SHL64mCL")>; -def: InstRW<[SBWriteResGroup97], (instregex "SHL8mCL")>; - -def SBWriteResGroup98 : 
SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { - let Latency = 9; - let NumMicroOps = 6; - let ResourceCycles = [1,2,3]; -} -def: InstRW<[SBWriteResGroup98], (instregex "ADC64mi8")>; -def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>; -def: InstRW<[SBWriteResGroup98], (instregex "SBB64mi8")>; -def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>; - -def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { - let Latency = 9; - let NumMicroOps = 6; - let ResourceCycles = [1,2,2,1]; -} -def: InstRW<[SBWriteResGroup99], (instregex "ADC64mr")>; -def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>; -def: InstRW<[SBWriteResGroup99], (instregex "SBB64mr")>; -def: InstRW<[SBWriteResGroup99], (instregex "SBB8mr")>; - -def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> { - let Latency = 9; - let NumMicroOps = 6; - let ResourceCycles = [1,1,2,1,1]; -} -def: InstRW<[SBWriteResGroup100], (instregex "BT64mr")>; -def: InstRW<[SBWriteResGroup100], (instregex "BTC64mr")>; -def: InstRW<[SBWriteResGroup100], (instregex "BTR64mr")>; -def: InstRW<[SBWriteResGroup100], (instregex "BTS64mr")>; - -def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> { - let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>; -def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>; -def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>; -def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>; -def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>; -def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>; -def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>; -def: InstRW<[SBWriteResGroup101], (instregex "SUB_F32m")>; -def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>; -def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>; -def: InstRW<[SBWriteResGroup101], 
(instregex "VADDSUBPDYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>; -def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>; -def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMINPDrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMINPSrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPDm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPSm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>; -def: InstRW<[SBWriteResGroup101], (instregex "VSUBPSYrm")>; - -def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SIrm")>; - -def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup103], (instregex "CVTDQ2PDrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2DQrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2PSrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "CVTSD2SSrm")>; -def: InstRW<[SBWriteResGroup103], 
(instregex "CVTSI2SS64rm")>; -def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SSrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "CVTTPD2DQrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPD2PIirm")>; -def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm")>; -def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTTPD2PIirm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDYrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2DQrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2PSrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTSD2SSrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SS64rm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>; - -def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 11; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup104], (instregex "MULPDrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "MULPSrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "MULSDrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "MULSSrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "PCMPGTQrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "PHMINPOSUWrm128")>; -def: InstRW<[SBWriteResGroup104], (instregex "RCPPSm")>; -def: InstRW<[SBWriteResGroup104], (instregex "RCPSSm")>; -def: InstRW<[SBWriteResGroup104], (instregex "RSQRTPSm")>; -def: InstRW<[SBWriteResGroup104], (instregex "RSQRTSSm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VMULPDrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VMULPSrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VMULSDrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VMULSSrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VPCMPGTQrm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VPHMINPOSUWrm128")>; -def: 
InstRW<[SBWriteResGroup104], (instregex "VRCPPSm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VRCPSSm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTPSm")>; -def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTSSm")>; - -def SBWriteResGroup105 : SchedWriteRes<[SBPort0]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [3]; -} -def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRIrr")>; -def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRM128rr")>; -def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRIrr")>; -def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRM128rr")>; - -def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup106], (instregex "FICOM16m")>; -def: InstRW<[SBWriteResGroup106], (instregex "FICOM32m")>; -def: InstRW<[SBWriteResGroup106], (instregex "FICOMP16m")>; -def: InstRW<[SBWriteResGroup106], (instregex "FICOMP32m")>; - -def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2DQYrm")>; -def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm")>; -def: InstRW<[SBWriteResGroup107], (instregex "VCVTTPD2DQYrm")>; - -def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup108], (instregex "MPSADBWrmi")>; -def: InstRW<[SBWriteResGroup108], (instregex "VMPSADBWrmi")>; - -def SBWriteResGroup109 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup109], (instregex "HADDPDrm")>; -def: InstRW<[SBWriteResGroup109], (instregex "HADDPSrm")>; -def: InstRW<[SBWriteResGroup109], (instregex "HSUBPDrm")>; -def: InstRW<[SBWriteResGroup109], (instregex 
"HSUBPSrm")>; -def: InstRW<[SBWriteResGroup109], (instregex "VHADDPDrm")>; -def: InstRW<[SBWriteResGroup109], (instregex "VHADDPSrm")>; -def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPDrm")>; -def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPSrm")>; - -def SBWriteResGroup110 : SchedWriteRes<[SBPort5]> { - let Latency = 12; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup110], (instregex "AESIMCrr")>; -def: InstRW<[SBWriteResGroup110], (instregex "VAESIMCrr")>; - -def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 12; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup111], (instregex "MUL_F32m")>; -def: InstRW<[SBWriteResGroup111], (instregex "MUL_F64m")>; -def: InstRW<[SBWriteResGroup111], (instregex "VMULPDYrm")>; -def: InstRW<[SBWriteResGroup111], (instregex "VMULPSYrm")>; - -def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { - let Latency = 12; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup112], (instregex "DPPSrri")>; -def: InstRW<[SBWriteResGroup112], (instregex "VDPPSYrri")>; -def: InstRW<[SBWriteResGroup112], (instregex "VDPPSrri")>; - -def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 12; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDrm")>; -def: InstRW<[SBWriteResGroup113], (instregex "VHADDPSYrm")>; -def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPDYrm")>; -def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPSYrm")>; - -def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { - let Latency = 13; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI16m")>; -def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI32m")>; -def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI16m")>; -def: InstRW<[SBWriteResGroup114], (instregex 
"SUBR_FI32m")>; -def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI16m")>; -def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI32m")>; - -def SBWriteResGroup115 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> { - let Latency = 13; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup115], (instregex "AESDECLASTrm")>; -def: InstRW<[SBWriteResGroup115], (instregex "AESDECrm")>; -def: InstRW<[SBWriteResGroup115], (instregex "AESENCLASTrm")>; -def: InstRW<[SBWriteResGroup115], (instregex "AESENCrm")>; -def: InstRW<[SBWriteResGroup115], (instregex "VAESDECLASTrm")>; -def: InstRW<[SBWriteResGroup115], (instregex "VAESDECrm")>; -def: InstRW<[SBWriteResGroup115], (instregex "VAESENCLASTrm")>; -def: InstRW<[SBWriteResGroup115], (instregex "VAESENCrm")>; - -def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> { - let Latency = 14; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup116], (instregex "DIVPSrr")>; -def: InstRW<[SBWriteResGroup116], (instregex "DIVSSrr")>; -def: InstRW<[SBWriteResGroup116], (instregex "SQRTPSr")>; -def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr")>; -def: InstRW<[SBWriteResGroup116], (instregex "VDIVPSrr")>; -def: InstRW<[SBWriteResGroup116], (instregex "VDIVSSrr")>; -def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPSr")>; - -def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 14; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>; - -def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSm")>; -def: InstRW<[SBWriteResGroup118], (instregex "VRSQRTPSYm")>; - -def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 15; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup119], 
(instregex "MUL_FI16m")>; -def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI32m")>; - -def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup120], (instregex "DPPDrmi")>; -def: InstRW<[SBWriteResGroup120], (instregex "VDPPDrmi")>; - -def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 17; - let NumMicroOps = 4; - let ResourceCycles = [3,1]; -} -def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRIrm")>; -def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRM128rm")>; -def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRIrm")>; -def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRM128rm")>; - -def SBWriteResGroup122 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 18; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup122], (instregex "AESIMCrm")>; -def: InstRW<[SBWriteResGroup122], (instregex "VAESIMCrm")>; - -def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 20; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup123], (instregex "DIVPSrm")>; -def: InstRW<[SBWriteResGroup123], (instregex "DIVSSrm")>; -def: InstRW<[SBWriteResGroup123], (instregex "SQRTPSm")>; -def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm")>; -def: InstRW<[SBWriteResGroup123], (instregex "VDIVPSrm")>; -def: InstRW<[SBWriteResGroup123], (instregex "VDIVSSrm")>; -def: InstRW<[SBWriteResGroup123], (instregex "VSQRTPSm")>; - -def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> { - let Latency = 21; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>; - -def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 21; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>; - -def SBWriteResGroup126 : 
SchedWriteRes<[SBPort0]> { - let Latency = 22; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup126], (instregex "DIVPDrr")>; -def: InstRW<[SBWriteResGroup126], (instregex "DIVSDrr")>; -def: InstRW<[SBWriteResGroup126], (instregex "SQRTPDr")>; -def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr")>; -def: InstRW<[SBWriteResGroup126], (instregex "VDIVPDrr")>; -def: InstRW<[SBWriteResGroup126], (instregex "VDIVSDrr")>; -def: InstRW<[SBWriteResGroup126], (instregex "VSQRTPDr")>; - -def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> { - let Latency = 24; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FPrST0")>; -def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FST0r")>; -def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FrST0")>; -def: InstRW<[SBWriteResGroup127], (instregex "DIV_FPrST0")>; -def: InstRW<[SBWriteResGroup127], (instregex "DIV_FST0r")>; -def: InstRW<[SBWriteResGroup127], (instregex "DIV_FrST0")>; - -def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 28; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup128], (instregex "DIVPDrm")>; -def: InstRW<[SBWriteResGroup128], (instregex "DIVSDrm")>; -def: InstRW<[SBWriteResGroup128], (instregex "SQRTPDm")>; -def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm")>; -def: InstRW<[SBWriteResGroup128], (instregex "VDIVPDrm")>; -def: InstRW<[SBWriteResGroup128], (instregex "VDIVSDrm")>; -def: InstRW<[SBWriteResGroup128], (instregex "VSQRTPDm")>; - -def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort0]> { - let Latency = 29; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>; -def: InstRW<[SBWriteResGroup129], (instregex "VSQRTPSYr")>; - -def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 31; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: 
InstRW<[SBWriteResGroup130], (instregex "DIVR_F32m")>; -def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F64m")>; -def: InstRW<[SBWriteResGroup130], (instregex "DIV_F32m")>; -def: InstRW<[SBWriteResGroup130], (instregex "DIV_F64m")>; - -def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 34; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI16m")>; -def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI32m")>; -def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI16m")>; -def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI32m")>; - -def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { - let Latency = 36; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>; -def: InstRW<[SBWriteResGroup132], (instregex "VSQRTPSYm")>; - -def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort0]> { - let Latency = 45; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>; -def: InstRW<[SBWriteResGroup133], (instregex "VSQRTPDYr")>; - -def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { - let Latency = 52; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>; -def: InstRW<[SBWriteResGroup134], (instregex "VSQRTPDYm")>; - -def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> { - let Latency = 114; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>; - } // SchedModel diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index fc15dc1e6032..4820b9f7de58 100644 --- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -161,6 +161,22 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef ArgsArr) { if (Path.empty()) Path = 
getImplibPath(Def->OutputFile); + if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) { + for (COFFShortExport& E : Def->Exports) { + if (E.isWeak() || (!E.Name.empty() && E.Name[0] == '?')) + continue; + E.SymbolName = E.Name; + // Trim off the trailing decoration. Symbols will always have a + // starting prefix here (either _ for cdecl/stdcall, @ for fastcall + // or ? for C++ functions). (Vectorcall functions also will end up having + // a prefix here, even if they shouldn't.) + E.Name = E.Name.substr(0, E.Name.find('@', 1)); + // By making sure E.SymbolName != E.Name for decorated symbols, + // writeImportLibrary writes these symbols with the type + // IMPORT_NAME_UNDECORATE. + } + } + if (writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true)) return 1; return 0; diff --git a/lib/ToolDrivers/llvm-dlltool/Options.td b/lib/ToolDrivers/llvm-dlltool/Options.td index 213c6a4d7674..e78182ab8130 100644 --- a/lib/ToolDrivers/llvm-dlltool/Options.td +++ b/lib/ToolDrivers/llvm-dlltool/Options.td @@ -12,13 +12,13 @@ def D_long : JoinedOrSeparate<["--"], "dllname">, Alias; def d: JoinedOrSeparate<["-"], "d">, HelpText<"Input .def File">; def d_long : JoinedOrSeparate<["--"], "input-def">, Alias; +def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">; +def k_alias: Flag<["--"], "kill-at">, Alias; + //============================================================================== // The flags below do nothing. They are defined only for dlltool compatibility. 
//============================================================================== -def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">; -def k_alias: Flag<["--"], "kill-at">, Alias; - def S: JoinedOrSeparate<["-"], "S">, HelpText<"Assembler">; def S_alias: JoinedOrSeparate<["--"], "as">, Alias; diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 08e60b16bedf..6f77c5bd0d07 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -155,8 +155,7 @@ class LowerAtomicLegacyPass : public FunctionPass { } bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; + // Don't skip optnone functions; atomics still need to be lowered. FunctionAnalysisManager DummyFAM; auto PA = Impl.run(F, DummyFAM); return !PA.areAllPreserved(); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 29d1ba406ae4..e235e5eb1a06 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -1941,6 +1941,12 @@ Instruction *ReassociatePass::canonicalizeNegConstExpr(Instruction *I) { if (!User->isCommutative() && User->getOperand(1) != I) return nullptr; + // Don't canonicalize x + (-Constant * y) -> x - (Constant * y), if the + // resulting subtract will be broken up later. This can get us into an + // infinite loop during reassociation. + if (UserOpcode == Instruction::FAdd && ShouldBreakUpSubtract(User)) + return nullptr; + // Change the sign of the constant. 
APFloat Val = CF->getValueAPF(); Val.changeSign(); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 7e75e8847785..9c4e13903ed7 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -341,8 +341,9 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) { // On the off-chance that this simplifies to an instruction in the old // function, map it back into the new function. - if (Value *MappedV = VMap.lookup(V)) - V = MappedV; + if (NewFunc != OldFunc) + if (Value *MappedV = VMap.lookup(V)) + V = MappedV; if (!NewInst->mayHaveSideEffects()) { VMap[&*II] = V; diff --git a/test/Bitcode/upgrade-module-flag.ll b/test/Bitcode/upgrade-module-flag.ll index d6741faa837f..de6c9b2cf1bb 100644 --- a/test/Bitcode/upgrade-module-flag.ll +++ b/test/Bitcode/upgrade-module-flag.ll @@ -1,9 +1,13 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s ; RUN: verify-uselistorder < %s -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !1, !2} -!0 = !{i32 1, !"Objective-C Image Info Version", i32 0} +!0 = !{i32 1, !"PIC Level", i32 1} +!1 = !{i32 1, !"PIE Level", i32 1} +!2 = !{i32 1, !"Objective-C Image Info Version", i32 0} -; CHECK: !0 = !{i32 1, !"Objective-C Image Info Version", i32 0} -; CHECK: !1 = !{i32 4, !"Objective-C Class Properties", i32 0} +; CHECK: !0 = !{i32 7, !"PIC Level", i32 1} +; CHECK: !1 = !{i32 7, !"PIE Level", i32 1} +; CHECK: !2 = !{i32 1, !"Objective-C Image Info Version", i32 0} +; CHECK: !3 = !{i32 4, !"Objective-C Class Properties", i32 0} diff --git a/test/CodeGen/ARM/Windows/vla-cpsr.ll b/test/CodeGen/ARM/Windows/vla-cpsr.ll new file mode 100644 index 000000000000..de0f0b68a4d2 --- /dev/null +++ b/test/CodeGen/ARM/Windows/vla-cpsr.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple thumbv7-windows-itanium -filetype asm -o /dev/null %s -print-machineinstrs=expand-isel-pseudos 2>&1 | FileCheck %s + +declare 
arm_aapcs_vfpcc void @g(i8*) local_unnamed_addr + +define arm_aapcs_vfpcc void @f(i32 %i) local_unnamed_addr { +entry: + %vla = alloca i8, i32 %i, align 1 + call arm_aapcs_vfpcc void @g(i8* nonnull %vla) + ret void +} + +; CHECK: tBL pred:14, pred:%noreg, , %LR, %SP, %R4, %R4, %R12, %CPSR + diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll index 771bf5f05215..06b49ab94053 100644 --- a/test/CodeGen/ARM/vzip.ll +++ b/test/CodeGen/ARM/vzip.ll @@ -282,6 +282,25 @@ entry: ret <8 x i16> %0 } +; NOTE: The mask here looks like something that could be done with a vzip, +; but which the current handling of two-result vzip can't do - thus ending up +; as a vtrn. +define <8 x i16> @vzip_lower_shufflemask_undef_rev(<4 x i16>* %A, <4 x i16>* %B) { +; CHECK-LABEL: vzip_lower_shufflemask_undef_rev: +; CHECK: @ BB#0: @ %entry +; CHECK-NEXT: vldr d16, [r1] +; CHECK-NEXT: vldr d19, [r0] +; CHECK-NEXT: vtrn.16 d19, d16 +; CHECK-NEXT: vmov r0, r1, d18 +; CHECK-NEXT: vmov r2, r3, d19 +; CHECK-NEXT: mov pc, lr +entry: + %tmp1 = load <4 x i16>, <4 x i16>* %A + %tmp2 = load <4 x i16>, <4 x i16>* %B + %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> + ret <8 x i16> %0 +} + define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) { ; CHECK-LABEL: vzip_lower_shufflemask_zeroed: ; CHECK: @ BB#0: @ %entry diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll index 953f3bdd06e8..78c88f401cbc 100644 --- a/test/CodeGen/X86/avx-schedule.ll +++ b/test/CodeGen/X86/avx-schedule.ll @@ -10,8 +10,8 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addpd: ; HASWELL: # BB#0: @@ -40,8 +40,8 @@ 
define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addps: ; HASWELL: # BB#0: @@ -70,8 +70,8 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # BB#0: @@ -101,8 +101,8 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsubps: ; HASWELL: # BB#0: @@ -131,10 +131,10 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; 
HASWELL-LABEL: test_andnotpd: ; HASWELL: # BB#0: @@ -172,10 +172,10 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: -; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # BB#0: @@ -213,10 +213,10 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_andpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andpd: ; HASWELL: # BB#0: @@ -252,10 +252,10 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_andps: ; SANDY: # BB#0: -; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; 
HASWELL-LABEL: test_andps: ; HASWELL: # BB#0: @@ -291,10 +291,10 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_blendpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # BB#0: @@ -326,9 +326,9 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] +; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendps: ; HASWELL: # BB#0: @@ -356,9 +356,9 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> * define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: 
vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # BB#0: @@ -387,9 +387,9 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # BB#0: @@ -418,8 +418,8 @@ declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x f define <8 x float> @test_broadcastf128(<4 x float> *%a0) { ; SANDY-LABEL: test_broadcastf128: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_broadcastf128: ; HASWELL: # BB#0: @@ -443,8 +443,8 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) { define <4 x double> @test_broadcastsd_ymm(double *%a0) { ; SANDY-LABEL: test_broadcastsd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_broadcastsd_ymm: ; HASWELL: # BB#0: @@ -469,8 +469,8 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) { define <4 x float> @test_broadcastss(float *%a0) { ; SANDY-LABEL: test_broadcastss: ; SANDY: # BB#0: 
-; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_broadcastss: ; HASWELL: # BB#0: @@ -496,7 +496,7 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) { ; SANDY-LABEL: test_broadcastss_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_broadcastss_ymm: ; HASWELL: # BB#0: @@ -522,9 +522,9 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # BB#0: @@ -560,9 +560,9 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # BB#0: @@ -598,9 +598,9 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] +; SANDY-NEXT: 
vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # BB#0: @@ -632,12 +632,12 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:1.00] -; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00] +; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00] +; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [5:1.00] +; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [4:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # BB#0: @@ -669,10 +669,10 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # BB#0: @@ -704,10 +704,10 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: 
[4:1.00] -; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # BB#0: @@ -741,8 +741,8 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00] -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # BB#0: @@ -774,9 +774,9 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00] -; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: @@ -804,9 +804,9 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00] -; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; 
HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: @@ -834,9 +834,9 @@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: -; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_dpps: ; HASWELL: # BB#0: @@ -866,9 +866,9 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa ; SANDY-LABEL: test_extractf128: ; SANDY: # BB#0: ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_extractf128: ; HASWELL: # BB#0: @@ -900,7 +900,7 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SANDY: # BB#0: ; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # BB#0: @@ -929,9 +929,9 @@ declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounw define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; 
SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_haddps: ; HASWELL: # BB#0: @@ -960,9 +960,9 @@ declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # BB#0: @@ -991,9 +991,9 @@ declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounw define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # BB#0: @@ -1023,9 +1023,9 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float ; SANDY-LABEL: test_insertf128: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_insertf128: ; HASWELL: # BB#0: @@ -1059,8 +1059,8 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float define <32 x i8> @test_lddqu(i8* %a0) { ; 
SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: -; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # BB#0: @@ -1084,10 +1084,10 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { ; SANDY-LABEL: test_maskmovpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] -; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovpd: ; HASWELL: # BB#0: @@ -1119,10 +1119,10 @@ declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) { ; SANDY-LABEL: test_maskmovpd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00] +; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] ; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovpd_ymm: ; HASWELL: # BB#0: @@ -1154,10 +1154,10 @@ declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwi define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) { ; SANDY-LABEL: test_maskmovps: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00] -; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # 
sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovps: ; HASWELL: # BB#0: @@ -1189,10 +1189,10 @@ declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) { ; SANDY-LABEL: test_maskmovps_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] +; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] ; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovps_ymm: ; HASWELL: # BB#0: @@ -1225,8 +1225,8 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # BB#0: @@ -1256,8 +1256,8 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxps: ; HASWELL: # BB#0: @@ -1288,7 +1288,7 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: 
[3:1.00] ; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minpd: ; HASWELL: # BB#0: @@ -1319,7 +1319,7 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY: # BB#0: ; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minps: ; HASWELL: # BB#0: @@ -1348,10 +1348,10 @@ declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movapd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] +; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [4:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movapd: ; HASWELL: # BB#0: @@ -1382,10 +1382,10 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] +; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [4:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movaps: ; HASWELL: # BB#0: @@ -1417,9 +1417,9 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] 
sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] +; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movddup: ; HASWELL: # BB#0: @@ -1451,9 +1451,9 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { define i32 @test_movmskpd(<4 x double> %a0) { ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] +; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: @@ -1479,9 +1479,9 @@ declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone define i32 @test_movmskps(<8 x float> %a0) { ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: @@ -1508,8 +1508,8 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # BB#0: @@ -1537,8 +1537,8 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00] -; 
SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntps: ; HASWELL: # BB#0: @@ -1566,9 +1566,9 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] +; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # BB#0: @@ -1601,9 +1601,9 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] +; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # BB#0: @@ -1635,12 +1635,12 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; 
SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movupd: ; HASWELL: # BB#0: @@ -1671,12 +1671,12 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movups: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movups: ; HASWELL: # BB#0: @@ -1708,8 +1708,8 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # BB#0: @@ -1738,8 +1738,8 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_mulps: ; SANDY: # BB#0: ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulps: ; 
HASWELL: # BB#0: @@ -1767,10 +1767,10 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: orpd: ; SANDY: # BB#0: -; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: orpd: ; HASWELL: # BB#0: @@ -1806,10 +1806,10 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_orps: ; HASWELL: # BB#0: @@ -1846,9 +1846,9 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_permilpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] +; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilpd: ; HASWELL: # BB#0: @@ -1880,10 +1880,10 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { define <4 x double> @test_permilpd_ymm(<4 x double> %a0, 
<4 x double> *%a1) { ; SANDY-LABEL: test_permilpd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00] +; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] ; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilpd_ymm: ; HASWELL: # BB#0: @@ -1916,9 +1916,9 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_permilps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] +; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilps: ; HASWELL: # BB#0: @@ -1950,10 +1950,10 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_permilps_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00] +; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] ; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilps_ymm: ; HASWELL: # BB#0: @@ -1986,8 +1986,8 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> ; SANDY-LABEL: test_permilvarpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: 
vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilvarpd: ; HASWELL: # BB#0: @@ -2018,7 +2018,7 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilvarpd_ymm: ; HASWELL: # BB#0: @@ -2048,8 +2048,8 @@ define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> * ; SANDY-LABEL: test_permilvarps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilvarps: ; HASWELL: # BB#0: @@ -2080,7 +2080,7 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3 ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilvarps_ymm: ; HASWELL: # BB#0: @@ -2112,7 +2112,7 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # BB#0: @@ -2148,7 +2148,7 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %ymm1, 
%ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # BB#0: @@ -2184,7 +2184,7 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [7:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundps: ; HASWELL: # BB#0: @@ -2217,10 +2217,10 @@ declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readno define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:3.00] -; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:3.00] +; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rsqrtps: ; HASWELL: # BB#0: @@ -2254,9 +2254,9 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] +; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # BB#0: @@ -2289,8 +2289,8 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} ymm0 = 
ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_shufps: ; HASWELL: # BB#0: @@ -2318,10 +2318,10 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:3.00] -; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:3.00] +; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [15:1.00] +; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [19:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: @@ -2354,10 +2354,10 @@ declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:3.00] -; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:3.00] +; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [15:1.00] +; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: @@ -2391,8 +2391,8 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; 
SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subpd: ; HASWELL: # BB#0: @@ -2421,8 +2421,8 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subps: ; HASWELL: # BB#0: @@ -2451,11 +2451,11 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SANDY-LABEL: test_testpd: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [5:0.50] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_testpd: ; HASWELL: # BB#0: @@ -2495,12 +2495,12 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a ; SANDY-LABEL: test_testpd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_testpd_ymm: ; HASWELL: # BB#0: @@ -2542,11 +2542,11 @@ define i32 @test_testps(<4 x 
float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_testps: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [5:0.50] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_testps: ; HASWELL: # BB#0: @@ -2586,12 +2586,12 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) ; SANDY-LABEL: test_testps_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_testps_ymm: ; HASWELL: # BB#0: @@ -2635,7 +2635,7 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # BB#0: @@ -2669,7 +2669,7 @@ define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = 
ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpckhps: ; HASWELL: # BB#0: @@ -2698,9 +2698,9 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # BB#0: @@ -2733,8 +2733,8 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # BB#0: @@ -2762,10 +2762,10 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_xorpd: ; SANDY: # BB#0: -; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vxorpd 
(%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # BB#0: @@ -2801,10 +2801,10 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_xorps: ; SANDY: # BB#0: -; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_xorps: ; HASWELL: # BB#0: @@ -2841,7 +2841,7 @@ define void @test_zeroall() { ; SANDY-LABEL: test_zeroall: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroall # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_zeroall: ; HASWELL: # BB#0: @@ -2866,7 +2866,7 @@ define void @test_zeroupper() { ; SANDY-LABEL: test_zeroupper: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_zeroupper: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/avx512-extract-subvector.ll b/test/CodeGen/X86/avx512-extract-subvector.ll index 2d0a81046b4e..85db44ddd232 100644 --- a/test/CodeGen/X86/avx512-extract-subvector.ll +++ b/test/CodeGen/X86/avx512-extract-subvector.ll @@ -493,7 +493,7 @@ entry: define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vmovaps %ymm0, (%rdi) +; SKX-NEXT: vmovups %ymm0, (%rdi) ; 
SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: diff --git a/test/CodeGen/X86/extractelement-legalization-store-ordering.ll b/test/CodeGen/X86/extractelement-legalization-store-ordering.ll index 4d0b5ccc16b0..9d0900f3b424 100644 --- a/test/CodeGen/X86/extractelement-legalization-store-ordering.ll +++ b/test/CodeGen/X86/extractelement-legalization-store-ordering.ll @@ -15,18 +15,18 @@ define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* noca ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: paddd (%ecx), %xmm0 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movdqa %xmm0, (%ecx) -; CHECK-NEXT: movl (%ecx), %esi -; CHECK-NEXT: movl 4(%ecx), %edi -; CHECK-NEXT: shll $4, %edx -; CHECK-NEXT: movl 8(%ecx), %ebx -; CHECK-NEXT: movl 12(%ecx), %ecx -; CHECK-NEXT: movl %esi, 12(%eax,%edx) -; CHECK-NEXT: movl %edi, (%eax,%edx) -; CHECK-NEXT: movl %ebx, 8(%eax,%edx) -; CHECK-NEXT: movl %ecx, 4(%eax,%edx) +; CHECK-NEXT: paddd (%edx), %xmm0 +; CHECK-NEXT: movdqa %xmm0, (%edx) +; CHECK-NEXT: movl (%edx), %esi +; CHECK-NEXT: movl 4(%edx), %edi +; CHECK-NEXT: shll $4, %ecx +; CHECK-NEXT: movl 8(%edx), %ebx +; CHECK-NEXT: movl 12(%edx), %edx +; CHECK-NEXT: movl %esi, 12(%eax,%ecx) +; CHECK-NEXT: movl %edi, (%eax,%ecx) +; CHECK-NEXT: movl %ebx, 8(%eax,%ecx) +; CHECK-NEXT: movl %edx, 4(%eax,%ecx) ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: popl %ebx diff --git a/test/CodeGen/X86/f16c-schedule.ll b/test/CodeGen/X86/f16c-schedule.ll deleted file mode 100644 index 15ae4a49d7d3..000000000000 --- a/test/CodeGen/X86/f16c-schedule.ll +++ /dev/null @@ -1,144 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s 
--check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) { -; IVY-LABEL: test_vcvtph2ps_128: -; IVY: # BB#0: -; IVY-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00] -; IVY-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] -; IVY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_vcvtph2ps_128: -; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00] -; HASWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_vcvtph2ps_128: -; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_vcvtph2ps_128: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %1 = load <8 x i16>, <8 x i16> *%a1 - %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1) - %3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) - %4 = fadd <4 x float> %2, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) - -define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) { -; IVY-LABEL: test_vcvtph2ps_256: -; 
IVY: # BB#0: -; IVY-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00] -; IVY-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00] -; IVY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_vcvtph2ps_256: -; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00] -; HASWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_vcvtph2ps_256: -; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_vcvtph2ps_256: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %1 = load <8 x i16>, <8 x i16> *%a1 - %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1) - %3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) - %4 = fadd <8 x float> %2, %3 - ret <8 x float> %4 -} -declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) - -define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) { -; IVY-LABEL: test_vcvtps2ph_128: -; IVY: # BB#0: -; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] -; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_vcvtps2ph_128: -; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_vcvtps2ph_128: -; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vcvtps2ph $0, %xmm1, 
(%rdi) # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_vcvtps2ph_128: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [12:1.00] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) - %2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0) - %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> - store <4 x i16> %3, <4 x i16> *%a2 - ret <8 x i16> %1 -} -declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) - -define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) { -; IVY-LABEL: test_vcvtps2ph_256: -; IVY: # BB#0: -; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] -; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] -; IVY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; IVY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_vcvtps2ph_256: -; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_vcvtps2ph_256: -; BTVER2: # BB#0: -; BTVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_vcvtps2ph_256: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [12:1.00] -; ZNVER1-NEXT: vzeroupper # sched: [?:0.000000e+00] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) - %2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0) - store <8 x i16> %2, <8 x i16> *%a2 - ret <8 x i16> %1 -} -declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) diff --git 
a/test/CodeGen/X86/fp128-i128.ll b/test/CodeGen/X86/fp128-i128.ll index 98082ec611d4..6c6bc8bdc1d1 100644 --- a/test/CodeGen/X86/fp128-i128.ll +++ b/test/CodeGen/X86/fp128-i128.ll @@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 { ; CHECK-NEXT: andq %rdi, %rcx ; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 ; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: jmp foo # TAILCALL diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index e09ad3e4e0b8..c3109673468e 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -16,10 +16,10 @@ ; LIN: sarq $32, %r[[REG2]] ; LIN: movslq %e[[REG4]], %r[[REG3:.+]] ; LIN: sarq $32, %r[[REG4]] -; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 -; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 -; LIN: movq %rdi, %xmm1 -; LIN: movq %r[[REG3]], %xmm0 +; LIN: movsd (%rdi,%r[[REG1]],8), %xmm0 +; LIN: movhpd (%rdi,%r[[REG2]],8), %xmm0 +; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 +; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 ; WIN: movdqa (%rdx), %xmm0 ; WIN: pand (%r8), %xmm0 @@ -29,10 +29,10 @@ ; WIN: sarq $32, %r[[REG2]] ; WIN: movslq %e[[REG4]], %r[[REG3:.+]] ; WIN: sarq $32, %r[[REG4]] -; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 -; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 -; WIN: movdqa (%r[[REG2]]), %xmm0 -; WIN: movq %r[[REG2]], %xmm1 +; WIN: movsd (%rcx,%r[[REG1]],8), %xmm0 +; WIN: movhpd (%rcx,%r[[REG2]],8), %xmm0 +; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 +; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>, <4 x i32>* %i diff --git a/test/CodeGen/X86/lea32-schedule.ll b/test/CodeGen/X86/lea32-schedule.ll deleted file mode 100644 index 
e42ce30c5a6d..000000000000 --- a/test/CodeGen/X86/lea32-schedule.ll +++ /dev/null @@ -1,653 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i32 @test_lea_offset(i32) { -; GENERIC-LABEL: test_lea_offset: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_offset: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal -24(%rdi), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop 
-; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_offset: -; SLM: # BB#0: -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal -24(%rdi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_offset: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_offset: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_offset: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_offset: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal -24(%rdi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = add nsw i32 %0, -24 - ret i32 %2 -} - -define i32 @test_lea_offset_big(i32) { -; GENERIC-LABEL: test_lea_offset_big: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_offset_big: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal 1024(%rdi), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_offset_big: -; SLM: # BB#0: -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal 1024(%rdi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_offset_big: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_offset_big: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal 1024(%rdi), %eax # 
sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_offset_big: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_offset_big: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal 1024(%rdi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = add nsw i32 %0, 1024 - ret i32 %2 -} - -; Function Attrs: norecurse nounwind readnone uwtable -define i32 @test_lea_add(i32, i32) { -; GENERIC-LABEL: test_lea_add: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %ESI %ESI %RSI -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %ESI %ESI %RSI -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal (%rdi,%rsi), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add: -; SLM: # BB#0: -; SLM-NEXT: # kill: %ESI %ESI %RSI -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal (%rdi,%rsi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %ESI %ESI %RSI -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %ESI %ESI %RSI -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %ESI %ESI %RSI -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; 
ZNVER1-LABEL: test_lea_add: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %ESI %ESI %RSI -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = add nsw i32 %1, %0 - ret i32 %3 -} - -define i32 @test_lea_add_offset(i32, i32) { -; GENERIC-LABEL: test_lea_add_offset: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %ESI %ESI %RSI -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_offset: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %ESI %ESI %RSI -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal 16(%rdi,%rsi), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_offset: -; SLM: # BB#0: -; SLM-NEXT: # kill: %ESI %ESI %RSI -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_offset: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %ESI %ESI %RSI -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $16, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_offset: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %ESI %ESI %RSI -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $16, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_offset: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %ESI %ESI %RSI -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_offset: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %ESI %ESI %RSI -; ZNVER1-NEXT: # kill: %EDI %EDI 
%RDI -; ZNVER1-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = add i32 %0, 16 - %4 = add i32 %3, %1 - ret i32 %4 -} - -define i32 @test_lea_add_offset_big(i32, i32) { -; GENERIC-LABEL: test_lea_add_offset_big: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %ESI %ESI %RSI -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_offset_big: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %ESI %ESI %RSI -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal -4096(%rdi,%rsi), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_offset_big: -; SLM: # BB#0: -; SLM-NEXT: # kill: %ESI %ESI %RSI -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_offset_big: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %ESI %ESI %RSI -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $-4096, %eax # imm = 0xF000 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_offset_big: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %ESI %ESI %RSI -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $-4096, %eax # imm = 0xF000 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_offset_big: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %ESI %ESI %RSI -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_offset_big: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %ESI %ESI %RSI -; ZNVER1-NEXT: # kill: 
%EDI %EDI %RDI -; ZNVER1-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = add i32 %0, -4096 - %4 = add i32 %3, %1 - ret i32 %4 -} - -define i32 @test_lea_mul(i32) { -; GENERIC-LABEL: test_lea_mul: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal (%rdi,%rdi,2), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_mul: -; SLM: # BB#0: -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_mul: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = mul nsw i32 %0, 3 - ret i32 %2 -} - -define i32 @test_lea_mul_offset(i32) { -; GENERIC-LABEL: test_lea_mul_offset: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul_offset: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal 
-32(%rdi,%rdi,2), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_mul_offset: -; SLM: # BB#0: -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul_offset: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $-32, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul_offset: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $-32, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_mul_offset: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul_offset: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = mul nsw i32 %0, 3 - %3 = add nsw i32 %2, -32 - ret i32 %3 -} - -define i32 @test_lea_mul_offset_big(i32) { -; GENERIC-LABEL: test_lea_mul_offset_big: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul_offset_big: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal 10000(%rdi,%rdi,8), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_mul_offset_big: -; SLM: # BB#0: -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:1.00] -; SLM-NEXT: retq # 
sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul_offset_big: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $10000, %eax # imm = 0x2710 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul_offset_big: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $10000, %eax # imm = 0x2710 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_mul_offset_big: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul_offset_big: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = mul nsw i32 %0, 9 - %3 = add nsw i32 %2, 10000 - ret i32 %3 -} - -define i32 @test_lea_add_scale(i32, i32) { -; GENERIC-LABEL: test_lea_add_scale: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %ESI %ESI %RSI -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %ESI %ESI %RSI -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal (%rdi,%rsi,2), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_scale: -; SLM: # BB#0: -; SLM-NEXT: # kill: %ESI %ESI %RSI -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %ESI %ESI %RSI -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; 
SANDY-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %ESI %ESI %RSI -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_scale: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %ESI %ESI %RSI -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %ESI %ESI %RSI -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = shl i32 %1, 1 - %4 = add nsw i32 %3, %0 - ret i32 %4 -} - -define i32 @test_lea_add_scale_offset(i32, i32) { -; GENERIC-LABEL: test_lea_add_scale_offset: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %ESI %ESI %RSI -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale_offset: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %ESI %ESI %RSI -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal 96(%rdi,%rsi,4), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_scale_offset: -; SLM: # BB#0: -; SLM-NEXT: # kill: %ESI %ESI %RSI -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale_offset: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %ESI %ESI %RSI -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $96, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; 
HASWELL-LABEL: test_lea_add_scale_offset: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %ESI %ESI %RSI -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $96, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_scale_offset: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %ESI %ESI %RSI -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale_offset: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %ESI %ESI %RSI -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = shl i32 %1, 2 - %4 = add i32 %0, 96 - %5 = add i32 %4, %3 - ret i32 %5 -} - -define i32 @test_lea_add_scale_offset_big(i32, i32) { -; GENERIC-LABEL: test_lea_add_scale_offset_big: -; GENERIC: # BB#0: -; GENERIC-NEXT: # kill: %ESI %ESI %RSI -; GENERIC-NEXT: # kill: %EDI %EDI %RDI -; GENERIC-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale_offset_big: -; ATOM: # BB#0: -; ATOM-NEXT: # kill: %ESI %ESI %RSI -; ATOM-NEXT: # kill: %EDI %EDI %RDI -; ATOM-NEXT: leal -1200(%rdi,%rsi,8), %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_scale_offset_big: -; SLM: # BB#0: -; SLM-NEXT: # kill: %ESI %ESI %RSI -; SLM-NEXT: # kill: %EDI %EDI %RDI -; SLM-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale_offset_big: -; SANDY: # BB#0: -; SANDY-NEXT: # kill: %ESI %ESI %RSI -; SANDY-NEXT: # kill: %EDI %EDI %RDI -; SANDY-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] -; SANDY-NEXT: addl $-1200, %eax # imm = 0xFB50 -; SANDY-NEXT: # sched: [1:0.33] 
-; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale_offset_big: -; HASWELL: # BB#0: -; HASWELL-NEXT: # kill: %ESI %ESI %RSI -; HASWELL-NEXT: # kill: %EDI %EDI %RDI -; HASWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50] -; HASWELL-NEXT: addl $-1200, %eax # imm = 0xFB50 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_scale_offset_big: -; BTVER2: # BB#0: -; BTVER2-NEXT: # kill: %ESI %ESI %RSI -; BTVER2-NEXT: # kill: %EDI %EDI %RDI -; BTVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale_offset_big: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: # kill: %ESI %ESI %RSI -; ZNVER1-NEXT: # kill: %EDI %EDI %RDI -; ZNVER1-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = shl i32 %1, 3 - %4 = add i32 %0, -1200 - %5 = add i32 %4, %3 - ret i32 %5 -} diff --git a/test/CodeGen/X86/lea64-schedule.ll b/test/CodeGen/X86/lea64-schedule.ll deleted file mode 100644 index 0ff1574c809d..000000000000 --- a/test/CodeGen/X86/lea64-schedule.ll +++ /dev/null @@ -1,534 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule 
-mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i64 @test_lea_offset(i64) { -; GENERIC-LABEL: test_lea_offset: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_offset: -; ATOM: # BB#0: -; ATOM-NEXT: leaq -24(%rdi), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_offset: -; SLM: # BB#0: -; SLM-NEXT: leaq -24(%rdi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_offset: -; SANDY: # BB#0: -; SANDY-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_offset: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_offset: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_offset: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq -24(%rdi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = add nsw i64 %0, -24 - ret i64 %2 -} - -define i64 @test_lea_offset_big(i64) { -; GENERIC-LABEL: test_lea_offset_big: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] 
-; -; ATOM-LABEL: test_lea_offset_big: -; ATOM: # BB#0: -; ATOM-NEXT: leaq 1024(%rdi), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_offset_big: -; SLM: # BB#0: -; SLM-NEXT: leaq 1024(%rdi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_offset_big: -; SANDY: # BB#0: -; SANDY-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_offset_big: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_offset_big: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_offset_big: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = add nsw i64 %0, 1024 - ret i64 %2 -} - -; Function Attrs: norecurse nounwind readnone uwtable -define i64 @test_lea_add(i64, i64) { -; GENERIC-LABEL: test_lea_add: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add: -; ATOM: # BB#0: -; ATOM-NEXT: leaq (%rdi,%rsi), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add: -; SLM: # BB#0: -; SLM-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq 
(%rdi,%rsi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = add nsw i64 %1, %0 - ret i64 %3 -} - -define i64 @test_lea_add_offset(i64, i64) { -; GENERIC-LABEL: test_lea_add_offset: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_offset: -; ATOM: # BB#0: -; ATOM-NEXT: leaq 16(%rdi,%rsi), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_offset: -; SLM: # BB#0: -; SLM-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_offset: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $16, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_offset: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $16, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_offset: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_offset: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = add i64 %0, 16 - %4 = add i64 %3, %1 - ret i64 %4 -} - -define i64 @test_lea_add_offset_big(i64, i64) { -; GENERIC-LABEL: test_lea_add_offset_big: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_offset_big: -; ATOM: # BB#0: -; ATOM-NEXT: leaq -4096(%rdi,%rsi), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop 
-; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_offset_big: -; SLM: # BB#0: -; SLM-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_offset_big: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $-4096, %rax # imm = 0xF000 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_offset_big: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $-4096, %rax # imm = 0xF000 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_offset_big: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_offset_big: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = add i64 %0, -4096 - %4 = add i64 %3, %1 - ret i64 %4 -} - -define i64 @test_lea_mul(i64) { -; GENERIC-LABEL: test_lea_mul: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul: -; ATOM: # BB#0: -; ATOM-NEXT: leaq (%rdi,%rdi,2), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_mul: -; SLM: # BB#0: -; SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_mul: -; BTVER2: # 
BB#0: -; BTVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = mul nsw i64 %0, 3 - ret i64 %2 -} - -define i64 @test_lea_mul_offset(i64) { -; GENERIC-LABEL: test_lea_mul_offset: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul_offset: -; ATOM: # BB#0: -; ATOM-NEXT: leaq -32(%rdi,%rdi,2), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_mul_offset: -; SLM: # BB#0: -; SLM-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul_offset: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $-32, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul_offset: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $-32, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_mul_offset: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul_offset: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = mul nsw i64 %0, 3 - %3 = add nsw i64 %2, -32 - ret i64 %3 -} - -define i64 @test_lea_mul_offset_big(i64) { -; GENERIC-LABEL: test_lea_mul_offset_big: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_mul_offset_big: -; ATOM: # BB#0: -; ATOM-NEXT: leaq 10000(%rdi,%rdi,8), %rax 
-; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_mul_offset_big: -; SLM: # BB#0: -; SLM-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_mul_offset_big: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $10000, %rax # imm = 0x2710 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_mul_offset_big: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $10000, %rax # imm = 0x2710 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_mul_offset_big: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_mul_offset_big: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %2 = mul nsw i64 %0, 9 - %3 = add nsw i64 %2, 10000 - ret i64 %3 -} - -define i64 @test_lea_add_scale(i64, i64) { -; GENERIC-LABEL: test_lea_add_scale: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale: -; ATOM: # BB#0: -; ATOM-NEXT: leaq (%rdi,%rsi,2), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_scale: -; SLM: # BB#0: -; SLM-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rsi,2), 
%rax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_scale: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = shl i64 %1, 1 - %4 = add nsw i64 %3, %0 - ret i64 %4 -} - -define i64 @test_lea_add_scale_offset(i64, i64) { -; GENERIC-LABEL: test_lea_add_scale_offset: -; GENERIC: # BB#0: -; GENERIC-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale_offset: -; ATOM: # BB#0: -; ATOM-NEXT: leaq 96(%rdi,%rsi,4), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_scale_offset: -; SLM: # BB#0: -; SLM-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale_offset: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $96, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale_offset: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $96, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_scale_offset: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale_offset: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = shl i64 %1, 2 - %4 = add i64 %0, 96 - %5 = add i64 %4, %3 - ret i64 %5 -} - -define i64 @test_lea_add_scale_offset_big(i64, i64) { -; GENERIC-LABEL: test_lea_add_scale_offset_big: -; 
GENERIC: # BB#0: -; GENERIC-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lea_add_scale_offset_big: -; ATOM: # BB#0: -; ATOM-NEXT: leaq -1200(%rdi,%rsi,8), %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; -; SLM-LABEL: test_lea_add_scale_offset_big: -; SLM: # BB#0: -; SLM-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_lea_add_scale_offset_big: -; SANDY: # BB#0: -; SANDY-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] -; SANDY-NEXT: addq $-1200, %rax # imm = 0xFB50 -; SANDY-NEXT: # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_lea_add_scale_offset_big: -; HASWELL: # BB#0: -; HASWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50] -; HASWELL-NEXT: addq $-1200, %rax # imm = 0xFB50 -; HASWELL-NEXT: # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_lea_add_scale_offset_big: -; BTVER2: # BB#0: -; BTVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_lea_add_scale_offset_big: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %3 = shl i64 %1, 3 - %4 = add i64 %0, -1200 - %5 = add i64 %4, %3 - ret i64 %5 -} diff --git a/test/CodeGen/X86/popcnt-schedule.ll b/test/CodeGen/X86/popcnt-schedule.ll deleted file mode 100644 index c0d11280fc1d..000000000000 --- a/test/CodeGen/X86/popcnt-schedule.ll +++ /dev/null @@ -1,167 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK 
--check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 - -define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) { -; GENERIC-LABEL: test_ctpop_i16: -; GENERIC: # BB#0: -; GENERIC-NEXT: popcntw (%rsi), %cx -; GENERIC-NEXT: popcntw %di, %ax -; GENERIC-NEXT: orl %ecx, %eax -; GENERIC-NEXT: # kill: %AX %AX %EAX -; GENERIC-NEXT: retq -; -; SLM-LABEL: test_ctpop_i16: -; SLM: # BB#0: -; SLM-NEXT: popcntw (%rsi), %cx # sched: [6:1.00] -; SLM-NEXT: popcntw %di, %ax # sched: [3:1.00] -; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: # kill: %AX %AX %EAX -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ctpop_i16: -; SANDY: # BB#0: -; SANDY-NEXT: popcntw (%rsi), %cx # sched: [7:1.00] -; SANDY-NEXT: popcntw %di, %ax # sched: [3:1.00] -; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: # kill: %AX %AX %EAX -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctpop_i16: -; HASWELL: # BB#0: -; 
HASWELL-NEXT: popcntw (%rsi), %cx # sched: [7:1.00] -; HASWELL-NEXT: popcntw %di, %ax # sched: [3:1.00] -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: # kill: %AX %AX %EAX -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_ctpop_i16: -; BTVER2: # BB#0: -; BTVER2-NEXT: popcntw (%rsi), %cx # sched: [8:1.00] -; BTVER2-NEXT: popcntw %di, %ax # sched: [3:1.00] -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: # kill: %AX %AX %EAX -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctpop_i16: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: popcntw (%rsi), %cx # sched: [10:1.00] -; ZNVER1-NEXT: popcntw %di, %ax # sched: [3:1.00] -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: # kill: %AX %AX %EAX -; ZNVER1-NEXT: retq # sched: [5:0.50] - %1 = load i16, i16 *%a1 - %2 = tail call i16 @llvm.ctpop.i16( i16 %1 ) - %3 = tail call i16 @llvm.ctpop.i16( i16 %a0 ) - %4 = or i16 %2, %3 - ret i16 %4 -} -declare i16 @llvm.ctpop.i16(i16) - -define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_ctpop_i32: -; GENERIC: # BB#0: -; GENERIC-NEXT: popcntl (%rsi), %ecx -; GENERIC-NEXT: popcntl %edi, %eax -; GENERIC-NEXT: orl %ecx, %eax -; GENERIC-NEXT: retq -; -; SLM-LABEL: test_ctpop_i32: -; SLM: # BB#0: -; SLM-NEXT: popcntl (%rsi), %ecx # sched: [6:1.00] -; SLM-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ctpop_i32: -; SANDY: # BB#0: -; SANDY-NEXT: popcntl (%rsi), %ecx # sched: [7:1.00] -; SANDY-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctpop_i32: -; HASWELL: # BB#0: -; HASWELL-NEXT: popcntl (%rsi), %ecx # sched: [7:1.00] -; HASWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_ctpop_i32: 
-; BTVER2: # BB#0: -; BTVER2-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00] -; BTVER2-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctpop_i32: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: popcntl (%rsi), %ecx # sched: [10:1.00] -; ZNVER1-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %1 = load i32, i32 *%a1 - %2 = tail call i32 @llvm.ctpop.i32( i32 %1 ) - %3 = tail call i32 @llvm.ctpop.i32( i32 %a0 ) - %4 = or i32 %2, %3 - ret i32 %4 -} -declare i32 @llvm.ctpop.i32(i32) - -define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_ctpop_i64: -; GENERIC: # BB#0: -; GENERIC-NEXT: popcntq (%rsi), %rcx -; GENERIC-NEXT: popcntq %rdi, %rax -; GENERIC-NEXT: orq %rcx, %rax -; GENERIC-NEXT: retq -; -; SLM-LABEL: test_ctpop_i64: -; SLM: # BB#0: -; SLM-NEXT: popcntq (%rsi), %rcx # sched: [6:1.00] -; SLM-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-LABEL: test_ctpop_i64: -; SANDY: # BB#0: -; SANDY-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00] -; SANDY-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-LABEL: test_ctpop_i64: -; HASWELL: # BB#0: -; HASWELL-NEXT: popcntq (%rsi), %rcx # sched: [7:1.00] -; HASWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] -; -; BTVER2-LABEL: test_ctpop_i64: -; BTVER2: # BB#0: -; BTVER2-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00] -; BTVER2-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-LABEL: test_ctpop_i64: -; ZNVER1: # BB#0: -; ZNVER1-NEXT: popcntq (%rsi), %rcx # sched: [10:1.00] -; 
ZNVER1-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [5:0.50] - %1 = load i64, i64 *%a1 - %2 = tail call i64 @llvm.ctpop.i64( i64 %1 ) - %3 = tail call i64 @llvm.ctpop.i64( i64 %a0 ) - %4 = or i64 %2, %3 - ret i64 %4 -} -declare i64 @llvm.ctpop.i64(i64) diff --git a/test/CodeGen/X86/pr34139.ll b/test/CodeGen/X86/pr34139.ll new file mode 100644 index 000000000000..c20c2cd510c7 --- /dev/null +++ b/test/CodeGen/X86/pr34139.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s + +define void @f_f(<16 x double>* %ptr) { +; CHECK-LABEL: f_f: +; CHECK: # BB#0: +; CHECK-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa %xmm0, (%rax) +; CHECK-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; CHECK-NEXT: vmovapd (%rdi), %zmm1 +; CHECK-NEXT: vmovapd 64(%rdi), %zmm2 +; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k1 +; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovapd %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vmovapd %zmm2, 64(%rdi) +; CHECK-NEXT: vmovapd %zmm1, (%rdi) + store <16 x i8> , <16 x i8>* undef + %load_mask8.i.i.i = load <16 x i8>, <16 x i8>* undef + %v.i.i.i.i = load <16 x double>, <16 x double>* %ptr + %mask_vec_i1.i.i.i51.i.i = icmp ne <16 x i8> %load_mask8.i.i.i, zeroinitializer + %v1.i.i.i.i = select <16 x i1> %mask_vec_i1.i.i.i51.i.i, <16 x double> undef, <16 x double> %v.i.i.i.i + store <16 x double> %v1.i.i.i.i, <16 x double>* %ptr + unreachable +} diff --git a/test/CodeGen/X86/pr34177.ll b/test/CodeGen/X86/pr34177.ll new file mode 100644 index 000000000000..7c210058ae6c --- /dev/null +++ b/test/CodeGen/X86/pr34177.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mattr=+avx512f | FileCheck %s +; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s + +target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test() local_unnamed_addr { +; CHECK-LABEL: test: +; CHECK: # BB#0: +; CHECK-NEXT: vmovdqa {{.*#+}} xmm0 = [2,3] +; CHECK-NEXT: vpextrq $1, %xmm0, %rax +; CHECK-NEXT: vmovq %xmm0, %rcx +; CHECK-NEXT: negq %rdx +; CHECK-NEXT: fld1 +; CHECK-NEXT: fldz +; CHECK-NEXT: fld %st(0) +; CHECK-NEXT: fcmove %st(2), %st(0) +; CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: fld %st(1) +; CHECK-NEXT: fcmove %st(3), %st(0) +; CHECK-NEXT: cmpq %rax, %rax +; CHECK-NEXT: fld %st(2) +; CHECK-NEXT: fcmove %st(4), %st(0) +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmpq %rax, %rax +; CHECK-NEXT: fld %st(3) +; CHECK-NEXT: fcmove %st(5), %st(0) +; CHECK-NEXT: fstp %st(5) +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fadd %st(3) +; CHECK-NEXT: fxch %st(4) +; CHECK-NEXT: fadd %st(3) +; CHECK-NEXT: fxch %st(2) +; CHECK-NEXT: fadd %st(3) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: faddp %st(3) +; CHECK-NEXT: fxch %st(3) +; CHECK-NEXT: fstpt (%rax) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstpt (%rax) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstpt (%rax) +; CHECK-NEXT: fstpt (%rax) + %1 = icmp eq <4 x i64> , undef + %2 = select <4 x i1> %1, <4 x x86_fp80> , <4 x x86_fp80> zeroinitializer + %3 = fadd <4 x x86_fp80> undef, %2 + %4 = shufflevector <4 x x86_fp80> %3, <4 x x86_fp80> undef, <8 x i32> + store <8 x x86_fp80> %4, <8 x x86_fp80>* undef, align 16 + unreachable +} + diff --git a/test/CodeGen/X86/pr34271-1.ll b/test/CodeGen/X86/pr34271-1.ll new file mode 100644 index 000000000000..2e2f0fd0aa94 --- /dev/null +++ b/test/CodeGen/X86/pr34271-1.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512vl,avx512bw | FileCheck %s + +define <16 x i16> @foo(<16 x i32> %i) { +; CHECK-LABEL: foo: +; CHECK: # BB#0: +; CHECK-NEXT: vpminud {{.*}}(%rip){1to16}, %zmm0, %zmm0 +; CHECK-NEXT: vpmovdw %zmm0, %ymm0 +; 
CHECK-NEXT: retq + %x3 = icmp ult <16 x i32> %i, + %x5 = select <16 x i1> %x3, <16 x i32> %i, <16 x i32> + %x6 = trunc <16 x i32> %x5 to <16 x i16> + ret <16 x i16> %x6 +} diff --git a/test/CodeGen/X86/pr34271.ll b/test/CodeGen/X86/pr34271.ll new file mode 100644 index 000000000000..40d01617c30d --- /dev/null +++ b/test/CodeGen/X86/pr34271.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +; CHECK: .LCPI0_0: +; CHECK-NEXT: .zero 16,1 + +define <4 x i32> @f(<4 x i32> %a) { +; CHECK-LABEL: f: +; CHECK: # BB#0: +; CHECK-NEXT: paddd .LCPI0_0(%rip), %xmm0 +; CHECK-NEXT: retq + %v = add nuw nsw <4 x i32> %a, + ret <4 x i32> %v +} diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll index 02a968c6f27d..9102e68f231b 100644 --- a/test/CodeGen/X86/recip-fastmath.ll +++ b/test/CodeGen/X86/recip-fastmath.ll @@ -45,9 +45,9 @@ define float @f32_no_estimate(float %x) #0 { ; ; SANDY-LABEL: f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_no_estimate: ; HASWELL: # BB#0: @@ -113,11 +113,11 @@ define float @f32_one_step(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: 
[3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_one_step: ; HASWELL: # BB#0: @@ -207,7 +207,7 @@ define float @f32_two_step(float %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -215,7 +215,7 @@ define float @f32_two_step(float %x) #2 { ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_two_step: ; HASWELL: # BB#0: @@ -284,9 +284,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 { ; ; SANDY-LABEL: v4f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] -; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_no_estimate: ; HASWELL: # BB#0: @@ -350,13 +350,13 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = 
[1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_one_step: ; HASWELL: # BB#0: @@ -453,9 +453,9 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; ; SANDY-LABEL: v4f32_two_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -463,7 +463,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_two_step: ; HASWELL: # BB#0: @@ -546,9 +546,9 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 { ; ; SANDY-LABEL: v8f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] -; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = 
[1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_no_estimate: ; HASWELL: # BB#0: @@ -621,11 +621,11 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_one_step: ; HASWELL: # BB#0: @@ -737,7 +737,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -745,7 +745,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # 
sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_two_step: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/recip-fastmath2.ll b/test/CodeGen/X86/recip-fastmath2.ll index c82eab84757f..e6070e41a2b2 100644 --- a/test/CodeGen/X86/recip-fastmath2.ll +++ b/test/CodeGen/X86/recip-fastmath2.ll @@ -39,8 +39,8 @@ define float @f32_no_step_2(float %x) #3 { ; SANDY-LABEL: f32_no_step_2: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_no_step_2: ; HASWELL: # BB#0: @@ -110,12 +110,12 @@ define float @f32_one_step_2(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_one_step_2: ; HASWELL: # BB#0: @@ -198,13 +198,13 @@ define float @f32_one_step_2_divs(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] 
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_one_step_2_divs: ; HASWELL: # BB#0: @@ -305,7 +305,7 @@ define float @f32_two_step_2(float %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -313,8 +313,8 @@ define float @f32_two_step_2(float %x) #2 { ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_two_step_2: ; HASWELL: # BB#0: @@ -403,14 +403,14 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step2: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = 
[1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_one_step2: ; HASWELL: # BB#0: @@ -501,15 +501,15 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step_2_divs: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_one_step_2_divs: ; HASWELL: # BB#0: @@ -619,9 +619,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; ; SANDY-LABEL: v4f32_two_step2: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = 
[1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -629,8 +629,8 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_two_step2: ; HASWELL: # BB#0: @@ -741,12 +741,12 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_one_step2: ; HASWELL: # BB#0: @@ -848,13 +848,13 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = 
[1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_one_step_2_divs: ; HASWELL: # BB#0: @@ -980,7 +980,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -988,8 +988,8 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_two_step2: ; 
HASWELL: # BB#0: @@ -1070,7 +1070,7 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 { ; SANDY-LABEL: v8f32_no_step: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_no_step: ; HASWELL: # BB#0: @@ -1125,8 +1125,8 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 { ; SANDY-LABEL: v8f32_no_step2: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_no_step2: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/sse-schedule.ll b/test/CodeGen/X86/sse-schedule.ll index 29f726c3df6a..f44cee9db22c 100644 --- a/test/CodeGen/X86/sse-schedule.ll +++ b/test/CodeGen/X86/sse-schedule.ll @@ -31,8 +31,8 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addps: ; HASWELL: # BB#0: @@ -79,8 +79,8 @@ define float @test_addss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_addss: ; SANDY: # BB#0: ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addss: ; HASWELL: # BB#0: @@ -134,9 +134,9 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_andps: ; SANDY: # BB#0: -; 
SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andps: ; HASWELL: # BB#0: @@ -194,9 +194,9 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: -; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # BB#0: @@ -252,9 +252,9 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # BB#0: @@ -308,7 +308,7 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmpss: ; HASWELL: # BB#0: @@ -384,16 +384,16 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_comiss: ; SANDY: # BB#0: ; SANDY-NEXT: vcomiss %xmm1, %xmm0 # 
sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %cl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %dl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %dl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_comiss: ; HASWELL: # BB#0: @@ -468,10 +468,10 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; ; SANDY-LABEL: test_cvtsi2ss: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsi2ss: ; HASWELL: # BB#0: @@ -524,10 +524,10 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; ; SANDY-LABEL: test_cvtsi2ssq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsi2ssq: ; HASWELL: # BB#0: @@ -580,10 +580,10 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2si: ; SANDY: # BB#0: -; SANDY-NEXT: 
vcvtss2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00] +; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00] +; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtss2si: ; HASWELL: # BB#0: @@ -639,10 +639,10 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00] +; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00] +; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [7:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtss2siq: ; HASWELL: # BB#0: @@ -698,10 +698,10 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvttss2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00] +; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00] +; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttss2si: ; HASWELL: # BB#0: @@ -754,10 +754,10 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvttss2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00] +; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00] +; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [7:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttss2siq: ; HASWELL: # BB#0: @@ -807,9 +807,9 @@ define 
<4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: @@ -855,9 +855,9 @@ define float @test_divss(float %a0, float %a1, float *%a2) { ; ; SANDY-LABEL: test_divss: ; SANDY: # BB#0: -; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divss: ; HASWELL: # BB#0: @@ -904,8 +904,8 @@ define void @test_ldmxcsr(i32 %a0) { ; SANDY-LABEL: test_ldmxcsr: ; SANDY: # BB#0: ; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_ldmxcsr: ; HASWELL: # BB#0: @@ -954,8 +954,8 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxps: ; HASWELL: # BB#0: @@ -1003,8 +1003,8 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_maxss: ; SANDY: # 
BB#0: ; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxss: ; HASWELL: # BB#0: @@ -1052,8 +1052,8 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_minps: ; SANDY: # BB#0: ; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minps: ; HASWELL: # BB#0: @@ -1101,8 +1101,8 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_minss: ; SANDY: # BB#0: ; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minss: ; HASWELL: # BB#0: @@ -1152,10 +1152,10 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movaps: ; HASWELL: # BB#0: @@ -1210,7 +1210,7 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { ; SANDY-LABEL: test_movhlps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SANDY-NEXT: retq # sched: 
[1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movhlps: ; HASWELL: # BB#0: @@ -1258,10 +1258,10 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movhps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movhps: ; HASWELL: # BB#0: @@ -1318,7 +1318,7 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movlhps: ; HASWELL: # BB#0: @@ -1366,10 +1366,10 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movlps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movlps: ; HASWELL: # BB#0: @@ -1421,8 +1421,8 @@ define i32 @test_movmskps(<4 x float> %a0) { ; ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: @@ -1467,8 +1467,8 @@ define void @test_movntps(<4 x float> %a0, 
<4 x float> *%a1) { ; ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntps: ; HASWELL: # BB#0: @@ -1512,10 +1512,10 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; ; SANDY-LABEL: test_movss_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movss_mem: ; HASWELL: # BB#0: @@ -1567,8 +1567,8 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { ; ; SANDY-LABEL: test_movss_reg: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movss_reg: ; HASWELL: # BB#0: @@ -1612,10 +1612,10 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_movups: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movups: ; HASWELL: # BB#0: @@ -1665,8 +1665,8 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_mulps: ; SANDY: # BB#0: ; 
SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulps: ; HASWELL: # BB#0: @@ -1713,8 +1713,8 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_mulss: ; SANDY: # BB#0: ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulss: ; HASWELL: # BB#0: @@ -1768,9 +1768,9 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: -; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_orps: ; HASWELL: # BB#0: @@ -1823,8 +1823,8 @@ define void @test_prefetchnta(i8* %a0) { ; ; SANDY-LABEL: test_prefetchnta: ; SANDY: # BB#0: -; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: prefetchnta (%rdi) # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_prefetchnta: ; HASWELL: # BB#0: @@ -1871,10 +1871,10 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_rcpps: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [7:3.00] -; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # 
sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # BB#0: @@ -1934,10 +1934,10 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SANDY-LABEL: test_rcpss: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rcpss: ; HASWELL: # BB#0: @@ -1999,9 +1999,9 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: ; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rsqrtps: ; HASWELL: # BB#0: @@ -2060,11 +2060,11 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; ; SANDY-LABEL: test_rsqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rsqrtss: ; HASWELL: # BB#0: @@ -2124,7 +2124,7 @@ define void @test_sfence() { ; SANDY-LABEL: 
test_sfence: ; SANDY: # BB#0: ; SANDY-NEXT: sfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sfence: ; HASWELL: # BB#0: @@ -2171,8 +2171,8 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_shufps: ; HASWELL: # BB#0: @@ -2222,10 +2222,10 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00] +; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00] +; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: @@ -2284,11 +2284,11 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_sqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00] +; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] +; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtss: ; HASWELL: # BB#0: @@ -2342,9 +2342,9 @@ define i32 @test_stmxcsr() { ; ; SANDY-LABEL: 
test_stmxcsr: ; SANDY: # BB#0: -; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_stmxcsr: ; HASWELL: # BB#0: @@ -2393,8 +2393,8 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subps: ; HASWELL: # BB#0: @@ -2441,8 +2441,8 @@ define float @test_subss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_subss: ; SANDY: # BB#0: ; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subss: ; HASWELL: # BB#0: @@ -2513,16 +2513,16 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_ucomiss: ; SANDY: # BB#0: ; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %cl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %dl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %dl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb 
%cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_ucomiss: ; HASWELL: # BB#0: @@ -2599,8 +2599,8 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_unpckhps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpckhps: ; HASWELL: # BB#0: @@ -2651,8 +2651,8 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # BB#0: @@ -2706,9 +2706,9 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_xorps: ; SANDY: # BB#0: -; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_xorps: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll index 6ee908e0c787..62c194f2fc4b 100644 --- a/test/CodeGen/X86/sse2-schedule.ll +++ 
b/test/CodeGen/X86/sse2-schedule.ll @@ -31,8 +31,8 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addpd: ; HASWELL: # BB#0: @@ -79,8 +79,8 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_addsd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsd: ; HASWELL: # BB#0: @@ -129,10 +129,10 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_andpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andpd: ; HASWELL: # BB#0: @@ -189,10 +189,10 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; 
SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # BB#0: @@ -252,9 +252,9 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # BB#0: @@ -308,7 +308,7 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmpsd: ; HASWELL: # BB#0: @@ -384,16 +384,16 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SANDY-LABEL: test_comisd: ; SANDY: # BB#0: ; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %cl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %dl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %dl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_comisd: ; HASWELL: # BB#0: @@ -469,9 +469,9 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x 
i32> *%a1) { ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # BB#0: @@ -527,10 +527,10 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # BB#0: @@ -584,10 +584,10 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # BB#0: @@ -642,10 +642,10 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] 
+; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # BB#0: @@ -701,9 +701,9 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_cvtps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # BB#0: @@ -758,10 +758,10 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_cvtps2pd: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtps2pd: ; HASWELL: # BB#0: @@ -819,7 +819,7 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [3:1.00] ; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsd2si: ; HASWELL: # BB#0: @@ -875,10 +875,10 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvtsd2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00] +; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [3:1.00] +; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [7:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsd2siq: ; HASWELL: # BB#0: @@ -939,10 +939,10 @@ define 
float @test_cvtsd2ss(double %a0, double *%a1) { ; SANDY-LABEL: test_cvtsd2ss: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] +; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] ; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsd2ss: ; HASWELL: # BB#0: @@ -999,9 +999,9 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SANDY-LABEL: test_cvtsi2sd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsi2sd: ; HASWELL: # BB#0: @@ -1055,9 +1055,9 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SANDY-LABEL: test_cvtsi2sdq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsi2sdq: ; HASWELL: # BB#0: @@ -1116,11 +1116,11 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2sd: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] +; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: 
[4:0.50] +; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtss2sd: ; HASWELL: # BB#0: @@ -1177,10 +1177,10 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_cvttpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttpd2dq: ; HASWELL: # BB#0: @@ -1237,9 +1237,9 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_cvttps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttps2dq: ; HASWELL: # BB#0: @@ -1292,10 +1292,10 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvttsd2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [3:1.00] ; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttsd2si: ; HASWELL: # BB#0: @@ -1348,10 +1348,10 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvttsd2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] -; 
SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00] +; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [3:1.00] +; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [7:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttsd2siq: ; HASWELL: # BB#0: @@ -1401,9 +1401,9 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] -; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: @@ -1449,9 +1449,9 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { ; ; SANDY-LABEL: test_divsd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] -; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divsd: ; HASWELL: # BB#0: @@ -1501,7 +1501,7 @@ define void @test_lfence() { ; SANDY-LABEL: test_lfence: ; SANDY: # BB#0: ; SANDY-NEXT: lfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_lfence: ; HASWELL: # BB#0: @@ -1547,7 +1547,7 @@ define void @test_mfence() { ; SANDY-LABEL: test_mfence: ; SANDY: # BB#0: ; SANDY-NEXT: mfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mfence: ; HASWELL: # BB#0: @@ -1591,7 +1591,7 @@ define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { ; 
SANDY-LABEL: test_maskmovdqu: ; SANDY: # BB#0: ; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovdqu: ; HASWELL: # BB#0: @@ -1634,8 +1634,8 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # BB#0: @@ -1683,8 +1683,8 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_maxsd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxsd: ; HASWELL: # BB#0: @@ -1732,8 +1732,8 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_minpd: ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minpd: ; HASWELL: # BB#0: @@ -1781,8 +1781,8 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_minsd: ; SANDY: # BB#0: ; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: 
[5:1.00] ; ; HASWELL-LABEL: test_minsd: ; HASWELL: # BB#0: @@ -1832,10 +1832,10 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_movapd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movapd: ; HASWELL: # BB#0: @@ -1887,10 +1887,10 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; ; SANDY-LABEL: test_movdqa: ; SANDY: # BB#0: -; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movdqa: ; HASWELL: # BB#0: @@ -1942,10 +1942,10 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; ; SANDY-LABEL: test_movdqu: ; SANDY: # BB#0: -; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movdqu: ; HASWELL: # BB#0: @@ -2007,12 +2007,12 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SANDY-LABEL: test_movd: ; SANDY: # BB#0: ; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:0.33] -; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; 
SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovd %xmm0, %eax # sched: [1:0.33] +; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movd: ; HASWELL: # BB#0: @@ -2087,13 +2087,13 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; ; SANDY-LABEL: test_movd_64: ; SANDY: # BB#0: -; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] +; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:0.33] +; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovq %xmm0, %rax # sched: [1:0.33] +; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movd_64: ; HASWELL: # BB#0: @@ -2159,10 +2159,10 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movhpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movhpd: ; HASWELL: # BB#0: @@ -2217,10 +2217,10 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movlpd: ; SANDY: # BB#0: 
-; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movlpd: ; HASWELL: # BB#0: @@ -2271,8 +2271,8 @@ define i32 @test_movmskpd(<2 x double> %a0) { ; ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: @@ -2317,8 +2317,8 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { ; SANDY-LABEL: test_movntdqa: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # BB#0: @@ -2364,8 +2364,8 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # BB#0: @@ -2413,10 +2413,10 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; ; SANDY-LABEL: test_movq_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] +; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq 
%xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movq_mem: ; HASWELL: # BB#0: @@ -2471,7 +2471,7 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movq_reg: ; HASWELL: # BB#0: @@ -2519,10 +2519,10 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; ; SANDY-LABEL: test_movsd_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] +; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50] ; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movsd_mem: ; HASWELL: # BB#0: @@ -2576,7 +2576,7 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; SANDY-LABEL: test_movsd_reg: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movsd_reg: ; HASWELL: # BB#0: @@ -2620,10 +2620,10 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movupd: ; 
HASWELL: # BB#0: @@ -2673,8 +2673,8 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # BB#0: @@ -2721,8 +2721,8 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_mulsd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulsd: ; HASWELL: # BB#0: @@ -2771,10 +2771,10 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_orpd: ; SANDY: # BB#0: -; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_orpd: ; HASWELL: # BB#0: @@ -2837,8 +2837,8 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_packssdw: ; SANDY: # BB#0: ; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_packssdw: ; HASWELL: # BB#0: @@ -2895,8 +2895,8 @@ define <16 x i8> 
@test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_packsswb: ; SANDY: # BB#0: ; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_packsswb: ; HASWELL: # BB#0: @@ -2953,8 +2953,8 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_packuswb: ; SANDY: # BB#0: ; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_packuswb: ; HASWELL: # BB#0: @@ -3007,8 +3007,8 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_paddb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddb: ; HASWELL: # BB#0: @@ -3059,8 +3059,8 @@ define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_paddd: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddd: ; HASWELL: # BB#0: @@ -3107,8 +3107,8 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_paddq: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 
-; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddq: ; HASWELL: # BB#0: @@ -3158,9 +3158,9 @@ define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_paddsb: ; SANDY: # BB#0: -; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddsb: ; HASWELL: # BB#0: @@ -3211,9 +3211,9 @@ define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_paddsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddsw: ; HASWELL: # BB#0: @@ -3265,8 +3265,8 @@ define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_paddusb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddusb: ; HASWELL: # BB#0: @@ -3318,8 +3318,8 @@ define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_paddusw: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusw (%rdi), %xmm0, 
%xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddusw: ; HASWELL: # BB#0: @@ -3370,9 +3370,9 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_paddw: ; SANDY: # BB#0: -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddw: ; HASWELL: # BB#0: @@ -3422,9 +3422,9 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pand: ; SANDY: # BB#0: ; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pand: ; HASWELL: # BB#0: @@ -3484,9 +3484,9 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pandn: ; SANDY: # BB#0: ; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pandn: ; HASWELL: # BB#0: @@ -3543,8 +3543,8 @@ define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pavgb: ; SANDY: # BB#0: ; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: 
[7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pavgb: ; HASWELL: # BB#0: @@ -3596,8 +3596,8 @@ define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pavgw: ; SANDY: # BB#0: ; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pavgw: ; HASWELL: # BB#0: @@ -3650,9 +3650,9 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pcmpeqb: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpeqb: ; HASWELL: # BB#0: @@ -3709,9 +3709,9 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pcmpeqd: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpeqd: ; HASWELL: # BB#0: @@ -3768,9 +3768,9 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pcmpeqw: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 
# sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpeqw: ; HASWELL: # BB#0: @@ -3828,9 +3828,9 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pcmpgtb: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpgtb: ; HASWELL: # BB#0: @@ -3888,9 +3888,9 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pcmpgtd: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpgtd: ; HASWELL: # BB#0: @@ -3948,9 +3948,9 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pcmpgtw: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpgtw: ; HASWELL: # BB#0: @@ -4001,9 +4001,9 @@ define i16 @test_pextrw(<8 x i16> %a0) { ; ; SANDY-LABEL: test_pextrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: # 
kill: %AX %AX %EAX -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # BB#0: @@ -4051,9 +4051,9 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { ; ; SANDY-LABEL: test_pinsrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pinsrw: ; HASWELL: # BB#0: @@ -4107,9 +4107,9 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmaddwd: ; SANDY: # BB#0: -; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaddwd: ; HASWELL: # BB#0: @@ -4162,8 +4162,8 @@ define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pmaxsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxsw: ; HASWELL: # BB#0: @@ -4215,8 +4215,8 @@ define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pmaxub: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; 
HASWELL-LABEL: test_pmaxub: ; HASWELL: # BB#0: @@ -4268,8 +4268,8 @@ define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pminsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminsw: ; HASWELL: # BB#0: @@ -4321,8 +4321,8 @@ define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pminub: ; SANDY: # BB#0: ; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminub: ; HASWELL: # BB#0: @@ -4368,8 +4368,8 @@ define i32 @test_pmovmskb(<16 x i8> %a0) { ; ; SANDY-LABEL: test_pmovmskb: ; SANDY: # BB#0: -; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovmskb: ; HASWELL: # BB#0: @@ -4413,7 +4413,7 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmulhuw: ; HASWELL: # BB#0: @@ -4460,9 +4460,9 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmulhw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # 
sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmulhw: ; HASWELL: # BB#0: @@ -4509,9 +4509,9 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmullw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmullw: ; HASWELL: # BB#0: @@ -4567,7 +4567,7 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmuludq: ; HASWELL: # BB#0: @@ -4619,9 +4619,9 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_por: ; SANDY: # BB#0: ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_por: ; HASWELL: # BB#0: @@ -4679,9 +4679,9 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_psadbw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psadbw: ; HASWELL: # BB#0: @@ -4735,9 +4735,9 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> 
*%a1) { ; SANDY-LABEL: test_pshufd: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] -; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] +; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pshufd: ; HASWELL: # BB#0: @@ -4792,10 +4792,10 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; ; SANDY-LABEL: test_pshufhw: ; SANDY: # BB#0: -; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] +; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:0.50] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pshufhw: ; HASWELL: # BB#0: @@ -4851,9 +4851,9 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-LABEL: test_pshuflw: ; SANDY: # BB#0: ; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:0.50] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pshuflw: ; HASWELL: # BB#0: @@ -4906,10 +4906,10 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_pslld: ; SANDY: # BB#0: -; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # 
sched: [5:1.00] -; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pslld: ; HASWELL: # BB#0: @@ -4965,7 +4965,7 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) { ; SANDY-LABEL: test_pslldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pslldq: ; HASWELL: # BB#0: @@ -5009,10 +5009,10 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_psllq: ; SANDY: # BB#0: -; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psllq: ; HASWELL: # BB#0: @@ -5067,10 +5067,10 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_psllw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psllw: ; HASWELL: # BB#0: @@ -5125,10 
+5125,10 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_psrad: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrad: ; HASWELL: # BB#0: @@ -5183,10 +5183,10 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_psraw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psraw: ; HASWELL: # BB#0: @@ -5241,10 +5241,10 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_psrld: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrld: ; HASWELL: # BB#0: @@ -5300,7 +5300,7 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) { ; SANDY-LABEL: test_psrldq: ; SANDY: # BB#0: ; 
SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrldq: ; HASWELL: # BB#0: @@ -5344,10 +5344,10 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_psrlq: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrlq: ; HASWELL: # BB#0: @@ -5402,10 +5402,10 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_psrlw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrlw: ; HASWELL: # BB#0: @@ -5462,8 +5462,8 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubb: ; HASWELL: # BB#0: @@ -5514,8 +5514,8 @@ define <4 x i32> @test_psubd(<4 x 
i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_psubd: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubd: ; HASWELL: # BB#0: @@ -5562,8 +5562,8 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_psubq: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubq: ; HASWELL: # BB#0: @@ -5614,8 +5614,8 @@ define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubsb: ; HASWELL: # BB#0: @@ -5667,8 +5667,8 @@ define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubsw: ; HASWELL: # BB#0: @@ -5720,8 +5720,8 @@ define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubusb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusb (%rdi), 
%xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubusb: ; HASWELL: # BB#0: @@ -5773,8 +5773,8 @@ define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubusw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubusw: ; HASWELL: # BB#0: @@ -5826,8 +5826,8 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubw: ; HASWELL: # BB#0: @@ -5878,8 +5878,8 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_punpckhbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckhbw: ; HASWELL: # BB#0: @@ 
-5931,9 +5931,9 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_punpckhdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] +; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckhdq: ; HASWELL: # BB#0: @@ -5986,10 +5986,10 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; ; SANDY-LABEL: test_punpckhqdq: ; SANDY: # BB#0: -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] +; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckhqdq: ; HASWELL: # BB#0: @@ -6044,8 +6044,8 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_punpckhwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckhwd: ; HASWELL: # BB#0: @@ -6095,9 +6095,9 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; 
SANDY-LABEL: test_punpcklbw: ; SANDY: # BB#0: -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpcklbw: ; HASWELL: # BB#0: @@ -6149,9 +6149,9 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_punpckldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] +; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckldq: ; HASWELL: # BB#0: @@ -6205,9 +6205,9 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SANDY-LABEL: test_punpcklqdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq 
# sched: [5:1.00] ; ; HASWELL-LABEL: test_punpcklqdq: ; HASWELL: # BB#0: @@ -6262,8 +6262,8 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_punpcklwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpcklwd: ; HASWELL: # BB#0: @@ -6313,9 +6313,9 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pxor: ; SANDY: # BB#0: ; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pxor: ; HASWELL: # BB#0: @@ -6369,9 +6369,9 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # BB#0: @@ -6425,10 +6425,10 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00] -; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # 
sched: [28:1.00] +; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00] +; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: @@ -6487,11 +6487,11 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_sqrtsd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] -; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] +; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50] +; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtsd: ; HASWELL: # BB#0: @@ -6546,8 +6546,8 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subpd: ; HASWELL: # BB#0: @@ -6594,8 +6594,8 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_subsd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subsd: ; HASWELL: # BB#0: @@ -6666,16 +6666,16 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; 
SANDY-LABEL: test_ucomisd: ; SANDY: # BB#0: ; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %cl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %dl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %dl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_ucomisd: ; HASWELL: # BB#0: @@ -6751,9 +6751,9 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_unpckhpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # BB#0: @@ -6813,9 +6813,9 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # BB#0: @@ -6868,10 +6868,10 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x 
double> %a1, <2 x double> ; ; SANDY-LABEL: test_xorpd: ; SANDY: # BB#0: -; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/sse3-schedule.ll b/test/CodeGen/X86/sse3-schedule.ll index ad38d1c6ff49..5f41ccda0fde 100644 --- a/test/CodeGen/X86/sse3-schedule.ll +++ b/test/CodeGen/X86/sse3-schedule.ll @@ -31,8 +31,8 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # BB#0: @@ -80,8 +80,8 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsubps: ; HASWELL: # BB#0: @@ -128,9 +128,9 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; ; SANDY-LABEL: test_haddpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: 
[3:1.00] +; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # BB#0: @@ -177,9 +177,9 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_haddps: ; HASWELL: # BB#0: @@ -226,9 +226,9 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # BB#0: @@ -275,9 +275,9 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # BB#0: @@ -323,8 +323,8 @@ define <16 x i8> @test_lddqu(i8* %a0) { ; ; SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: -; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: retq # 
sched: [5:1.00] ; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # BB#0: @@ -371,9 +371,9 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50] +; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movddup: ; HASWELL: # BB#0: @@ -428,9 +428,9 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] +; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # BB#0: @@ -485,9 +485,9 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] +; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/sse41-schedule.ll b/test/CodeGen/X86/sse41-schedule.ll index 26cca98816a3..ac600fed0ea0 100644 --- a/test/CodeGen/X86/sse41-schedule.ll +++ b/test/CodeGen/X86/sse41-schedule.ll @@ -25,10 +25,10 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; ; SANDY-LABEL: test_blendpd: ; 
SANDY: # BB#0: -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # BB#0: @@ -72,9 +72,9 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00] -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendps: ; HASWELL: # BB#0: @@ -120,9 +120,9 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # BB#0: @@ -169,9 +169,9 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: 
vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # BB#0: @@ -212,9 +212,9 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_dppd: ; SANDY: # BB#0: -; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_dppd: ; HASWELL: # BB#0: @@ -255,9 +255,9 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: -; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] +; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_dpps: ; HASWELL: # BB#0: @@ -299,8 +299,8 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) ; SANDY-LABEL: test_insertps: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_insertps: ; HASWELL: # BB#0: @@ -339,8 +339,8 @@ define <2 x i64> @test_movntdqa(i8* %a0) { ; ; SANDY-LABEL: test_movntdqa: ; SANDY: # BB#0: -; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: retq # 
sched: [5:1.00] ; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # BB#0: @@ -376,9 +376,9 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_mpsadbw: ; SANDY: # BB#0: -; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mpsadbw: ; HASWELL: # BB#0: @@ -421,8 +421,8 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_packusdw: ; SANDY: # BB#0: ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_packusdw: ; HASWELL: # BB#0: @@ -471,8 +471,8 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 ; SANDY-LABEL: test_pblendvb: ; SANDY: # BB#0: ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pblendvb: ; HASWELL: # BB#0: @@ -514,8 +514,8 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pblendw: ; SANDY: # BB#0: ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpblendw {{.*#+}} xmm0 
= xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pblendw: ; HASWELL: # BB#0: @@ -555,9 +555,9 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_pcmpeqq: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpeqq: ; HASWELL: # BB#0: @@ -599,9 +599,9 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { ; ; SANDY-LABEL: test_pextrb: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrb: ; HASWELL: # BB#0: @@ -642,9 +642,9 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; ; SANDY-LABEL: test_pextrd: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrd: ; HASWELL: # BB#0: @@ -684,9 +684,9 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { ; ; SANDY-LABEL: test_pextrq: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] +; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrq: ; HASWELL: # BB#0: @@ -726,9 +726,9 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 
*%a1) { ; ; SANDY-LABEL: test_pextrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # BB#0: @@ -769,9 +769,9 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; ; SANDY-LABEL: test_phminposuw: ; SANDY: # BB#0: -; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phminposuw: ; HASWELL: # BB#0: @@ -812,9 +812,9 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { ; ; SANDY-LABEL: test_pinsrb: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pinsrb: ; HASWELL: # BB#0: @@ -854,9 +854,9 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; ; SANDY-LABEL: test_pinsrd: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pinsrd: ; HASWELL: # BB#0: @@ -898,10 +898,10 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; ; SANDY-LABEL: test_pinsrq: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrq $1, 
%rdi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pinsrq: ; HASWELL: # BB#0: @@ -946,8 +946,8 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pmaxsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxsb: ; HASWELL: # BB#0: @@ -989,8 +989,8 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pmaxsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxsd: ; HASWELL: # BB#0: @@ -1032,8 +1032,8 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pmaxud: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxud: ; HASWELL: # BB#0: @@ -1075,8 +1075,8 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pmaxuw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxuw: ; HASWELL: # BB#0: @@ -1118,8 +1118,8 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pminsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminsb: ; HASWELL: # BB#0: @@ -1161,8 +1161,8 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pminsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminsd: ; HASWELL: # BB#0: @@ -1204,8 +1204,8 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pminud: ; SANDY: # BB#0: ; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminud: ; HASWELL: # BB#0: @@ -1247,8 +1247,8 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pminuw: ; SANDY: # BB#0: ; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: 
retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminuw: ; HASWELL: # BB#0: @@ -1293,9 +1293,9 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxbw: ; HASWELL: # BB#0: @@ -1344,9 +1344,9 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxbd: ; HASWELL: # BB#0: @@ -1395,9 +1395,9 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxbq: ; HASWELL: # BB#0: @@ -1446,9 +1446,9 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-LABEL: test_pmovsxdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: 
[1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxdq: ; HASWELL: # BB#0: @@ -1497,9 +1497,9 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-LABEL: test_pmovsxwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxwd: ; HASWELL: # BB#0: @@ -1548,9 +1548,9 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-LABEL: test_pmovsxwq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxwq: ; HASWELL: # BB#0: @@ -1599,9 +1599,9 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:0.50] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxbw: ; HASWELL: # BB#0: @@ -1650,9 +1650,9 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, 
<4 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxbd: ; HASWELL: # BB#0: @@ -1701,9 +1701,9 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxbq: ; HASWELL: # BB#0: @@ -1752,9 +1752,9 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-LABEL: test_pmovzxdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxdq: ; HASWELL: # 
BB#0: @@ -1803,9 +1803,9 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-LABEL: test_pmovzxwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxwd: ; HASWELL: # BB#0: @@ -1854,9 +1854,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-LABEL: test_pmovzxwq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxwq: ; HASWELL: # BB#0: @@ -1901,9 +1901,9 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_pmuldq: ; SANDY: # BB#0: -; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmuldq: ; HASWELL: # BB#0: @@ -1945,9 +1945,9 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_pmulld: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; 
SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmulld: ; HASWELL: # BB#0: @@ -1995,13 +1995,13 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_ptest: ; SANDY: # BB#0: -; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setb %cl # sched: [1:1.00] +; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [5:0.50] +; SANDY-NEXT: setb %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_ptest: ; HASWELL: # BB#0: @@ -2059,9 +2059,9 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_roundpd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # BB#0: @@ -2110,9 +2110,9 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_roundps: ; SANDY: # BB#0: ; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundps: ; HASWELL: # BB#0: @@ -2162,9 +2162,9 @@ define <2 x double> @test_roundsd(<2 x 
double> %a0, <2 x double> %a1, <2 x doubl ; SANDY-LABEL: test_roundsd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundsd: ; HASWELL: # BB#0: @@ -2214,9 +2214,9 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SANDY-LABEL: test_roundss: ; SANDY: # BB#0: ; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundss: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/sse42-schedule.ll b/test/CodeGen/X86/sse42-schedule.ll index adf857e12179..2a502e809bca 100644 --- a/test/CodeGen/X86/sse42-schedule.ll +++ b/test/CodeGen/X86/sse42-schedule.ll @@ -26,9 +26,9 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; SANDY-LABEL: crc32_32_8: ; SANDY: # BB#0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_32_8: ; HASWELL: # BB#0: @@ -75,9 +75,9 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; SANDY-LABEL: crc32_32_16: ; SANDY: # BB#0: ; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] +; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq 
# sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # BB#0: @@ -126,7 +126,7 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # BB#0: @@ -173,9 +173,9 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; SANDY-LABEL: crc32_64_8: ; SANDY: # BB#0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # BB#0: @@ -224,7 +224,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # BB#0: @@ -291,7 +291,7 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] ; SANDY-NEXT: # kill: %ECX %ECX %RCX ; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpestri: ; HASWELL: # BB#0: @@ -368,7 +368,7 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] ; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] ; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: 
test_pcmpestrm: ; HASWELL: # BB#0: @@ -427,12 +427,12 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pcmpistri: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] +; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] +; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00] ; SANDY-NEXT: # kill: %ECX %ECX %RCX ; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpistri: ; HASWELL: # BB#0: @@ -483,9 +483,9 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pcmpistrm: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpistrm: ; HASWELL: # BB#0: @@ -526,9 +526,9 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_pcmpgtq: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpgtq: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/ssse3-schedule.ll b/test/CodeGen/X86/ssse3-schedule.ll index 24ace69ebb9e..fb3530667ce7 100644 --- a/test/CodeGen/X86/ssse3-schedule.ll +++ b/test/CodeGen/X86/ssse3-schedule.ll @@ -35,9 +35,9 @@ define <16 x i8> 
@test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; SANDY-LABEL: test_pabsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pabsb: ; HASWELL: # BB#0: @@ -93,9 +93,9 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_pabsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pabsd: ; HASWELL: # BB#0: @@ -150,7 +150,7 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-LABEL: test_pabsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pabsw: ; HASWELL: # BB#0: @@ -201,8 +201,8 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_palignr: ; SANDY: # BB#0: ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_palignr: ; HASWELL: # BB#0: @@ -248,9 +248,9 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_phaddd: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; 
SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phaddd: ; HASWELL: # BB#0: @@ -305,9 +305,9 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phaddsw: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phaddsw: ; HASWELL: # BB#0: @@ -354,9 +354,9 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phaddw: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phaddw: ; HASWELL: # BB#0: @@ -403,9 +403,9 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_phsubd: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phsubd: ; HASWELL: # BB#0: @@ -460,9 +460,9 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phsubsw: 
; SANDY: # BB#0: -; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phsubsw: ; HASWELL: # BB#0: @@ -509,9 +509,9 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phsubw: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phsubw: ; HASWELL: # BB#0: @@ -558,9 +558,9 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pmaddubsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaddubsw: ; HASWELL: # BB#0: @@ -605,8 +605,8 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmulhrsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmulhrsw: ; HASWELL: # BB#0: @@ -651,8 +651,8 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pshufb: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pshufb: ; HASWELL: # BB#0: @@ -708,8 +708,8 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psignb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psignb: ; HASWELL: # BB#0: @@ -765,8 +765,8 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_psignd: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psignd: ; HASWELL: # BB#0: @@ -822,8 +822,8 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psignw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psignw: ; HASWELL: # BB#0: diff --git a/test/DllTool/coff-decorated.def b/test/DllTool/coff-decorated.def new file mode 100644 index 000000000000..5a908f388480 --- /dev/null +++ b/test/DllTool/coff-decorated.def @@ -0,0 +1,26 @@ +; RUN: llvm-dlltool -k -m i386 --input-def %s --output-lib %t.a +; RUN: llvm-readobj %t.a | FileCheck %s +; RUN: llvm-nm %t.a | FileCheck %s -check-prefix=CHECK-NM + +LIBRARY test.dll 
+EXPORTS +CdeclFunction +StdcallFunction@4 +@FastcallFunction@4 +StdcallAlias@4=StdcallFunction@4 +??_7exception@@6B@ + +; CHECK: Name type: noprefix +; CHECK: Symbol: __imp__CdeclFunction +; CHECK: Symbol: _CdeclFunction +; CHECK: Name type: undecorate +; CHECK: Symbol: __imp__StdcallFunction@4 +; CHECK: Symbol: _StdcallFunction@4 +; CHECK: Name type: undecorate +; CHECK: Symbol: __imp_@FastcallFunction@4 +; CHECK: Symbol: @FastcallFunction@4 +; CHECK: Name type: name +; CHECK: Symbol: __imp_??_7exception@@6B@ +; CHECK: Symbol: ??_7exception@@6B@ +; CHECK-NM: w _StdcallAlias@4 +; CHECK-NM: U _StdcallFunction@4 diff --git a/test/Feature/optnone-opt.ll b/test/Feature/optnone-opt.ll index 6410afb6be99..ae0e1a48acc5 100644 --- a/test/Feature/optnone-opt.ll +++ b/test/Feature/optnone-opt.ll @@ -57,7 +57,6 @@ attributes #0 = { optnone noinline } ; Additional IR passes that opt doesn't turn on by default. ; OPT-MORE-DAG: Skipping pass 'Dead Code Elimination' ; OPT-MORE-DAG: Skipping pass 'Dead Instruction Elimination' -; OPT-MORE-DAG: Skipping pass 'Lower atomic intrinsics ; Loop IR passes that opt doesn't turn on by default. 
; OPT-LOOP-DAG: Skipping pass 'Delete dead loops' diff --git a/test/Linker/module-flags-pic-1-a.ll b/test/Linker/module-flags-pic-1-a.ll index ea933359ac66..9074aa6e593f 100644 --- a/test/Linker/module-flags-pic-1-a.ll +++ b/test/Linker/module-flags-pic-1-a.ll @@ -2,8 +2,8 @@ ; test linking modules with specified and default PIC levels -!0 = !{ i32 1, !"PIC Level", i32 1 } +!0 = !{ i32 7, !"PIC Level", i32 1 } !llvm.module.flags = !{!0} ; CHECK: !llvm.module.flags = !{!0} -; CHECK: !0 = !{i32 1, !"PIC Level", i32 1} +; CHECK: !0 = !{i32 7, !"PIC Level", i32 1} diff --git a/test/Transforms/Inline/recursive.ll b/test/Transforms/Inline/recursive.ll index e189339e224b..ded12dddf63f 100644 --- a/test/Transforms/Inline/recursive.ll +++ b/test/Transforms/Inline/recursive.ll @@ -37,3 +37,34 @@ declare void @foo2(i8* %in) declare i32 @foo(i32 %param) +; Check that when inlining a non-recursive path into a function's own body that +; we get the re-mapping of instructions correct. +define i32 @test_recursive_inlining_remapping(i1 %init, i8* %addr) { +; CHECK-LABEL: define i32 @test_recursive_inlining_remapping( +bb: + %n = alloca i32 + br i1 %init, label %store, label %load +; CHECK-NOT: alloca +; +; CHECK: %[[N:.*]] = alloca i32 +; CHECK-NEXT: br i1 %init, + +store: + store i32 0, i32* %n + %cast = bitcast i32* %n to i8* + %v = call i32 @test_recursive_inlining_remapping(i1 false, i8* %cast) + ret i32 %v +; CHECK-NOT: call +; +; CHECK: store i32 0, i32* %[[N]] +; CHECK-NEXT: %[[CAST:.*]] = bitcast i32* %[[N]] to i8* +; CHECK-NEXT: %[[INLINED_LOAD:.*]] = load i32, i32* %[[N]] +; CHECK-NEXT: ret i32 %[[INLINED_LOAD]] +; +; CHECK-NOT: call + +load: + %castback = bitcast i8* %addr to i32* + %n.load = load i32, i32* %castback + ret i32 %n.load +} diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll index 77000527a11f..59a5caed481c 100644 --- a/test/Transforms/LowerAtomic/atomic-swap.ll +++ 
b/test/Transforms/LowerAtomic/atomic-swap.ll @@ -26,3 +26,14 @@ define i8 @swap() { ret i8 %j ; CHECK: ret i8 [[INST]] } + + +define i8 @swap_optnone() noinline optnone { +; CHECK-LABEL: @swap_optnone( + %i = alloca i8 + %j = atomicrmw xchg i8* %i, i8 42 monotonic +; CHECK: [[INST:%[a-z0-9]+]] = load +; CHECK-NEXT: store + ret i8 %j +; CHECK: ret i8 [[INST]] +} diff --git a/test/Transforms/Reassociate/canonicalize-neg-const.ll b/test/Transforms/Reassociate/canonicalize-neg-const.ll index 465460cb53b1..7cb2c3a10e2d 100644 --- a/test/Transforms/Reassociate/canonicalize-neg-const.ll +++ b/test/Transforms/Reassociate/canonicalize-neg-const.ll @@ -154,3 +154,25 @@ define i4 @test13(i4 %x) { %add = add i4 %mul, 3 ret i4 %add } + +; This test used to cause an infinite loop where we would loop between +; canonicalizing the negated constant (i.e., (X + Y*-5.0) -> (X - Y*5.0)) and +; breaking up a subtract (i.e., (X - Y*5.0) -> X + (0 - Y*5.0)). To break the +; cycle, we don't canonicalize the negative constant if we're going to later +; break up the subtract. +; +; Check to make sure we don't canonicalize +; (%pow2*-5.0 + %sub) -> (%sub - %pow2*5.0) +; as we would later break up this subtract causing a cycle. 
+; +; CHECK-LABEL: @pr34078 +; CHECK: %mul5.neg = fmul fast double %pow2, -5.000000e-01 +; CHECK: %sub1 = fadd fast double %mul5.neg, %sub +define double @pr34078(double %A) { + %sub = fsub fast double 1.000000e+00, %A + %pow2 = fmul double %A, %A + %mul5 = fmul fast double %pow2, 5.000000e-01 + %sub1 = fsub fast double %sub, %mul5 + %add = fadd fast double %sub1, %sub1 + ret double %add +} From a75fa8aaf2005c6cb7561a9aa5129a38075939a4 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 24 Aug 2017 16:35:14 +0000 Subject: [PATCH 2/4] Vendor import of clang release_50 branch r311606: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311606 --- docs/ReleaseNotes.rst | 148 +++++++++++++++++- include/clang/AST/DeclCXX.h | 74 ++++++++- include/clang/Lex/Preprocessor.h | 8 +- lib/AST/ASTImporter.cpp | 5 + lib/AST/DeclCXX.cpp | 34 +++- lib/CodeGen/CGCXXABI.cpp | 31 +--- lib/CodeGen/ItaniumCXXABI.cpp | 13 +- lib/CodeGen/MicrosoftCXXABI.cpp | 52 +++--- lib/Driver/ToolChains/Darwin.cpp | 7 +- lib/Driver/ToolChains/MSVC.cpp | 85 +++++++--- lib/Driver/ToolChains/MSVC.h | 9 +- lib/Format/WhitespaceManager.cpp | 11 +- lib/Headers/unwind.h | 78 +++------ lib/Lex/PPLexerChange.cpp | 13 +- lib/Lex/Preprocessor.cpp | 2 + lib/Parse/Parser.cpp | 2 - lib/Sema/SemaDeclCXX.cpp | 56 ++++++- lib/Sema/SemaObjCProperty.cpp | 4 +- lib/Serialization/ASTReaderDecl.cpp | 6 + lib/Serialization/ASTWriter.cpp | 3 + lib/StaticAnalyzer/Core/RegionStore.cpp | 13 ++ test/Analysis/ctor.mm | 17 ++ test/CodeGenCXX/uncopyable-args.cpp | 135 ++++++++++++---- test/Driver/clang-translation.c | 4 + test/Index/preamble-conditionals-crash.cpp | 12 ++ test/Index/preamble-conditionals.cpp | 8 + test/SemaObjC/arc-property-decl-attrs.m | 27 ++++ .../ASTMatchers/ASTMatchersNarrowingTest.cpp | 45 +++--- unittests/Format/FormatTestComments.cpp | 7 + 29 files changed, 682 insertions(+), 227 deletions(-) create mode 100644 test/Index/preamble-conditionals-crash.cpp create mode 100644 
test/Index/preamble-conditionals.cpp diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 982abb024525..5f9991439697 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -49,6 +49,15 @@ Major New Features - ... +C++ coroutines +^^^^^^^^^^^^^^ +`C++ coroutines TS +`_ +implementation has landed. Use ``-fcoroutines-ts -stdlib=libc++`` to enable +coroutine support. Here is `an example +`_ to get you started. + + Improvements to Clang's diagnostics ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -58,6 +67,25 @@ Improvements to Clang's diagnostics - -Wunused-lambda-capture warns when a variable explicitly captured by a lambda is not used in the body of the lambda. +- -Wstrict-prototypes is a new warning that warns about non-prototype + function and block declarations and types in C and Objective-C. + +- -Wunguarded-availability is a new warning that warns about uses of new + APIs that were introduced in a system whose version is newer than the + deployment target version. A new Objective-C expression ``@available`` has + been introduced to perform system version checking at runtime. This warning + is off by default to prevent unexpected warnings in existing projects. + However, its less strict sibling -Wunguarded-availability-new is on by + default. It warns about unguarded uses of APIs only when they were introduced + in or after macOS 10.13, iOS 11, tvOS 11 or watchOS 4. + +- The -Wdocumentation warning now allows the use of ``\param`` and + ``\returns`` documentation directives in the documentation comments for + declarations with a function or a block pointer type. + +- The compiler no longer warns about unreachable ``__builtin_unreachable`` + statements. + New Compiler Flags ------------------ @@ -76,8 +104,12 @@ future versions of Clang. New Pragmas in Clang ----------------------- -Clang now supports the ... +- Clang now supports the ``clang attribute`` pragma that allows users to apply + an attribute to multiple declarations. 
+- ``pragma pack`` directives that are included in a precompiled header are now + applied correctly to the declarations in the compilation unit that includes + that precompiled header. Attribute Changes in Clang -------------------------- @@ -85,6 +117,8 @@ Attribute Changes in Clang - The ``overloadable`` attribute now allows at most one function with a given name to lack the ``overloadable`` attribute. This unmarked function will not have its name mangled. +- The ``ms_abi`` attribute and the ``__builtin_ms_va_list`` types and builtins + are now supported on AArch64. Windows Support --------------- @@ -95,7 +129,41 @@ Clang's support for building native Windows programs ... C Language Changes in Clang --------------------------- -- ... +- Added near complete support for implicit scalar to vector conversion, a GNU + C/C++ language extension. With this extension, the following code is + considered valid: + +.. code-block:: c + + typedef unsigned v4i32 __attribute__((vector_size(16))); + + v4i32 foo(v4i32 a) { + // Here 5 is implicitly cast to an unsigned value and replicated into a + // vector with as many elements as 'a'. + return a + 5; + } + +The implicit conversion of a scalar value to a vector value--in the context of +a vector expression--occurs when: + +- The type of the vector is that of a ``__attribute__((vector_size(size)))`` + vector, not an OpenCL ``__attribute__((ext_vector_type(size)))`` vector type. + +- The scalar value can be cast to that of the vector element's type without + the loss of precision based on the type of the scalar and the type of the + vector's elements. + +- For compile time constant values, the above rule is weakened to consider the + value of the scalar constant rather than the constant's type. + +- Floating point constants with precise integral representations are not + implicitly converted to integer values; this is for compatibility with GCC. 
+ + +Currently the basic integer and floating point types with the following +operators are supported: ``+``, ``/``, ``-``, ``*``, ``%``, ``>``, ``<``, +``>=``, ``<=``, ``==``, ``!=``, ``&``, ``|``, ``^`` and the corresponding +assignment operators where applicable. ... @@ -107,6 +175,10 @@ C11 Feature Support C++ Language Changes in Clang ----------------------------- +- As mentioned in `C Language Changes in Clang`_, Clang's support for + implicit scalar to vector conversions also applies to C++. Additionally + the following operators are also supported: ``&&`` and ``||``. + ... C++1z Feature Support @@ -117,12 +189,56 @@ C++1z Feature Support Objective-C Language Changes in Clang ------------------------------------- -... +- Clang now guarantees that a ``readwrite`` property is synthesized when an + ambiguous property (i.e. a property that's declared in multiple protocols) + is synthesized. The ``-Wprotocol-property-synthesis-ambiguity`` warning that + warns about incompatible property types is now promoted to an error when + there's an ambiguity between ``readwrite`` and ``readonly`` properties. + +- Clang now prohibits synthesis of ambiguous properties with incompatible + explicit property attributes. The following property attributes are + checked for differences: ``copy``, ``retain``/``strong``, ``atomic``, + ``getter`` and ``setter``. OpenCL C Language Changes in Clang ---------------------------------- -... +Various bug fixes and improvements: + +- Extended OpenCL-related Clang tests. + +- Improved diagnostics across several areas: scoped address space + qualified variables, function pointers, atomics, type rank for overloading, + block captures, ``reserve_id_t``. + +- Several address space related fixes for constant address space function scope variables, + IR generation, mangling of ``generic`` and alloca (post-fix from general Clang + refactoring of address spaces). 
+ +- Several improvements in extensions: fixed OpenCL version for ``cl_khr_mipmap_image``, + added missing ``cl_khr_3d_image_writes``. + +- Improvements in ``enqueue_kernel``, especially the implementation of ``ndrange_t`` and blocks. + +- OpenCL type related fixes: global samplers, the ``pipe_t`` size, internal type redefinition, + and type compatibility checking in ternary and other operations. + +- The OpenCL header has been extended with missing extension guards, and direct mapping of ``as_type`` + to ``__builtin_astype``. + +- Fixed ``kernel_arg_type_qual`` and OpenCL/SPIR version in metadata. + +- Added proper use of the kernel calling convention to various targets. + +The following new functionalities have been added: + +- Added documentation on OpenCL to the Clang user manual. + +- Extended Clang builtins with required ``cl_khr_subgroups`` support. + +- Added ``intel_reqd_sub_group_size`` attribute support. + +- Added OpenCL types to ``CIndex``. OpenMP Support in Clang ---------------------------------- @@ -194,8 +310,30 @@ clang-format libclang -------- -... +- Libclang now provides code-completion results for more C++ constructs + and keywords. The following keywords/identifiers are now included in the + code-completion results: ``static_assert``, ``alignas``, ``constexpr``, + ``final``, ``noexcept``, ``override`` and ``thread_local``. +- Libclang now provides code-completion results for members from dependent + classes. For example: + + .. code-block:: c++ + + template <typename T> + void appendValue(std::vector<T> &dest, const T &value) { + dest. // Relevant completion results are now shown after '.' + } + + Note that code-completion results are still not provided when the member + expression includes a dependent base expression. For example: + + .. code-block:: c++ + + template <typename T> + void appendValue(std::vector<std::vector<T>> &dest, const T &value) { + dest.at(0). // Libclang fails to provide completion results after '.' 
+ } Static Analyzer --------------- diff --git a/include/clang/AST/DeclCXX.h b/include/clang/AST/DeclCXX.h index 9d64f0244ec3..c39eaee9b124 100644 --- a/include/clang/AST/DeclCXX.h +++ b/include/clang/AST/DeclCXX.h @@ -375,6 +375,7 @@ class CXXRecordDecl : public RecordDecl { /// \brief These flags are \c true if a defaulted corresponding special /// member can't be fully analyzed without performing overload resolution. /// @{ + unsigned NeedOverloadResolutionForCopyConstructor : 1; unsigned NeedOverloadResolutionForMoveConstructor : 1; unsigned NeedOverloadResolutionForMoveAssignment : 1; unsigned NeedOverloadResolutionForDestructor : 1; @@ -383,6 +384,7 @@ class CXXRecordDecl : public RecordDecl { /// \brief These flags are \c true if an implicit defaulted corresponding /// special member would be defined as deleted. /// @{ + unsigned DefaultedCopyConstructorIsDeleted : 1; unsigned DefaultedMoveConstructorIsDeleted : 1; unsigned DefaultedMoveAssignmentIsDeleted : 1; unsigned DefaultedDestructorIsDeleted : 1; @@ -415,6 +417,12 @@ class CXXRecordDecl : public RecordDecl { /// constructor. unsigned HasDefaultedDefaultConstructor : 1; + /// \brief True if this class can be passed in a non-address-preserving + /// fashion (such as in registers) according to the C++ language rules. + /// This does not imply anything about how the ABI in use will actually + /// pass an object of this class. + unsigned CanPassInRegisters : 1; + /// \brief True if a defaulted default constructor for this class would /// be constexpr. unsigned DefaultedDefaultConstructorIsConstexpr : 1; @@ -811,18 +819,50 @@ class CXXRecordDecl : public RecordDecl { return data().FirstFriend.isValid(); } + /// \brief \c true if a defaulted copy constructor for this class would be + /// deleted. 
+ bool defaultedCopyConstructorIsDeleted() const { + assert((!needsOverloadResolutionForCopyConstructor() || + (data().DeclaredSpecialMembers & SMF_CopyConstructor)) && + "this property has not yet been computed by Sema"); + return data().DefaultedCopyConstructorIsDeleted; + } + + /// \brief \c true if a defaulted move constructor for this class would be + /// deleted. + bool defaultedMoveConstructorIsDeleted() const { + assert((!needsOverloadResolutionForMoveConstructor() || + (data().DeclaredSpecialMembers & SMF_MoveConstructor)) && + "this property has not yet been computed by Sema"); + return data().DefaultedMoveConstructorIsDeleted; + } + + /// \brief \c true if a defaulted destructor for this class would be deleted. + bool defaultedDestructorIsDeleted() const { + return !data().DefaultedDestructorIsDeleted; + } + + /// \brief \c true if we know for sure that this class has a single, + /// accessible, unambiguous copy constructor that is not deleted. + bool hasSimpleCopyConstructor() const { + return !hasUserDeclaredCopyConstructor() && + !data().DefaultedCopyConstructorIsDeleted; + } + /// \brief \c true if we know for sure that this class has a single, /// accessible, unambiguous move constructor that is not deleted. bool hasSimpleMoveConstructor() const { return !hasUserDeclaredMoveConstructor() && hasMoveConstructor() && !data().DefaultedMoveConstructorIsDeleted; } + /// \brief \c true if we know for sure that this class has a single, /// accessible, unambiguous move assignment operator that is not deleted. bool hasSimpleMoveAssignment() const { return !hasUserDeclaredMoveAssignment() && hasMoveAssignment() && !data().DefaultedMoveAssignmentIsDeleted; } + /// \brief \c true if we know for sure that this class has an accessible /// destructor that is not deleted. 
bool hasSimpleDestructor() const { @@ -878,7 +918,16 @@ class CXXRecordDecl : public RecordDecl { /// \brief Determine whether we need to eagerly declare a defaulted copy /// constructor for this class. bool needsOverloadResolutionForCopyConstructor() const { - return data().HasMutableFields; + // C++17 [class.copy.ctor]p6: + // If the class definition declares a move constructor or move assignment + // operator, the implicitly declared copy constructor is defined as + // deleted. + // In MSVC mode, sometimes a declared move assignment does not delete an + // implicit copy constructor, so defer this choice to Sema. + if (data().UserDeclaredSpecialMembers & + (SMF_MoveConstructor | SMF_MoveAssignment)) + return true; + return data().NeedOverloadResolutionForCopyConstructor; } /// \brief Determine whether an implicit copy constructor for this type @@ -919,7 +968,16 @@ class CXXRecordDecl : public RecordDecl { needsImplicitMoveConstructor(); } - /// \brief Set that we attempted to declare an implicitly move + /// \brief Set that we attempted to declare an implicit copy + /// constructor, but overload resolution failed so we deleted it. + void setImplicitCopyConstructorIsDeleted() { + assert((data().DefaultedCopyConstructorIsDeleted || + needsOverloadResolutionForCopyConstructor()) && + "Copy constructor should not be deleted"); + data().DefaultedCopyConstructorIsDeleted = true; + } + + /// \brief Set that we attempted to declare an implicit move /// constructor, but overload resolution failed so we deleted it. void setImplicitMoveConstructorIsDeleted() { assert((data().DefaultedMoveConstructorIsDeleted || @@ -1316,6 +1374,18 @@ class CXXRecordDecl : public RecordDecl { return data().HasIrrelevantDestructor; } + /// \brief Determine whether this class has at least one trivial, non-deleted + /// copy or move constructor. + bool canPassInRegisters() const { + return data().CanPassInRegisters; + } + + /// \brief Set that we can pass this RecordDecl in registers. 
+ // FIXME: This should be set as part of completeDefinition. + void setCanPassInRegisters(bool CanPass) { + data().CanPassInRegisters = CanPass; + } + /// \brief Determine whether this class has a non-literal or/ volatile type /// non-static data member or base class. bool hasNonLiteralTypeFieldsOrBases() const { diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index a058fbfbb4cf..dba4b80f6071 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -1048,10 +1048,6 @@ class Preprocessor { /// which implicitly adds the builtin defines etc. void EnterMainSourceFile(); - /// \brief After parser warm-up, initialize the conditional stack from - /// the preamble. - void replayPreambleConditionalStack(); - /// \brief Inform the preprocessor callbacks that processing is complete. void EndSourceFile(); @@ -2025,6 +2021,10 @@ class Preprocessor { } private: + /// \brief After processing predefined file, initialize the conditional stack from + /// the preamble. + void replayPreambleConditionalStack(); + // Macro handling. 
void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef); void HandleUndefDirective(); diff --git a/lib/AST/ASTImporter.cpp b/lib/AST/ASTImporter.cpp index 6e33b98d2f18..2c0bb11cc4bc 100644 --- a/lib/AST/ASTImporter.cpp +++ b/lib/AST/ASTImporter.cpp @@ -956,12 +956,16 @@ bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To, ToData.HasUninitializedFields = FromData.HasUninitializedFields; ToData.HasInheritedConstructor = FromData.HasInheritedConstructor; ToData.HasInheritedAssignment = FromData.HasInheritedAssignment; + ToData.NeedOverloadResolutionForCopyConstructor + = FromData.NeedOverloadResolutionForCopyConstructor; ToData.NeedOverloadResolutionForMoveConstructor = FromData.NeedOverloadResolutionForMoveConstructor; ToData.NeedOverloadResolutionForMoveAssignment = FromData.NeedOverloadResolutionForMoveAssignment; ToData.NeedOverloadResolutionForDestructor = FromData.NeedOverloadResolutionForDestructor; + ToData.DefaultedCopyConstructorIsDeleted + = FromData.DefaultedCopyConstructorIsDeleted; ToData.DefaultedMoveConstructorIsDeleted = FromData.DefaultedMoveConstructorIsDeleted; ToData.DefaultedMoveAssignmentIsDeleted @@ -973,6 +977,7 @@ bool ASTNodeImporter::ImportDefinition(RecordDecl *From, RecordDecl *To, = FromData.HasConstexprNonCopyMoveConstructor; ToData.HasDefaultedDefaultConstructor = FromData.HasDefaultedDefaultConstructor; + ToData.CanPassInRegisters = FromData.CanPassInRegisters; ToData.DefaultedDefaultConstructorIsConstexpr = FromData.DefaultedDefaultConstructorIsConstexpr; ToData.HasConstexprDefaultConstructor diff --git a/lib/AST/DeclCXX.cpp b/lib/AST/DeclCXX.cpp index 1caceab85eea..5782b7b56c96 100644 --- a/lib/AST/DeclCXX.cpp +++ b/lib/AST/DeclCXX.cpp @@ -55,15 +55,18 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) HasOnlyCMembers(true), HasInClassInitializer(false), HasUninitializedReferenceMember(false), HasUninitializedFields(false), HasInheritedConstructor(false), 
HasInheritedAssignment(false), + NeedOverloadResolutionForCopyConstructor(false), NeedOverloadResolutionForMoveConstructor(false), NeedOverloadResolutionForMoveAssignment(false), NeedOverloadResolutionForDestructor(false), + DefaultedCopyConstructorIsDeleted(false), DefaultedMoveConstructorIsDeleted(false), DefaultedMoveAssignmentIsDeleted(false), DefaultedDestructorIsDeleted(false), HasTrivialSpecialMembers(SMF_All), DeclaredNonTrivialSpecialMembers(0), HasIrrelevantDestructor(true), HasConstexprNonCopyMoveConstructor(false), HasDefaultedDefaultConstructor(false), + CanPassInRegisters(true), DefaultedDefaultConstructorIsConstexpr(true), HasConstexprDefaultConstructor(false), HasNonLiteralTypeFieldsOrBases(false), ComputedVisibleConversions(false), @@ -352,8 +355,10 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, setHasVolatileMember(true); // Keep track of the presence of mutable fields. - if (BaseClassDecl->hasMutableFields()) + if (BaseClassDecl->hasMutableFields()) { data().HasMutableFields = true; + data().NeedOverloadResolutionForCopyConstructor = true; + } if (BaseClassDecl->hasUninitializedReferenceMember()) data().HasUninitializedReferenceMember = true; @@ -406,6 +411,8 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { // -- a direct or virtual base class B that cannot be copied/moved [...] // -- a non-static data member of class type M (or array thereof) // that cannot be copied or moved [...] + if (!Subobj->hasSimpleCopyConstructor()) + data().NeedOverloadResolutionForCopyConstructor = true; if (!Subobj->hasSimpleMoveConstructor()) data().NeedOverloadResolutionForMoveConstructor = true; @@ -426,6 +433,7 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { // -- any non-static data member has a type with a destructor // that is deleted or inaccessible from the defaulted [ctor or dtor]. 
if (!Subobj->hasSimpleDestructor()) { + data().NeedOverloadResolutionForCopyConstructor = true; data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } @@ -711,8 +719,10 @@ void CXXRecordDecl::addedMember(Decl *D) { data().IsStandardLayout = false; // Keep track of the presence of mutable fields. - if (Field->isMutable()) + if (Field->isMutable()) { data().HasMutableFields = true; + data().NeedOverloadResolutionForCopyConstructor = true; + } // C++11 [class.union]p8, DR1460: // If X is a union, a non-static data member of X that is not an anonymous @@ -756,6 +766,12 @@ void CXXRecordDecl::addedMember(Decl *D) { // A standard-layout class is a class that: // -- has no non-static data members of type [...] reference, data().IsStandardLayout = false; + + // C++1z [class.copy.ctor]p10: + // A defaulted copy constructor for a class X is defined as deleted if X has: + // -- a non-static data member of rvalue reference type + if (T->isRValueReferenceType()) + data().DefaultedCopyConstructorIsDeleted = true; } if (!Field->hasInClassInitializer() && !Field->isMutable()) { @@ -809,6 +825,10 @@ void CXXRecordDecl::addedMember(Decl *D) { // We may need to perform overload resolution to determine whether a // field can be moved if it's const or volatile qualified. if (T.getCVRQualifiers() & (Qualifiers::Const | Qualifiers::Volatile)) { + // We need to care about 'const' for the copy constructor because an + // implicit copy constructor might be declared with a non-const + // parameter. 
+ data().NeedOverloadResolutionForCopyConstructor = true; data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForMoveAssignment = true; } @@ -819,6 +839,8 @@ void CXXRecordDecl::addedMember(Decl *D) { // -- X is a union-like class that has a variant member with a // non-trivial [corresponding special member] if (isUnion()) { + if (FieldRec->hasNonTrivialCopyConstructor()) + data().DefaultedCopyConstructorIsDeleted = true; if (FieldRec->hasNonTrivialMoveConstructor()) data().DefaultedMoveConstructorIsDeleted = true; if (FieldRec->hasNonTrivialMoveAssignment()) @@ -830,6 +852,8 @@ void CXXRecordDecl::addedMember(Decl *D) { // For an anonymous union member, our overload resolution will perform // overload resolution for its members. if (Field->isAnonymousStructOrUnion()) { + data().NeedOverloadResolutionForCopyConstructor |= + FieldRec->data().NeedOverloadResolutionForCopyConstructor; data().NeedOverloadResolutionForMoveConstructor |= FieldRec->data().NeedOverloadResolutionForMoveConstructor; data().NeedOverloadResolutionForMoveAssignment |= @@ -915,8 +939,10 @@ void CXXRecordDecl::addedMember(Decl *D) { } // Keep track of the presence of mutable fields. - if (FieldRec->hasMutableFields()) + if (FieldRec->hasMutableFields()) { data().HasMutableFields = true; + data().NeedOverloadResolutionForCopyConstructor = true; + } // C++11 [class.copy]p13: // If the implicitly-defined constructor would satisfy the @@ -1450,7 +1476,7 @@ void CXXRecordDecl::completeDefinition() { void CXXRecordDecl::completeDefinition(CXXFinalOverriderMap *FinalOverriders) { RecordDecl::completeDefinition(); - + // If the class may be abstract (but hasn't been marked as such), check for // any pure final overriders. 
if (mayBeAbstract()) { diff --git a/lib/CodeGen/CGCXXABI.cpp b/lib/CodeGen/CGCXXABI.cpp index e29e525edd24..033258643ddf 100644 --- a/lib/CodeGen/CGCXXABI.cpp +++ b/lib/CodeGen/CGCXXABI.cpp @@ -30,38 +30,9 @@ void CGCXXABI::ErrorUnsupportedABI(CodeGenFunction &CGF, StringRef S) { } bool CGCXXABI::canCopyArgument(const CXXRecordDecl *RD) const { - // If RD has a non-trivial move or copy constructor, we cannot copy the - // argument. - if (RD->hasNonTrivialCopyConstructor() || RD->hasNonTrivialMoveConstructor()) - return false; - - // If RD has a non-trivial destructor, we cannot copy the argument. - if (RD->hasNonTrivialDestructor()) - return false; - // We can only copy the argument if there exists at least one trivial, // non-deleted copy or move constructor. - // FIXME: This assumes that all lazily declared copy and move constructors are - // not deleted. This assumption might not be true in some corner cases. - bool CopyDeleted = false; - bool MoveDeleted = false; - for (const CXXConstructorDecl *CD : RD->ctors()) { - if (CD->isCopyConstructor() || CD->isMoveConstructor()) { - assert(CD->isTrivial()); - // We had at least one undeleted trivial copy or move ctor. Return - // directly. - if (!CD->isDeleted()) - return true; - if (CD->isCopyConstructor()) - CopyDeleted = true; - else - MoveDeleted = true; - } - } - - // If all trivial copy and move constructors are deleted, we cannot copy the - // argument. 
- return !(CopyDeleted && MoveDeleted); + return RD->canPassInRegisters(); } llvm::Constant *CGCXXABI::GetBogusMemberPointer(QualType T) { diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index c82b9677eacf..e7963674fc29 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -63,11 +63,8 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI { bool classifyReturnType(CGFunctionInfo &FI) const override; RecordArgABI getRecordArgABI(const CXXRecordDecl *RD) const override { - // Structures with either a non-trivial destructor or a non-trivial - // copy constructor are always indirect. - // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared - // special members. - if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) + // If C++ prohibits us from making a copy, pass by address. + if (!canCopyArgument(RD)) return RAA_Indirect; return RAA_Default; } @@ -998,10 +995,8 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (!RD) return false; - // Return indirectly if we have a non-trivial copy ctor or non-trivial dtor. - // FIXME: Use canCopyArgument() when it is fixed to handle lazily declared - // special members. - if (RD->hasNonTrivialDestructor() || RD->hasNonTrivialCopyConstructor()) { + // If C++ prohibits us from making a copy, return by address. + if (!canCopyArgument(RD)) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); return true; diff --git a/lib/CodeGen/MicrosoftCXXABI.cpp b/lib/CodeGen/MicrosoftCXXABI.cpp index 78b510bb4665..1bd2937e4747 100644 --- a/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/lib/CodeGen/MicrosoftCXXABI.cpp @@ -819,46 +819,44 @@ MicrosoftCXXABI::getRecordArgABI(const CXXRecordDecl *RD) const { return RAA_Default; case llvm::Triple::x86_64: - // Win64 passes objects with non-trivial copy ctors indirectly. 
- if (RD->hasNonTrivialCopyConstructor()) - return RAA_Indirect; - - // If an object has a destructor, we'd really like to pass it indirectly + // If a class has a destructor, we'd really like to pass it indirectly // because it allows us to elide copies. Unfortunately, MSVC makes that // impossible for small types, which it will pass in a single register or // stack slot. Most objects with dtors are large-ish, so handle that early. // We can't call out all large objects as being indirect because there are // multiple x64 calling conventions and the C++ ABI code shouldn't dictate // how we pass large POD types. + // + // Note: This permits small classes with nontrivial destructors to be + // passed in registers, which is non-conforming. if (RD->hasNonTrivialDestructor() && getContext().getTypeSize(RD->getTypeForDecl()) > 64) return RAA_Indirect; - // If this is true, the implicit copy constructor that Sema would have - // created would not be deleted. FIXME: We should provide a more direct way - // for CodeGen to ask whether the constructor was deleted. - if (!RD->hasUserDeclaredCopyConstructor() && - !RD->hasUserDeclaredMoveConstructor() && - !RD->needsOverloadResolutionForMoveConstructor() && - !RD->hasUserDeclaredMoveAssignment() && - !RD->needsOverloadResolutionForMoveAssignment()) - return RAA_Default; - - // Otherwise, Sema should have created an implicit copy constructor if - // needed. - assert(!RD->needsImplicitCopyConstructor()); - - // We have to make sure the trivial copy constructor isn't deleted. - for (const CXXConstructorDecl *CD : RD->ctors()) { - if (CD->isCopyConstructor()) { - assert(CD->isTrivial()); - // We had at least one undeleted trivial copy ctor. Return directly. - if (!CD->isDeleted()) - return RAA_Default; + // If a class has at least one non-deleted, trivial copy constructor, it + // is passed according to the C ABI. Otherwise, it is passed indirectly. 
+ // + // Note: This permits classes with non-trivial copy or move ctors to be + // passed in registers, so long as they *also* have a trivial copy ctor, + // which is non-conforming. + if (RD->needsImplicitCopyConstructor()) { + // If the copy ctor has not yet been declared, we can read its triviality + // off the AST. + if (!RD->defaultedCopyConstructorIsDeleted() && + RD->hasTrivialCopyConstructor()) + return RAA_Default; + } else { + // Otherwise, we need to find the copy constructor(s) and ask. + for (const CXXConstructorDecl *CD : RD->ctors()) { + if (CD->isCopyConstructor()) { + // We had at least one nondeleted trivial copy ctor. Return directly. + if (!CD->isDeleted() && CD->isTrivial()) + return RAA_Default; + } } } - // The trivial copy constructor was deleted. Return indirectly. + // We have no trivial, non-deleted copy constructor. return RAA_Indirect; } diff --git a/lib/Driver/ToolChains/Darwin.cpp b/lib/Driver/ToolChains/Darwin.cpp index 6b7f0c71dfb7..32103a6120d4 100644 --- a/lib/Driver/ToolChains/Darwin.cpp +++ b/lib/Driver/ToolChains/Darwin.cpp @@ -1837,7 +1837,12 @@ Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, } bool MachO::IsUnwindTablesDefault(const ArgList &Args) const { - return !UseSjLjExceptions(Args); + // Unwind tables are not emitted if -fno-exceptions is supplied (except when + // targeting x86_64). + return getArch() == llvm::Triple::x86_64 || + (!UseSjLjExceptions(Args) && + Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, + true)); } bool MachO::UseDwarfDebugFlags() const { diff --git a/lib/Driver/ToolChains/MSVC.cpp b/lib/Driver/ToolChains/MSVC.cpp index b871c856d2a0..7978a6941cb8 100644 --- a/lib/Driver/ToolChains/MSVC.cpp +++ b/lib/Driver/ToolChains/MSVC.cpp @@ -76,7 +76,7 @@ static bool getSystemRegistryString(const char *keyPath, const char *valueName, // Check various environment variables to try and find a toolchain. 
static bool findVCToolChainViaEnvironment(std::string &Path, - bool &IsVS2017OrNewer) { + MSVCToolChain::ToolsetLayout &VSLayout) { // These variables are typically set by vcvarsall.bat // when launching a developer command prompt. if (llvm::Optional VCToolsInstallDir = @@ -84,7 +84,7 @@ static bool findVCToolChainViaEnvironment(std::string &Path, // This is only set by newer Visual Studios, and it leads straight to // the toolchain directory. Path = std::move(*VCToolsInstallDir); - IsVS2017OrNewer = true; + VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer; return true; } if (llvm::Optional VCInstallDir = @@ -94,7 +94,7 @@ static bool findVCToolChainViaEnvironment(std::string &Path, // so this check has to appear second. // In older Visual Studios, the VC directory is the toolchain. Path = std::move(*VCInstallDir); - IsVS2017OrNewer = false; + VSLayout = MSVCToolChain::ToolsetLayout::OlderVS; return true; } @@ -134,9 +134,16 @@ static bool findVCToolChainViaEnvironment(std::string &Path, } if (IsBin) { llvm::StringRef ParentPath = llvm::sys::path::parent_path(TestPath); - if (llvm::sys::path::filename(ParentPath) == "VC") { + llvm::StringRef ParentFilename = llvm::sys::path::filename(ParentPath); + if (ParentFilename == "VC") { Path = ParentPath; - IsVS2017OrNewer = false; + VSLayout = MSVCToolChain::ToolsetLayout::OlderVS; + return true; + } + if (ParentFilename == "x86ret" || ParentFilename == "x86chk" + || ParentFilename == "amd64ret" || ParentFilename == "amd64chk") { + Path = ParentPath; + VSLayout = MSVCToolChain::ToolsetLayout::DevDivInternal; return true; } @@ -165,7 +172,7 @@ static bool findVCToolChainViaEnvironment(std::string &Path, ToolChainPath = llvm::sys::path::parent_path(ToolChainPath); Path = ToolChainPath; - IsVS2017OrNewer = true; + VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer; return true; } @@ -181,7 +188,7 @@ static bool findVCToolChainViaEnvironment(std::string &Path, // This is the preferred way to discover new Visual 
Studios, as they're no // longer listed in the registry. static bool findVCToolChainViaSetupConfig(std::string &Path, - bool &IsVS2017OrNewer) { + MSVCToolChain::ToolsetLayout &VSLayout) { #if !defined(USE_MSVC_SETUP_API) return false; #else @@ -263,7 +270,7 @@ static bool findVCToolChainViaSetupConfig(std::string &Path, return false; Path = ToolchainPath.str(); - IsVS2017OrNewer = true; + VSLayout = MSVCToolChain::ToolsetLayout::VS2017OrNewer; return true; #endif } @@ -272,7 +279,7 @@ static bool findVCToolChainViaSetupConfig(std::string &Path, // a toolchain path. VS2017 and newer don't get added to the registry. // So if we find something here, we know that it's an older version. static bool findVCToolChainViaRegistry(std::string &Path, - bool &IsVS2017OrNewer) { + MSVCToolChain::ToolsetLayout &VSLayout) { std::string VSInstallPath; if (getSystemRegistryString(R"(SOFTWARE\Microsoft\VisualStudio\$VERSION)", "InstallDir", VSInstallPath, nullptr) || @@ -284,7 +291,7 @@ static bool findVCToolChainViaRegistry(std::string &Path, llvm::sys::path::append(VCPath, "VC"); Path = VCPath.str(); - IsVS2017OrNewer = false; + VSLayout = MSVCToolChain::ToolsetLayout::OlderVS; return true; } } @@ -475,6 +482,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, // native target bin directory. // e.g. when compiling for x86 on an x64 host, PATH should start with: // /bin/HostX64/x86;/bin/HostX64/x64 + // This doesn't attempt to handle ToolsetLayout::DevDivInternal. if (TC.getIsVS2017OrNewer() && llvm::Triple(llvm::sys::getProcessTriple()).getArch() != TC.getArch()) { auto HostArch = llvm::Triple(llvm::sys::getProcessTriple()).getArch(); @@ -677,9 +685,9 @@ MSVCToolChain::MSVCToolChain(const Driver &D, const llvm::Triple &Triple, // what they want to use. // Failing that, just try to find the newest Visual Studio version we can // and use its default VC toolchain. 
- findVCToolChainViaEnvironment(VCToolChainPath, IsVS2017OrNewer) || - findVCToolChainViaSetupConfig(VCToolChainPath, IsVS2017OrNewer) || - findVCToolChainViaRegistry(VCToolChainPath, IsVS2017OrNewer); + findVCToolChainViaEnvironment(VCToolChainPath, VSLayout) || + findVCToolChainViaSetupConfig(VCToolChainPath, VSLayout) || + findVCToolChainViaRegistry(VCToolChainPath, VSLayout); } Tool *MSVCToolChain::buildLinker() const { @@ -766,6 +774,21 @@ static const char *llvmArchToLegacyVCArch(llvm::Triple::ArchType Arch) { } } +// Similar to the above function, but for DevDiv internal builds. +static const char *llvmArchToDevDivInternalArch(llvm::Triple::ArchType Arch) { + using ArchType = llvm::Triple::ArchType; + switch (Arch) { + case ArchType::x86: + return "i386"; + case ArchType::x86_64: + return "amd64"; + case ArchType::arm: + return "arm"; + default: + return ""; + } +} + // Get the path to a specific subdirectory in the current toolchain for // a given target architecture. // VS2017 changed the VC toolchain layout, so this should be used instead @@ -773,26 +796,40 @@ static const char *llvmArchToLegacyVCArch(llvm::Triple::ArchType Arch) { std::string MSVCToolChain::getSubDirectoryPath(SubDirectoryType Type, llvm::Triple::ArchType TargetArch) const { + const char *SubdirName; + const char *IncludeName; + switch (VSLayout) { + case ToolsetLayout::OlderVS: + SubdirName = llvmArchToLegacyVCArch(TargetArch); + IncludeName = "include"; + break; + case ToolsetLayout::VS2017OrNewer: + SubdirName = llvmArchToWindowsSDKArch(TargetArch); + IncludeName = "include"; + break; + case ToolsetLayout::DevDivInternal: + SubdirName = llvmArchToDevDivInternalArch(TargetArch); + IncludeName = "inc"; + break; + } + llvm::SmallString<256> Path(VCToolChainPath); switch (Type) { case SubDirectoryType::Bin: - if (IsVS2017OrNewer) { - bool HostIsX64 = + if (VSLayout == ToolsetLayout::VS2017OrNewer) { + const bool HostIsX64 = llvm::Triple(llvm::sys::getProcessTriple()).isArch64Bit(); - 
llvm::sys::path::append(Path, "bin", (HostIsX64 ? "HostX64" : "HostX86"), - llvmArchToWindowsSDKArch(TargetArch)); - - } else { - llvm::sys::path::append(Path, "bin", llvmArchToLegacyVCArch(TargetArch)); + const char *const HostName = HostIsX64 ? "HostX64" : "HostX86"; + llvm::sys::path::append(Path, "bin", HostName, SubdirName); + } else { // OlderVS or DevDivInternal + llvm::sys::path::append(Path, "bin", SubdirName); } break; case SubDirectoryType::Include: - llvm::sys::path::append(Path, "include"); + llvm::sys::path::append(Path, IncludeName); break; case SubDirectoryType::Lib: - llvm::sys::path::append( - Path, "lib", IsVS2017OrNewer ? llvmArchToWindowsSDKArch(TargetArch) - : llvmArchToLegacyVCArch(TargetArch)); + llvm::sys::path::append(Path, "lib", SubdirName); break; } return Path.str(); diff --git a/lib/Driver/ToolChains/MSVC.h b/lib/Driver/ToolChains/MSVC.h index d153691a5c90..854f88a36fd2 100644 --- a/lib/Driver/ToolChains/MSVC.h +++ b/lib/Driver/ToolChains/MSVC.h @@ -92,7 +92,12 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain { return getSubDirectoryPath(Type, getArch()); } - bool getIsVS2017OrNewer() const { return IsVS2017OrNewer; } + enum class ToolsetLayout { + OlderVS, + VS2017OrNewer, + DevDivInternal, + }; + bool getIsVS2017OrNewer() const { return VSLayout == ToolsetLayout::VS2017OrNewer; } void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, @@ -130,7 +135,7 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain { Tool *buildAssembler() const override; private: std::string VCToolChainPath; - bool IsVS2017OrNewer = false; + ToolsetLayout VSLayout = ToolsetLayout::OlderVS; CudaInstallationDetector CudaInstallation; }; diff --git a/lib/Format/WhitespaceManager.cpp b/lib/Format/WhitespaceManager.cpp index 4b4fd13145fb..b1a5f1eab552 100644 --- a/lib/Format/WhitespaceManager.cpp +++ b/lib/Format/WhitespaceManager.cpp @@ -472,9 +472,14 @@ void WhitespaceManager::alignTrailingComments() { continue; unsigned 
ChangeMinColumn = Changes[i].StartOfTokenColumn; - unsigned ChangeMaxColumn = Style.ColumnLimit >= Changes[i].TokenLength - ? Style.ColumnLimit - Changes[i].TokenLength - : ChangeMinColumn; + unsigned ChangeMaxColumn; + + if (Style.ColumnLimit == 0) + ChangeMaxColumn = UINT_MAX; + else if (Style.ColumnLimit >= Changes[i].TokenLength) + ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; + else + ChangeMaxColumn = ChangeMinColumn; // If we don't create a replacement for this change, we have to consider // it to be immovable. diff --git a/lib/Headers/unwind.h b/lib/Headers/unwind.h index e94b00b57c26..4f74a3478740 100644 --- a/lib/Headers/unwind.h +++ b/lib/Headers/unwind.h @@ -76,13 +76,7 @@ typedef intptr_t _sleb128_t; typedef uintptr_t _uleb128_t; struct _Unwind_Context; -#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___)) -struct _Unwind_Control_Block; -typedef struct _Unwind_Control_Block _Unwind_Exception; /* Alias */ -#else struct _Unwind_Exception; -typedef struct _Unwind_Exception _Unwind_Exception; -#endif typedef enum { _URC_NO_REASON = 0, #if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ @@ -115,42 +109,8 @@ typedef enum { } _Unwind_Action; typedef void (*_Unwind_Exception_Cleanup_Fn)(_Unwind_Reason_Code, - _Unwind_Exception *); + struct _Unwind_Exception *); -#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___)) -typedef struct _Unwind_Control_Block _Unwind_Control_Block; -typedef uint32_t _Unwind_EHT_Header; - -struct _Unwind_Control_Block { - uint64_t exception_class; - void (*exception_cleanup)(_Unwind_Reason_Code, _Unwind_Control_Block *); - /* unwinder cache (private fields for the unwinder's use) */ - struct { - uint32_t reserved1; /* forced unwind stop function, 0 if not forced */ - uint32_t reserved2; /* personality routine */ - uint32_t reserved3; /* callsite */ - uint32_t reserved4; /* forced unwind stop argument */ - uint32_t reserved5; - } 
unwinder_cache; - /* propagation barrier cache (valid after phase 1) */ - struct { - uint32_t sp; - uint32_t bitpattern[5]; - } barrier_cache; - /* cleanup cache (preserved over cleanup) */ - struct { - uint32_t bitpattern[4]; - } cleanup_cache; - /* personality cache (for personality's benefit) */ - struct { - uint32_t fnstart; /* function start address */ - _Unwind_EHT_Header *ehtp; /* pointer to EHT entry header word */ - uint32_t additional; /* additional data */ - uint32_t reserved1; - } pr_cache; - long long int : 0; /* force alignment of next item to 8-byte boundary */ -}; -#else struct _Unwind_Exception { _Unwind_Exception_Class exception_class; _Unwind_Exception_Cleanup_Fn exception_cleanup; @@ -160,24 +120,23 @@ struct _Unwind_Exception { * aligned". GCC has interpreted this to mean "use the maximum useful * alignment for the target"; so do we. */ } __attribute__((__aligned__)); -#endif typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)(int, _Unwind_Action, _Unwind_Exception_Class, - _Unwind_Exception *, + struct _Unwind_Exception *, struct _Unwind_Context *, void *); -typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)(int, _Unwind_Action, - _Unwind_Exception_Class, - _Unwind_Exception *, - struct _Unwind_Context *); +typedef _Unwind_Reason_Code (*_Unwind_Personality_Fn)( + int, _Unwind_Action, _Unwind_Exception_Class, struct _Unwind_Exception *, + struct _Unwind_Context *); typedef _Unwind_Personality_Fn __personality_routine; typedef _Unwind_Reason_Code (*_Unwind_Trace_Fn)(struct _Unwind_Context *, void *); -#if defined(__arm__) && !(defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH___)) +#if defined(__arm__) && !defined(__APPLE__) + typedef enum { _UVRSC_CORE = 0, /* integer register */ _UVRSC_VFP = 1, /* vfp */ @@ -199,12 +158,14 @@ typedef enum { _UVRSR_FAILED = 2 } _Unwind_VRS_Result; +#if !defined(__USING_SJLJ_EXCEPTIONS__) && !defined(__ARM_DWARF_EH__) typedef uint32_t _Unwind_State; #define _US_VIRTUAL_UNWIND_FRAME ((_Unwind_State)0) 
#define _US_UNWIND_FRAME_STARTING ((_Unwind_State)1) #define _US_UNWIND_FRAME_RESUME ((_Unwind_State)2) #define _US_ACTION_MASK ((_Unwind_State)3) #define _US_FORCE_UNWIND ((_Unwind_State)8) +#endif _Unwind_VRS_Result _Unwind_VRS_Get(struct _Unwind_Context *__context, _Unwind_VRS_RegClass __regclass, @@ -263,12 +224,13 @@ _Unwind_Ptr _Unwind_GetRegionStart(struct _Unwind_Context *); /* DWARF EH functions; currently not available on Darwin/ARM */ #if !defined(__APPLE__) || !defined(__arm__) -_Unwind_Reason_Code _Unwind_RaiseException(_Unwind_Exception *); -_Unwind_Reason_Code _Unwind_ForcedUnwind(_Unwind_Exception *, _Unwind_Stop_Fn, - void *); -void _Unwind_DeleteException(_Unwind_Exception *); -void _Unwind_Resume(_Unwind_Exception *); -_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(_Unwind_Exception *); + +_Unwind_Reason_Code _Unwind_RaiseException(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_ForcedUnwind(struct _Unwind_Exception *, + _Unwind_Stop_Fn, void *); +void _Unwind_DeleteException(struct _Unwind_Exception *); +void _Unwind_Resume(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_Resume_or_Rethrow(struct _Unwind_Exception *); #endif @@ -279,11 +241,11 @@ typedef struct SjLj_Function_Context *_Unwind_FunctionContext_t; void _Unwind_SjLj_Register(_Unwind_FunctionContext_t); void _Unwind_SjLj_Unregister(_Unwind_FunctionContext_t); -_Unwind_Reason_Code _Unwind_SjLj_RaiseException(_Unwind_Exception *); -_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(_Unwind_Exception *, +_Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_SjLj_ForcedUnwind(struct _Unwind_Exception *, _Unwind_Stop_Fn, void *); -void _Unwind_SjLj_Resume(_Unwind_Exception *); -_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(_Unwind_Exception *); +void _Unwind_SjLj_Resume(struct _Unwind_Exception *); +_Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *); void *_Unwind_FindEnclosingFunction(void 
*); diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp index 5a589d6a17b3..36d7028da688 100644 --- a/lib/Lex/PPLexerChange.cpp +++ b/lib/Lex/PPLexerChange.cpp @@ -458,10 +458,16 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { SourceMgr.setNumCreatedFIDsForFileID(CurPPLexer->getFileID(), NumFIDs); } + bool ExitedFromPredefinesFile = false; FileID ExitedFID; - if (Callbacks && !isEndOfMacro && CurPPLexer) + if (!isEndOfMacro && CurPPLexer) { ExitedFID = CurPPLexer->getFileID(); + assert(PredefinesFileID.isValid() && + "HandleEndOfFile is called before PredefinesFileId is set"); + ExitedFromPredefinesFile = (PredefinesFileID == ExitedFID); + } + if (LeavingSubmodule) { // We're done with this submodule. Module *M = LeaveSubmodule(/*ForPragma*/false); @@ -489,6 +495,11 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) { PPCallbacks::ExitFile, FileType, ExitedFID); } + // Restore conditional stack from the preamble right after exiting from the + // predefines file. + if (ExitedFromPredefinesFile) + replayPreambleConditionalStack(); + // Client should lex another token unless we generated an EOM. return LeavingSubmodule; } diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index d1dc8e1c0010..7979be773aa1 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -540,6 +540,8 @@ void Preprocessor::EnterMainSourceFile() { void Preprocessor::replayPreambleConditionalStack() { // Restore the conditional stack from the preamble, if there is one. 
if (PreambleConditionalStack.isReplaying()) { + assert(CurPPLexer && + "CurPPLexer is null when calling replayPreambleConditionalStack."); CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack()); PreambleConditionalStack.doneReplaying(); } diff --git a/lib/Parse/Parser.cpp b/lib/Parse/Parser.cpp index 4aa9a5971929..1ed7ef966358 100644 --- a/lib/Parse/Parser.cpp +++ b/lib/Parse/Parser.cpp @@ -516,8 +516,6 @@ void Parser::Initialize() { // Prime the lexer look-ahead. ConsumeToken(); - - PP.replayPreambleConditionalStack(); } void Parser::LateTemplateParserCleanupCallback(void *P) { diff --git a/lib/Sema/SemaDeclCXX.cpp b/lib/Sema/SemaDeclCXX.cpp index e9070881afe4..c05e5f020708 100644 --- a/lib/Sema/SemaDeclCXX.cpp +++ b/lib/Sema/SemaDeclCXX.cpp @@ -5726,6 +5726,53 @@ static void DefineImplicitSpecialMember(Sema &S, CXXMethodDecl *MD, } } +/// Determine whether a type is permitted to be passed or returned in +/// registers, per C++ [class.temporary]p3. +static bool computeCanPassInRegisters(Sema &S, CXXRecordDecl *D) { + if (D->isDependentType() || D->isInvalidDecl()) + return false; + + // Per C++ [class.temporary]p3, the relevant condition is: + // each copy constructor, move constructor, and destructor of X is + // either trivial or deleted, and X has at least one non-deleted copy + // or move constructor + bool HasNonDeletedCopyOrMove = false; + + if (D->needsImplicitCopyConstructor() && + !D->defaultedCopyConstructorIsDeleted()) { + if (!D->hasTrivialCopyConstructor()) + return false; + HasNonDeletedCopyOrMove = true; + } + + if (S.getLangOpts().CPlusPlus11 && D->needsImplicitMoveConstructor() && + !D->defaultedMoveConstructorIsDeleted()) { + if (!D->hasTrivialMoveConstructor()) + return false; + HasNonDeletedCopyOrMove = true; + } + + if (D->needsImplicitDestructor() && !D->defaultedDestructorIsDeleted() && + !D->hasTrivialDestructor()) + return false; + + for (const CXXMethodDecl *MD : D->methods()) { + if (MD->isDeleted()) + continue; + + auto 
*CD = dyn_cast(MD); + if (CD && CD->isCopyOrMoveConstructor()) + HasNonDeletedCopyOrMove = true; + else if (!isa(MD)) + continue; + + if (!MD->isTrivial()) + return false; + } + + return HasNonDeletedCopyOrMove; +} + /// \brief Perform semantic checks on a class definition that has been /// completing, introducing implicitly-declared members, checking for /// abstract types, etc. @@ -5870,6 +5917,8 @@ void Sema::CheckCompletedCXXClass(CXXRecordDecl *Record) { } checkClassLevelDLLAttribute(Record); + + Record->setCanPassInRegisters(computeCanPassInRegisters(*this, Record)); } /// Look up the special member function that would be called by a special @@ -7496,8 +7545,7 @@ void Sema::ActOnFinishCXXMemberSpecification(Scope* S, SourceLocation RLoc, reinterpret_cast(FieldCollector->getCurFields()), FieldCollector->getCurNumFields()), LBrac, RBrac, AttrList); - CheckCompletedCXXClass( - dyn_cast_or_null(TagDecl)); + CheckCompletedCXXClass(dyn_cast_or_null(TagDecl)); } /// AddImplicitlyDeclaredMembersToClass - Adds any implicitly-declared @@ -11929,8 +11977,10 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor( Scope *S = getScopeForContext(ClassDecl); CheckImplicitSpecialMemberDeclaration(S, CopyConstructor); - if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor)) + if (ShouldDeleteSpecialMember(CopyConstructor, CXXCopyConstructor)) { + ClassDecl->setImplicitCopyConstructorIsDeleted(); SetDeclDeleted(CopyConstructor, ClassLoc); + } if (S) PushOnScopeChains(CopyConstructor, S, false); diff --git a/lib/Sema/SemaObjCProperty.cpp b/lib/Sema/SemaObjCProperty.cpp index e1e85dfd5e55..bfb0071a54f9 100644 --- a/lib/Sema/SemaObjCProperty.cpp +++ b/lib/Sema/SemaObjCProperty.cpp @@ -872,7 +872,7 @@ SelectPropertyForSynthesisFromProtocols(Sema &S, SourceLocation AtLoc, } QualType RHSType = S.Context.getCanonicalType(Property->getType()); - unsigned OriginalAttributes = Property->getPropertyAttributes(); + unsigned OriginalAttributes = 
Property->getPropertyAttributesAsWritten(); enum MismatchKind { IncompatibleType = 0, HasNoExpectedAttribute, @@ -890,7 +890,7 @@ SelectPropertyForSynthesisFromProtocols(Sema &S, SourceLocation AtLoc, SmallVector Mismatches; for (ObjCPropertyDecl *Prop : Properties) { // Verify the property attributes. - unsigned Attr = Prop->getPropertyAttributes(); + unsigned Attr = Prop->getPropertyAttributesAsWritten(); if (Attr != OriginalAttributes) { auto Diag = [&](bool OriginalHasAttribute, StringRef AttributeName) { MismatchKind Kind = OriginalHasAttribute ? HasNoExpectedAttribute diff --git a/lib/Serialization/ASTReaderDecl.cpp b/lib/Serialization/ASTReaderDecl.cpp index abed2586561a..085341571ced 100644 --- a/lib/Serialization/ASTReaderDecl.cpp +++ b/lib/Serialization/ASTReaderDecl.cpp @@ -1559,9 +1559,11 @@ void ASTDeclReader::ReadCXXDefinitionData( Data.HasUninitializedFields = Record.readInt(); Data.HasInheritedConstructor = Record.readInt(); Data.HasInheritedAssignment = Record.readInt(); + Data.NeedOverloadResolutionForCopyConstructor = Record.readInt(); Data.NeedOverloadResolutionForMoveConstructor = Record.readInt(); Data.NeedOverloadResolutionForMoveAssignment = Record.readInt(); Data.NeedOverloadResolutionForDestructor = Record.readInt(); + Data.DefaultedCopyConstructorIsDeleted = Record.readInt(); Data.DefaultedMoveConstructorIsDeleted = Record.readInt(); Data.DefaultedMoveAssignmentIsDeleted = Record.readInt(); Data.DefaultedDestructorIsDeleted = Record.readInt(); @@ -1570,6 +1572,7 @@ void ASTDeclReader::ReadCXXDefinitionData( Data.HasIrrelevantDestructor = Record.readInt(); Data.HasConstexprNonCopyMoveConstructor = Record.readInt(); Data.HasDefaultedDefaultConstructor = Record.readInt(); + Data.CanPassInRegisters = Record.readInt(); Data.DefaultedDefaultConstructorIsConstexpr = Record.readInt(); Data.HasConstexprDefaultConstructor = Record.readInt(); Data.HasNonLiteralTypeFieldsOrBases = Record.readInt(); @@ -1697,9 +1700,11 @@ void 
ASTDeclReader::MergeDefinitionData( MATCH_FIELD(HasUninitializedFields) MATCH_FIELD(HasInheritedConstructor) MATCH_FIELD(HasInheritedAssignment) + MATCH_FIELD(NeedOverloadResolutionForCopyConstructor) MATCH_FIELD(NeedOverloadResolutionForMoveConstructor) MATCH_FIELD(NeedOverloadResolutionForMoveAssignment) MATCH_FIELD(NeedOverloadResolutionForDestructor) + MATCH_FIELD(DefaultedCopyConstructorIsDeleted) MATCH_FIELD(DefaultedMoveConstructorIsDeleted) MATCH_FIELD(DefaultedMoveAssignmentIsDeleted) MATCH_FIELD(DefaultedDestructorIsDeleted) @@ -1708,6 +1713,7 @@ void ASTDeclReader::MergeDefinitionData( MATCH_FIELD(HasIrrelevantDestructor) OR_FIELD(HasConstexprNonCopyMoveConstructor) OR_FIELD(HasDefaultedDefaultConstructor) + MATCH_FIELD(CanPassInRegisters) MATCH_FIELD(DefaultedDefaultConstructorIsConstexpr) OR_FIELD(HasConstexprDefaultConstructor) MATCH_FIELD(HasNonLiteralTypeFieldsOrBases) diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp index a875e627bdfb..128e53b91b1d 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -5874,9 +5874,11 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { Record->push_back(Data.HasUninitializedFields); Record->push_back(Data.HasInheritedConstructor); Record->push_back(Data.HasInheritedAssignment); + Record->push_back(Data.NeedOverloadResolutionForCopyConstructor); Record->push_back(Data.NeedOverloadResolutionForMoveConstructor); Record->push_back(Data.NeedOverloadResolutionForMoveAssignment); Record->push_back(Data.NeedOverloadResolutionForDestructor); + Record->push_back(Data.DefaultedCopyConstructorIsDeleted); Record->push_back(Data.DefaultedMoveConstructorIsDeleted); Record->push_back(Data.DefaultedMoveAssignmentIsDeleted); Record->push_back(Data.DefaultedDestructorIsDeleted); @@ -5885,6 +5887,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) { Record->push_back(Data.HasIrrelevantDestructor); 
Record->push_back(Data.HasConstexprNonCopyMoveConstructor); Record->push_back(Data.HasDefaultedDefaultConstructor); + Record->push_back(Data.CanPassInRegisters); Record->push_back(Data.DefaultedDefaultConstructorIsConstexpr); Record->push_back(Data.HasConstexprDefaultConstructor); Record->push_back(Data.HasNonLiteralTypeFieldsOrBases); diff --git a/lib/StaticAnalyzer/Core/RegionStore.cpp b/lib/StaticAnalyzer/Core/RegionStore.cpp index 28f78fa3ff5e..11902f66df91 100644 --- a/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -409,6 +409,19 @@ class RegionStoreManager : public StoreManager { // BindDefault is only used to initialize a region with a default value. StoreRef BindDefault(Store store, const MemRegion *R, SVal V) override { + // FIXME: The offsets of empty bases can be tricky because of + // of the so called "empty base class optimization". + // If a base class has been optimized out + // we should not try to create a binding, otherwise we should. + // Unfortunately, at the moment ASTRecordLayout doesn't expose + // the actual sizes of the empty bases + // and trying to infer them from offsets/alignments + // seems to be error-prone and non-trivial because of the trailing padding. + // As a temporary mitigation we don't create bindings for empty bases. 
+ if (R->getKind() == MemRegion::CXXBaseObjectRegionKind && + cast(R)->getDecl()->isEmpty()) + return StoreRef(store, *this); + RegionBindingsRef B = getRegionBindings(store); assert(!B.lookup(R, BindingKey::Direct)); diff --git a/test/Analysis/ctor.mm b/test/Analysis/ctor.mm index 646229aac989..619e2cb0f044 100644 --- a/test/Analysis/ctor.mm +++ b/test/Analysis/ctor.mm @@ -704,3 +704,20 @@ namespace PR19579 { }; } } + +namespace NoCrashOnEmptyBaseOptimization { + struct NonEmptyBase { + int X; + explicit NonEmptyBase(int X) : X(X) {} + }; + + struct EmptyBase {}; + + struct S : NonEmptyBase, EmptyBase { + S() : NonEmptyBase(0), EmptyBase() {} + }; + + void testSCtorNoCrash() { + S s; + } +} diff --git a/test/CodeGenCXX/uncopyable-args.cpp b/test/CodeGenCXX/uncopyable-args.cpp index 307a5cf11b6b..ef7168cdaaf7 100644 --- a/test/CodeGenCXX/uncopyable-args.cpp +++ b/test/CodeGenCXX/uncopyable-args.cpp @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-msvc -emit-llvm -o - %s | FileCheck %s -check-prefix=WIN64 +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-msvc -emit-llvm -o - %s -fms-compatibility -fms-compatibility-version=18 | FileCheck %s -check-prefix=WIN64 -check-prefix=WIN64-18 +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-msvc -emit-llvm -o - %s -fms-compatibility -fms-compatibility-version=19 | FileCheck %s -check-prefix=WIN64 -check-prefix=WIN64-19 namespace trivial { // Trivial structs should be passed directly. @@ -52,12 +53,11 @@ void foo(A); void bar() { foo({}); } -// FIXME: The copy ctor is implicitly deleted. 
-// CHECK-DISABLED-LABEL: define void @_ZN9move_ctor3barEv() -// CHECK-DISABLED: call void @_Z{{.*}}C1Ev( -// CHECK-DISABLED-NOT: call -// CHECK-DISABLED: call void @_ZN9move_ctor3fooENS_1AE(%"struct.move_ctor::A"* %{{.*}}) -// CHECK-DISABLED-LABEL: declare void @_ZN9move_ctor3fooENS_1AE(%"struct.move_ctor::A"*) +// CHECK-LABEL: define void @_ZN9move_ctor3barEv() +// CHECK: call void @_Z{{.*}}C1Ev( +// CHECK-NOT: call +// CHECK: call void @_ZN9move_ctor3fooENS_1AE(%"struct.move_ctor::A"* %{{.*}}) +// CHECK-LABEL: declare void @_ZN9move_ctor3fooENS_1AE(%"struct.move_ctor::A"*) // WIN64-LABEL: declare void @"\01?foo@move_ctor@@YAXUA@1@@Z"(%"struct.move_ctor::A"*) } @@ -73,12 +73,11 @@ void foo(A); void bar() { foo({}); } -// FIXME: The copy ctor is deleted. -// CHECK-DISABLED-LABEL: define void @_ZN11all_deleted3barEv() -// CHECK-DISABLED: call void @_Z{{.*}}C1Ev( -// CHECK-DISABLED-NOT: call -// CHECK-DISABLED: call void @_ZN11all_deleted3fooENS_1AE(%"struct.all_deleted::A"* %{{.*}}) -// CHECK-DISABLED-LABEL: declare void @_ZN11all_deleted3fooENS_1AE(%"struct.all_deleted::A"*) +// CHECK-LABEL: define void @_ZN11all_deleted3barEv() +// CHECK: call void @_Z{{.*}}C1Ev( +// CHECK-NOT: call +// CHECK: call void @_ZN11all_deleted3fooENS_1AE(%"struct.all_deleted::A"* %{{.*}}) +// CHECK-LABEL: declare void @_ZN11all_deleted3fooENS_1AE(%"struct.all_deleted::A"*) // WIN64-LABEL: declare void @"\01?foo@all_deleted@@YAXUA@1@@Z"(%"struct.all_deleted::A"*) } @@ -93,14 +92,15 @@ void foo(A); void bar() { foo({}); } -// FIXME: The copy and move ctors are implicitly deleted. 
-// CHECK-DISABLED-LABEL: define void @_ZN18implicitly_deleted3barEv() -// CHECK-DISABLED: call void @_Z{{.*}}C1Ev( -// CHECK-DISABLED-NOT: call -// CHECK-DISABLED: call void @_ZN18implicitly_deleted3fooENS_1AE(%"struct.implicitly_deleted::A"* %{{.*}}) -// CHECK-DISABLED-LABEL: declare void @_ZN18implicitly_deleted3fooENS_1AE(%"struct.implicitly_deleted::A"*) +// CHECK-LABEL: define void @_ZN18implicitly_deleted3barEv() +// CHECK: call void @_Z{{.*}}C1Ev( +// CHECK-NOT: call +// CHECK: call void @_ZN18implicitly_deleted3fooENS_1AE(%"struct.implicitly_deleted::A"* %{{.*}}) +// CHECK-LABEL: declare void @_ZN18implicitly_deleted3fooENS_1AE(%"struct.implicitly_deleted::A"*) -// WIN64-LABEL: declare void @"\01?foo@implicitly_deleted@@YAXUA@1@@Z"(%"struct.implicitly_deleted::A"*) +// In MSVC 2013, the copy ctor is not deleted by a move assignment. In MSVC 2015, it is. +// WIN64-18-LABEL: declare void @"\01?foo@implicitly_deleted@@YAXUA@1@@Z"(i64 +// WIN64-19-LABEL: declare void @"\01?foo@implicitly_deleted@@YAXUA@1@@Z"(%"struct.implicitly_deleted::A"*) } namespace one_deleted { @@ -113,12 +113,11 @@ void foo(A); void bar() { foo({}); } -// FIXME: The copy constructor is implicitly deleted. 
-// CHECK-DISABLED-LABEL: define void @_ZN11one_deleted3barEv() -// CHECK-DISABLED: call void @_Z{{.*}}C1Ev( -// CHECK-DISABLED-NOT: call -// CHECK-DISABLED: call void @_ZN11one_deleted3fooENS_1AE(%"struct.one_deleted::A"* %{{.*}}) -// CHECK-DISABLED-LABEL: declare void @_ZN11one_deleted3fooENS_1AE(%"struct.one_deleted::A"*) +// CHECK-LABEL: define void @_ZN11one_deleted3barEv() +// CHECK: call void @_Z{{.*}}C1Ev( +// CHECK-NOT: call +// CHECK: call void @_ZN11one_deleted3fooENS_1AE(%"struct.one_deleted::A"* %{{.*}}) +// CHECK-LABEL: declare void @_ZN11one_deleted3fooENS_1AE(%"struct.one_deleted::A"*) // WIN64-LABEL: declare void @"\01?foo@one_deleted@@YAXUA@1@@Z"(%"struct.one_deleted::A"*) } @@ -195,12 +194,10 @@ void foo(B); void bar() { foo({}); } -// FIXME: This class has a non-trivial copy ctor and a trivial copy ctor. It's -// not clear whether we should pass by address or in registers. -// CHECK-DISABLED-LABEL: define void @_ZN14two_copy_ctors3barEv() -// CHECK-DISABLED: call void @_Z{{.*}}C1Ev( -// CHECK-DISABLED: call void @_ZN14two_copy_ctors3fooENS_1BE(%"struct.two_copy_ctors::B"* %{{.*}}) -// CHECK-DISABLED-LABEL: declare void @_ZN14two_copy_ctors3fooENS_1BE(%"struct.two_copy_ctors::B"*) +// CHECK-LABEL: define void @_ZN14two_copy_ctors3barEv() +// CHECK: call void @_Z{{.*}}C1Ev( +// CHECK: call void @_ZN14two_copy_ctors3fooENS_1BE(%"struct.two_copy_ctors::B"* %{{.*}}) +// CHECK-LABEL: declare void @_ZN14two_copy_ctors3fooENS_1BE(%"struct.two_copy_ctors::B"*) // WIN64-LABEL: declare void @"\01?foo@two_copy_ctors@@YAXUB@1@@Z"(%"struct.two_copy_ctors::B"*) } @@ -212,6 +209,7 @@ struct A { void *p; }; void *foo(A a) { return a.p; } +// CHECK-LABEL: define i8* @_ZN15definition_only3fooENS_1AE(%"struct.definition_only::A"* // WIN64-LABEL: define i8* @"\01?foo@definition_only@@YAPEAXUA@1@@Z"(%"struct.definition_only::A"* } @@ -226,6 +224,7 @@ struct A { B b; }; void *foo(A a) { return a.b.p; } +// CHECK-LABEL: define i8* 
@_ZN17deleted_by_member3fooENS_1AE(%"struct.deleted_by_member::A"* // WIN64-LABEL: define i8* @"\01?foo@deleted_by_member@@YAPEAXUA@1@@Z"(%"struct.deleted_by_member::A"* } @@ -239,6 +238,7 @@ struct A : B { A(); }; void *foo(A a) { return a.p; } +// CHECK-LABEL: define i8* @_ZN15deleted_by_base3fooENS_1AE(%"struct.deleted_by_base::A"* // WIN64-LABEL: define i8* @"\01?foo@deleted_by_base@@YAPEAXUA@1@@Z"(%"struct.deleted_by_base::A"* } @@ -253,6 +253,7 @@ struct A { B b; }; void *foo(A a) { return a.b.p; } +// CHECK-LABEL: define i8* @_ZN22deleted_by_member_copy3fooENS_1AE(%"struct.deleted_by_member_copy::A"* // WIN64-LABEL: define i8* @"\01?foo@deleted_by_member_copy@@YAPEAXUA@1@@Z"(%"struct.deleted_by_member_copy::A"* } @@ -266,6 +267,7 @@ struct A : B { A(); }; void *foo(A a) { return a.p; } +// CHECK-LABEL: define i8* @_ZN20deleted_by_base_copy3fooENS_1AE(%"struct.deleted_by_base_copy::A"* // WIN64-LABEL: define i8* @"\01?foo@deleted_by_base_copy@@YAPEAXUA@1@@Z"(%"struct.deleted_by_base_copy::A"* } @@ -275,6 +277,75 @@ struct A { A(const A &o) = delete; void *p; }; +// CHECK-LABEL: define i8* @_ZN15explicit_delete3fooENS_1AE(%"struct.explicit_delete::A"* // WIN64-LABEL: define i8* @"\01?foo@explicit_delete@@YAPEAXUA@1@@Z"(%"struct.explicit_delete::A"* void *foo(A a) { return a.p; } } + +namespace implicitly_deleted_copy_ctor { +struct A { + // No move ctor due to copy assignment. + A &operator=(const A&); + // Deleted copy ctor due to rvalue ref member. + int &&ref; +}; +// CHECK-LABEL: define {{.*}} @_ZN28implicitly_deleted_copy_ctor3fooENS_1AE(%"struct.implicitly_deleted_copy_ctor::A"* +// WIN64-LABEL: define {{.*}} @"\01?foo@implicitly_deleted_copy_ctor@@YAAEAHUA@1@@Z"(%"struct.implicitly_deleted_copy_ctor::A"* +int &foo(A a) { return a.ref; } + +struct B { + // Passed direct: has non-deleted trivial copy ctor. 
+ B &operator=(const B&); + int &ref; +}; +int &foo(B b) { return b.ref; } +// CHECK-LABEL: define {{.*}} @_ZN28implicitly_deleted_copy_ctor3fooENS_1BE(i32* +// WIN64-LABEL: define {{.*}} @"\01?foo@implicitly_deleted_copy_ctor@@YAAEAHUB@1@@Z"(i64 + +struct X { X(const X&); }; +struct Y { Y(const Y&) = default; }; + +union C { + C &operator=(const C&); + // Passed indirect: copy ctor deleted due to variant member with nontrivial copy ctor. + X x; + int n; +}; +int foo(C c) { return c.n; } +// CHECK-LABEL: define {{.*}} @_ZN28implicitly_deleted_copy_ctor3fooENS_1CE(%"union.implicitly_deleted_copy_ctor::C"* +// WIN64-LABEL: define {{.*}} @"\01?foo@implicitly_deleted_copy_ctor@@YAHTC@1@@Z"(%"union.implicitly_deleted_copy_ctor::C"* + +struct D { + D &operator=(const D&); + // Passed indirect: copy ctor deleted due to variant member with nontrivial copy ctor. + union { + X x; + int n; + }; +}; +int foo(D d) { return d.n; } +// CHECK-LABEL: define {{.*}} @_ZN28implicitly_deleted_copy_ctor3fooENS_1DE(%"struct.implicitly_deleted_copy_ctor::D"* +// WIN64-LABEL: define {{.*}} @"\01?foo@implicitly_deleted_copy_ctor@@YAHUD@1@@Z"(%"struct.implicitly_deleted_copy_ctor::D"* + +union E { + // Passed direct: has non-deleted trivial copy ctor. + E &operator=(const E&); + Y y; + int n; +}; +int foo(E e) { return e.n; } +// CHECK-LABEL: define {{.*}} @_ZN28implicitly_deleted_copy_ctor3fooENS_1EE(i32 +// WIN64-LABEL: define {{.*}} @"\01?foo@implicitly_deleted_copy_ctor@@YAHTE@1@@Z"(i32 + +struct F { + // Passed direct: has non-deleted trivial copy ctor. 
+ F &operator=(const F&); + union { + Y y; + int n; + }; +}; +int foo(F f) { return f.n; } +// CHECK-LABEL: define {{.*}} @_ZN28implicitly_deleted_copy_ctor3fooENS_1FE(i32 +// WIN64-LABEL: define {{.*}} @"\01?foo@implicitly_deleted_copy_ctor@@YAHUF@1@@Z"(i32 +} diff --git a/test/Driver/clang-translation.c b/test/Driver/clang-translation.c index 545951d5aa11..3b30f7af76dc 100644 --- a/test/Driver/clang-translation.c +++ b/test/Driver/clang-translation.c @@ -73,6 +73,10 @@ // RUN: FileCheck -check-prefix=ARM64-APPLE %s // ARM64-APPLE: -munwind-table +// RUN: %clang -target arm64-apple-ios10 -fno-exceptions -### -S %s -arch arm64 2>&1 | \ +// RUN: FileCheck -check-prefix=ARM64-APPLE-EXCEP %s +// ARM64-APPLE-EXCEP-NOT: -munwind-table + // RUN: %clang -target armv7k-apple-watchos4.0 -### -S %s -arch armv7k 2>&1 | \ // RUN: FileCheck -check-prefix=ARMV7K-APPLE %s // ARMV7K-APPLE: -munwind-table diff --git a/test/Index/preamble-conditionals-crash.cpp b/test/Index/preamble-conditionals-crash.cpp new file mode 100644 index 000000000000..6b18c87d19f9 --- /dev/null +++ b/test/Index/preamble-conditionals-crash.cpp @@ -0,0 +1,12 @@ +#ifndef HEADER_GUARD + +#define FOO int aba; +FOO + +#endif +// RUN: env CINDEXTEST_EDITING=1 c-index-test -test-load-source-reparse 5 \ +// RUN: local -std=c++14 %s 2>&1 \ +// RUN: | FileCheck %s --implicit-check-not "libclang: crash detected" \ +// RUN: --implicit-check-not "error:" +// CHECK: macro expansion=FOO:3:9 Extent=[4:1 - 4:4] +// CHECK: VarDecl=aba:4:1 (Definition) Extent=[4:1 - 4:4] diff --git a/test/Index/preamble-conditionals.cpp b/test/Index/preamble-conditionals.cpp new file mode 100644 index 000000000000..81ef8265e829 --- /dev/null +++ b/test/Index/preamble-conditionals.cpp @@ -0,0 +1,8 @@ +// RUN: env CINDEXTEST_EDITING=1 c-index-test -test-load-source local %s 2>&1 \ +// RUN: | FileCheck %s --implicit-check-not "error:" +#ifndef FOO_H +#define FOO_H + +void foo(); + +#endif diff --git a/test/SemaObjC/arc-property-decl-attrs.m 
b/test/SemaObjC/arc-property-decl-attrs.m index ee48d310edc0..7393f58199f9 100644 --- a/test/SemaObjC/arc-property-decl-attrs.m +++ b/test/SemaObjC/arc-property-decl-attrs.m @@ -225,3 +225,30 @@ __attribute__((objc_root_class)) @implementation TypeVsSetter @synthesize prop; // expected-note {{property synthesized here}} @end + +@protocol AutoStrongProp + +@property (nonatomic, readonly) NSObject *prop; + +@end + +@protocol AutoStrongProp_Internal + +// This property gets the 'strong' attribute automatically. +@property (nonatomic, readwrite) NSObject *prop; + +@end + +@interface SynthesizeWithImplicitStrongNoError : NSObject +@end + +@interface SynthesizeWithImplicitStrongNoError () + +@end + +@implementation SynthesizeWithImplicitStrongNoError + +// no error, 'strong' is implicit in the 'readwrite' property. +@synthesize prop = _prop; + +@end diff --git a/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp index 6037127feb52..7bc8421bab2f 100644 --- a/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp +++ b/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp @@ -1108,26 +1108,35 @@ TEST(ConstructorDeclaration, IsExplicit) { } TEST(ConstructorDeclaration, Kinds) { - EXPECT_TRUE(matches("struct S { S(); };", - cxxConstructorDecl(isDefaultConstructor()))); - EXPECT_TRUE(notMatches("struct S { S(); };", - cxxConstructorDecl(isCopyConstructor()))); - EXPECT_TRUE(notMatches("struct S { S(); };", - cxxConstructorDecl(isMoveConstructor()))); + EXPECT_TRUE(matches( + "struct S { S(); };", + cxxConstructorDecl(isDefaultConstructor(), unless(isImplicit())))); + EXPECT_TRUE(notMatches( + "struct S { S(); };", + cxxConstructorDecl(isCopyConstructor(), unless(isImplicit())))); + EXPECT_TRUE(notMatches( + "struct S { S(); };", + cxxConstructorDecl(isMoveConstructor(), unless(isImplicit())))); - EXPECT_TRUE(notMatches("struct S { S(const S&); };", - cxxConstructorDecl(isDefaultConstructor()))); - EXPECT_TRUE(matches("struct S { 
S(const S&); };", - cxxConstructorDecl(isCopyConstructor()))); - EXPECT_TRUE(notMatches("struct S { S(const S&); };", - cxxConstructorDecl(isMoveConstructor()))); + EXPECT_TRUE(notMatches( + "struct S { S(const S&); };", + cxxConstructorDecl(isDefaultConstructor(), unless(isImplicit())))); + EXPECT_TRUE(matches( + "struct S { S(const S&); };", + cxxConstructorDecl(isCopyConstructor(), unless(isImplicit())))); + EXPECT_TRUE(notMatches( + "struct S { S(const S&); };", + cxxConstructorDecl(isMoveConstructor(), unless(isImplicit())))); - EXPECT_TRUE(notMatches("struct S { S(S&&); };", - cxxConstructorDecl(isDefaultConstructor()))); - EXPECT_TRUE(notMatches("struct S { S(S&&); };", - cxxConstructorDecl(isCopyConstructor()))); - EXPECT_TRUE(matches("struct S { S(S&&); };", - cxxConstructorDecl(isMoveConstructor()))); + EXPECT_TRUE(notMatches( + "struct S { S(S&&); };", + cxxConstructorDecl(isDefaultConstructor(), unless(isImplicit())))); + EXPECT_TRUE(notMatches( + "struct S { S(S&&); };", + cxxConstructorDecl(isCopyConstructor(), unless(isImplicit())))); + EXPECT_TRUE(matches( + "struct S { S(S&&); };", + cxxConstructorDecl(isMoveConstructor(), unless(isImplicit())))); } TEST(ConstructorDeclaration, IsUserProvided) { diff --git a/unittests/Format/FormatTestComments.cpp b/unittests/Format/FormatTestComments.cpp index 7916e65e5114..f3c45fac34a9 100644 --- a/unittests/Format/FormatTestComments.cpp +++ b/unittests/Format/FormatTestComments.cpp @@ -2267,6 +2267,13 @@ TEST_F(FormatTestComments, AlignTrailingComments) { "int k; // line longg long", getLLVMStyleWithColumns(20))); + // Always align if ColumnLimit = 0 + EXPECT_EQ("int i, j; // line 1\n" + "int k; // line longg long", + format("int i, j; // line 1\n" + "int k; // line longg long", + getLLVMStyleWithColumns(0))); + // Align comment line sections aligned with the next token with the next // token. 
EXPECT_EQ("class A {\n" From 965351a4eb4834d918700213220407ee47e30f67 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 24 Aug 2017 16:35:23 +0000 Subject: [PATCH 3/4] Vendor import of compiler-rt release_50 branch r311606: https://llvm.org/svn/llvm-project/compiler-rt/branches/release_50@311606 --- lib/builtins/arm/aeabi_dcmp.S | 9 +++++++++ lib/builtins/arm/aeabi_fcmp.S | 9 +++++++++ lib/esan/esan_sideline_linux.cpp | 2 +- lib/profile/InstrProfilingNameVar.c | 16 ++++++++-------- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/lib/builtins/arm/aeabi_dcmp.S b/lib/builtins/arm/aeabi_dcmp.S index 51539c0ac813..9fa78b461248 100644 --- a/lib/builtins/arm/aeabi_dcmp.S +++ b/lib/builtins/arm/aeabi_dcmp.S @@ -18,11 +18,20 @@ // } // } +#if defined(COMPILER_RT_ARMHF_TARGET) +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS \ + vmov d0, r0, r1 SEPARATOR \ + vmov d1, r2, r3 +#else +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS +#endif + #define DEFINE_AEABI_DCMP(cond) \ .syntax unified SEPARATOR \ .p2align 2 SEPARATOR \ DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ push { r4, lr } SEPARATOR \ + CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \ bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ cmp r0, #0 SEPARATOR \ b ## cond 1f SEPARATOR \ diff --git a/lib/builtins/arm/aeabi_fcmp.S b/lib/builtins/arm/aeabi_fcmp.S index 8e7774b58974..ea5b96c21d57 100644 --- a/lib/builtins/arm/aeabi_fcmp.S +++ b/lib/builtins/arm/aeabi_fcmp.S @@ -18,11 +18,20 @@ // } // } +#if defined(COMPILER_RT_ARMHF_TARGET) +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS \ + vmov s0, r0 SEPARATOR \ + vmov s1, r1 +#else +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS +#endif + #define DEFINE_AEABI_FCMP(cond) \ .syntax unified SEPARATOR \ .p2align 2 SEPARATOR \ DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ push { r4, lr } SEPARATOR \ + CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \ bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ cmp r0, #0 SEPARATOR \ b ## cond 1f SEPARATOR \ diff --git a/lib/esan/esan_sideline_linux.cpp 
b/lib/esan/esan_sideline_linux.cpp index d04f5909d6a2..bc272dfe49f8 100644 --- a/lib/esan/esan_sideline_linux.cpp +++ b/lib/esan/esan_sideline_linux.cpp @@ -70,7 +70,7 @@ int SidelineThread::runSideline(void *Arg) { // Set up a signal handler on an alternate stack for safety. InternalScopedBuffer StackMap(SigAltStackSize); - struct sigaltstack SigAltStack; + stack_t SigAltStack; SigAltStack.ss_sp = StackMap.data(); SigAltStack.ss_size = SigAltStackSize; SigAltStack.ss_flags = 0; diff --git a/lib/profile/InstrProfilingNameVar.c b/lib/profile/InstrProfilingNameVar.c index a0c448c679b5..264568fbc912 100644 --- a/lib/profile/InstrProfilingNameVar.c +++ b/lib/profile/InstrProfilingNameVar.c @@ -1,11 +1,11 @@ -//===- InstrProfilingNameVar.c - profile name variable setup --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// +/*===- InstrProfilingNameVar.c - profile name variable setup -------------===*\ +|* +|* The LLVM Compiler Infrastructure +|* +|* This file is distributed under the University of Illinois Open Source +|* License. See LICENSE.TXT for details. 
+|* +\*===----------------------------------------------------------------------===*/ #include "InstrProfiling.h" From f9cfece4e841319f868ef5eac1d5350c35f2bf19 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 24 Aug 2017 16:35:38 +0000 Subject: [PATCH 4/4] Vendor import of lldb release_50 branch r311606: https://llvm.org/svn/llvm-project/lldb/branches/release_50@311606 --- .../register/register_command/TestRegisters.py | 2 +- source/Core/CMakeLists.txt | 10 ++++++++++ source/Host/CMakeLists.txt | 16 +++++++++++++++- .../Utility/RegisterContextLinux_i386.cpp | 3 +-- .../Process/Utility/RegisterContext_x86.h | 3 +-- unittests/CMakeLists.txt | 6 ++++++ 6 files changed, 34 insertions(+), 6 deletions(-) diff --git a/packages/Python/lldbsuite/test/functionalities/register/register_command/TestRegisters.py b/packages/Python/lldbsuite/test/functionalities/register/register_command/TestRegisters.py index cc1389ca6b01..fe6ce2c25a3e 100644 --- a/packages/Python/lldbsuite/test/functionalities/register/register_command/TestRegisters.py +++ b/packages/Python/lldbsuite/test/functionalities/register/register_command/TestRegisters.py @@ -256,7 +256,7 @@ def fp_special_purpose_register_read(self): self.expect( "register read ftag", substrs=[ 'ftag' + ' = ', str( - "0x%0.2x" % + "0x%0.4x" % (reg_value_ftag_initial | ( 1 << fstat_top_pointer_initial)))]) reg_value_ftag_initial = reg_value_ftag_initial | ( diff --git a/source/Core/CMakeLists.txt b/source/Core/CMakeLists.txt index c5105bd9ab12..c345afb4064a 100644 --- a/source/Core/CMakeLists.txt +++ b/source/Core/CMakeLists.txt @@ -1,3 +1,12 @@ +set(LLDB_CURSES_LIBS) + +if (NOT LLDB_DISABLE_CURSES) + list(APPEND LLDB_CURSES_LIBS ${CURSES_LIBRARIES}) + if(LLVM_ENABLE_TERMINFO AND HAVE_TERMINFO) + list(APPEND LLDB_CURSES_LIBS ${TERMINFO_LIBS}) + endif() +endif() + add_lldb_library(lldbCore Address.cpp AddressRange.cpp @@ -62,6 +71,7 @@ add_lldb_library(lldbCore lldbPluginCPlusPlusLanguage lldbPluginObjCLanguage 
lldbPluginObjectFileJIT + ${LLDB_CURSES_LIBS} LINK_COMPONENTS BinaryFormat diff --git a/source/Host/CMakeLists.txt b/source/Host/CMakeLists.txt index 2ee599cf43a2..1696e7eab6ea 100644 --- a/source/Host/CMakeLists.txt +++ b/source/Host/CMakeLists.txt @@ -156,9 +156,23 @@ if (${get_python_libdir}) endif() endif() +set(EXTRA_LIBS) if (CMAKE_SYSTEM_NAME MATCHES "NetBSD") - set(EXTRA_LIBS kvm) + list(APPEND EXTRA_LIBS kvm) endif () +if (APPLE) + list(APPEND EXTRA_LIBS xml2) +else () + if (LIBXML2_FOUND) + list(APPEND EXTRA_LIBS ${LIBXML2_LIBRARIES}) + endif() +endif () +if (HAVE_LIBDL) + list(APPEND EXTRA_LIBS ${CMAKE_DL_LIBS}) +endif() +if (NOT LLDB_DISABLE_LIBEDIT) + list(APPEND EXTRA_LIBS edit) +endif() add_lldb_library(lldbHost ${HOST_SOURCES} diff --git a/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp b/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp index 6563796db12c..2cb17cb182e2 100644 --- a/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp +++ b/source/Plugins/Process/Utility/RegisterContextLinux_i386.cpp @@ -36,8 +36,7 @@ struct GPR { struct FPR_i386 { uint16_t fctrl; // FPU Control Word (fcw) uint16_t fstat; // FPU Status Word (fsw) - uint8_t ftag; // FPU Tag Word (ftw) - uint8_t reserved_1; // Reserved + uint16_t ftag; // FPU Tag Word (ftw) uint16_t fop; // Last Instruction Opcode (fop) union { struct { diff --git a/source/Plugins/Process/Utility/RegisterContext_x86.h b/source/Plugins/Process/Utility/RegisterContext_x86.h index ab2ca2bb6c2c..5f6fc295a15c 100644 --- a/source/Plugins/Process/Utility/RegisterContext_x86.h +++ b/source/Plugins/Process/Utility/RegisterContext_x86.h @@ -257,8 +257,7 @@ struct XMMReg { struct FXSAVE { uint16_t fctrl; // FPU Control Word (fcw) uint16_t fstat; // FPU Status Word (fsw) - uint8_t ftag; // FPU Tag Word (ftw) - uint8_t reserved_1; // Reserved + uint16_t ftag; // FPU Tag Word (ftw) uint16_t fop; // Last Instruction Opcode (fop) union { struct { diff --git a/unittests/CMakeLists.txt 
b/unittests/CMakeLists.txt index c7c3140b121a..f7b611802fbf 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -19,6 +19,12 @@ if (LLDB_BUILT_STANDALONE) if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/unittest AND NOT TARGET gtest) add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/unittest utils/unittest) endif() + # LLVMTestingSupport library is needed for Process/gdb-remote. + if (EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Testing/Support + AND NOT TARGET LLVMTestingSupport) + add_subdirectory(${LLVM_MAIN_SRC_DIR}/lib/Testing/Support + lib/Testing/Support) + endif() endif() function(add_lldb_unittest test_name)