From b8a2042aa938069e862750553db0e4d82d25822c Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 28 Dec 2017 23:57:18 +0000 Subject: [PATCH] Vendor import of llvm trunk r321545: https://llvm.org/svn/llvm-project/llvm/trunk@321545 --- include/llvm/Support/KnownBits.h | 2 - .../SelectionDAG/LegalizeVectorOps.cpp | 17 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 39 ++- lib/LTO/LTOModule.cpp | 24 +- lib/Target/X86/X86ISelLowering.cpp | 52 ++-- lib/Target/X86/X86InstrAVX512.td | 16 -- lib/Target/X86/X86WinEHState.cpp | 6 + .../PowerPC/combine_loads_from_build_pair.ll | 2 + .../X86/bitcast-int-to-vector-bool-sext.ll | 15 +- .../X86/bitcast-int-to-vector-bool-zext.ll | 11 +- .../CodeGen/X86/bitcast-int-to-vector-bool.ll | 11 +- test/CodeGen/X86/setcc-wide-types.ll | 252 ++++++++++++++++++ .../X86/win32-eh-available-externally.ll | 28 ++ 13 files changed, 387 insertions(+), 88 deletions(-) create mode 100644 test/CodeGen/X86/win32-eh-available-externally.ll diff --git a/include/llvm/Support/KnownBits.h b/include/llvm/Support/KnownBits.h index 7a4de3e5ff12..97e73b13fca3 100644 --- a/include/llvm/Support/KnownBits.h +++ b/include/llvm/Support/KnownBits.h @@ -100,13 +100,11 @@ struct KnownBits { /// Make this value negative. void makeNegative() { - assert(!isNonNegative() && "Can't make a non-negative value negative"); One.setSignBit(); } /// Make this value negative. void makeNonNegative() { - assert(!isNegative() && "Can't make a negative value non-negative"); Zero.setSignBit(); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 74970ab5792c..7643790df350 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -49,6 +49,8 @@ using namespace llvm; +#define DEBUG_TYPE "legalizevectorops" + namespace { class VectorLegalizer { @@ -226,7 +228,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); - if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { + DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG)); switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), LD->getMemoryVT())) { default: llvm_unreachable("This action is not supported yet!"); @@ -252,11 +255,14 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandLoad(Op)); } + } } else if (Op.getOpcode() == ISD::STORE) { StoreSDNode *ST = cast(Op.getNode()); EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); - if (StVT.isVector() && ST->isTruncatingStore()) + if (StVT.isVector() && ST->isTruncatingStore()) { + DEBUG(dbgs() << "\nLegalizing truncating vector store: "; + Node->dump(&DAG)); switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: @@ -270,6 +276,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandStore(Op)); } + } } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) HasVectorValue = true; @@ -376,6 +383,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { break; } + DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: @@ -383,12 +392,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; break; case TargetLowering::Legal: + DEBUG(dbgs() << "Legal node: nothing to do\n"); break; case TargetLowering::Custom: { + DEBUG(dbgs() << "Trying custom legalization\n"); if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { + DEBUG(dbgs() << "Successfully custom legalized node\n"); Result = Tmp1; break; } + DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; } case TargetLowering::Expand: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a04c770c51c4..4c8b63d2f239 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5943,7 +5943,9 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, @@ -6043,7 +6045,9 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, @@ -6108,7 +6112,9 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, @@ -6134,7 +6140,9 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, @@ -6160,7 +6168,9 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, @@ -6189,7 +6199,9 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, @@ -6224,7 +6236,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, @@ -6256,7 +6270,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, @@ -7112,6 +7128,8 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, void SelectionDAG::salvageDebugInfo(SDNode &N) { if (!N.getHasDebugValue()) return; + + SmallVector ClonedDVs; for (auto DV : GetDbgValues(&N)) { if (DV->isInvalidated()) continue; @@ -7135,13 +7153,16 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { SDDbgValue *Clone = getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); + ClonedDVs.push_back(Clone); DV->setIsInvalidated(); - AddDbgValue(Clone, N0.getNode(), false); DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); dbgs() << " into " << *DIExpr << '\n'); } } } + + for (SDDbgValue *Dbg : ClonedDVs) + AddDbgValue(Dbg, Dbg->getSDNode(), false); } namespace { diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 51b4f225939f..626d2f5dc813 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -388,24 +388,20 @@ void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) { // from the ObjC data structures generated by the front end. // special case if this data blob is an ObjC class definition - std::string Section = v->getSection(); - if (Section.compare(0, 15, "__OBJC,__class,") == 0) { - if (const GlobalVariable *gv = dyn_cast(v)) { - addObjCClass(gv); + if (const GlobalVariable *GV = dyn_cast(v)) { + StringRef Section = GV->getSection(); + if (Section.startswith("__OBJC,__class,")) { + addObjCClass(GV); } - } - // special case if this data blob is an ObjC category definition - else if (Section.compare(0, 18, "__OBJC,__category,") == 0) { - if (const GlobalVariable *gv = dyn_cast(v)) { - addObjCCategory(gv); + // special case if this data blob is an ObjC category definition + else if (Section.startswith("__OBJC,__category,")) { + addObjCCategory(GV); } - } - // special case if this data blob is the list of referenced classes - else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) { - if (const GlobalVariable *gv = dyn_cast(v)) { - addObjCClassRef(gv); + // special case if this data blob is the list of referenced classes + else if (Section.startswith("__OBJC,__cls_refs,")) { + addObjCClassRef(GV); } } } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ba3b02e25a9d..9edd799779c7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16281,7 +16281,7 @@ static SDValue LowerZERO_EXTEND_Mask(SDValue Op, // Truncate if we had to extend i16/i8 above. if (VT != ExtVT) { WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); - SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal); + SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal); } // Extract back to 128/256-bit if we widened. @@ -18426,7 +18426,7 @@ static SDValue LowerSIGN_EXTEND_Mask(SDValue Op, // Truncate if we had to extend i16/i8 above. if (VT != ExtVT) { WideVT = MVT::getVectorVT(VTElt, NumElts); - V = DAG.getNode(X86ISD::VTRUNC, dl, WideVT, V); + V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V); } // Extract back to 128/256-bit if we widened. @@ -18679,6 +18679,14 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); + if (Subtarget.hasVLX()) { + // Extract to v4i1/v2i1. + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Load, + DAG.getIntPtrConstant(0, dl)); + // Finally, do a normal sign-extend to the desired register. + return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract); + } + MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8); SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, Load); @@ -18698,22 +18706,25 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, if (NumElts <= 8) { // A subset, assume that we have only AVX-512F - unsigned NumBitsToLoad = 8; - MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad); - SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(), + SDValue Load = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getMemOperand()); // Replace chain users with the new chain. assert(Load->getNumValues() == 2 && "Loads must carry a chain!"); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); - MVT MaskVT = MVT::getVectorVT(MVT::i1, NumBitsToLoad); - SDValue BitVec = DAG.getBitcast(MaskVT, Load); + SDValue BitVec = DAG.getBitcast(MVT::v8i1, Load); if (NumElts == 8) return DAG.getNode(ExtOpcode, dl, VT, BitVec); - // we should take care to v4i1 and v2i1 + if (Subtarget.hasVLX()) { + // Extract to v4i1/v2i1. + SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, BitVec, + DAG.getIntPtrConstant(0, dl)); + // Finally, do a normal sign-extend to the desired register. + return DAG.getNode(ExtOpcode, dl, Op.getValueType(), Extract); + } MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 8); SDValue ExtVec = DAG.getNode(ExtOpcode, dl, ExtVT, BitVec); @@ -18728,13 +18739,12 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op, Ld->getBasePtr(), Ld->getMemOperand()); - SDValue BasePtrHi = - DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(2, dl, BasePtr.getValueType())); + SDValue BasePtrHi = DAG.getMemBasePlusOffset(BasePtr, 2, dl); - SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), - BasePtrHi, - Ld->getMemOperand()); + SDValue LoadHi = DAG.getLoad(MVT::v16i1, dl, Ld->getChain(), BasePtrHi, + Ld->getPointerInfo().getWithOffset(2), + MinAlign(Ld->getAlignment(), 2U), + Ld->getMemOperand()->getFlags()); SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadLo.getValue(1), LoadHi.getValue(1)); @@ -34051,15 +34061,14 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, Ptr = DAG.getMemBasePlusOffset(Ptr, 16, dl); SDValue Load2 = - DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), - std::min(16U, Alignment), Ld->getMemOperand()->getFlags()); + DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, + Ld->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), Ld->getMemOperand()->getFlags()); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); - SDValue NewVec = DAG.getUNDEF(RegVT); - NewVec = insert128BitVector(NewVec, Load1, 0, DAG, dl); - NewVec = insert128BitVector(NewVec, Load2, NumElems / 2, DAG, dl); + SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2); return DCI.CombineTo(N, NewVec, TF, true); } @@ -34465,8 +34474,9 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, DAG.getStore(St->getChain(), dl, Value0, Ptr0, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags()); SDValue Ch1 = - DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), - std::min(16U, Alignment), St->getMemOperand()->getFlags()); + DAG.getStore(St->getChain(), dl, Value1, Ptr1, + St->getPointerInfo().getWithOffset(16), + MinAlign(Alignment, 16U), St->getMemOperand()->getFlags()); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 46c19f18f8d3..dcd84930741b 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -8704,17 +8704,6 @@ def rr : AVX512XS8I, EVEX, Sched<[WriteMove]>; } -// Use 512bit version to implement 128/256 bit in case NoVLX. -multiclass avx512_convert_mask_to_vector_lowering { - - def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))), - (X86Info.VT (EXTRACT_SUBREG - (_.VT (!cast(NAME#"Zrr") - (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))), - X86Info.SubRegIdx))>; -} - multiclass cvt_mask_by_elt_width opc, AVX512VLVectorVTInfo VTInfo, string OpcodeStr, Predicate prd> { let Predicates = [prd] in @@ -8724,11 +8713,6 @@ let Predicates = [prd] in defm Z256 : cvt_by_vec_width, EVEX_V256; defm Z128 : cvt_by_vec_width, EVEX_V128; } -let Predicates = [prd, NoVLX] in { - defm Z256_Alt : avx512_convert_mask_to_vector_lowering; - defm Z128_Alt : avx512_convert_mask_to_vector_lowering; - } - } defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>; diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 0472a85f50da..6d6dedc60736 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -149,6 +149,12 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const { } bool WinEHStatePass::runOnFunction(Function &F) { + // Don't insert state stores or exception handler thunks for + // available_externally functions. The handler needs to reference the LSDA, + // which will not be emitted in this case. + if (F.hasAvailableExternallyLinkage()) + return false; + // Check the personality. Do nothing if this personality doesn't use funclets. if (!F.hasPersonalityFn()) return false; diff --git a/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll b/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll index 0f8f18a17879..45cc740d1eae 100644 --- a/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll +++ b/test/CodeGen/PowerPC/combine_loads_from_build_pair.ll @@ -12,6 +12,8 @@ define i64 @func1(i64 %p1, i64 %p2, i64 %p3, i64 %p4, { i64, i8* } %struct) { ; CHECK-DAG: [[LOBITS:t[0-9]+]]: i32,ch = load ; CHECK-DAG: [[HIBITS:t[0-9]+]]: i32,ch = load ; CHECK: Combining: t{{[0-9]+}}: i64 = build_pair [[LOBITS]], [[HIBITS]] +; CHECK-NEXT: Creating new node +; CHECK-SAME: load @ext_i2_2i64(i2 %a0) { ; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 -; AVX512-NEXT: vzeroupper +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %1 = bitcast i2 %a0 to <2 x i1> %2 = sext <2 x i1> %1 to <2 x i64> @@ -91,10 +90,8 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) { ; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 -; AVX512-NEXT: vzeroupper +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> %2 = sext <4 x i1> %1 to <4 x i32> @@ -246,8 +243,8 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) { ; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> %2 = sext <4 x i1> %1 to <4 x i64> diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll index f88b540323cb..9e77cd11449e 100644 --- a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll +++ b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll @@ -63,9 +63,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) { ; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 -; AVX512VLBW-NEXT: vzeroupper +; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} ; AVX512VLBW-NEXT: retq %1 = bitcast i2 %a0 to <2 x i1> %2 = zext <2 x i1> %1 to <2 x i64> @@ -120,9 +118,7 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) { ; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} -; AVX512VLBW-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 -; AVX512VLBW-NEXT: vzeroupper +; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} ; AVX512VLBW-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> %2 = zext <4 x i1> %1 to <4 x i32> @@ -317,8 +313,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) { ; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512VLBW-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z} ; AVX512VLBW-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> %2 = zext <4 x i1> %1 to <4 x i64> diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool.ll index 6d9f832d861f..45a48fae146d 100644 --- a/test/CodeGen/X86/bitcast-int-to-vector-bool.ll +++ b/test/CodeGen/X86/bitcast-int-to-vector-bool.ll @@ -46,9 +46,8 @@ define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) { ; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 -; AVX512-NEXT: vzeroupper +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %1 = bitcast i2 %a0 to <2 x i1> ret <2 x i1> %1 @@ -90,10 +89,8 @@ define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) { ; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 -; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 -; AVX512-NEXT: vzeroupper +; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> ret <4 x i1> %1 diff --git a/test/CodeGen/X86/setcc-wide-types.ll b/test/CodeGen/X86/setcc-wide-types.ll index f935db72dcb9..410378ffbad2 100644 --- a/test/CodeGen/X86/setcc-wide-types.ll +++ b/test/CodeGen/X86/setcc-wide-types.ll @@ -138,3 +138,255 @@ define i32 @eq_i256(<4 x i64> %x, <4 x i64> %y) { ret i32 %zext } +; This test models the expansion of 'memcmp(a, b, 32) != 0' +; if we allowed 2 pairs of 16-byte loads per block. + +define i32 @ne_i128_pair(i128* %a, i128* %b) { +; SSE2-LABEL: ne_i128_pair: +; SSE2: # %bb.0: +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movq 8(%rdi), %rcx +; SSE2-NEXT: xorq (%rsi), %rax +; SSE2-NEXT: xorq 8(%rsi), %rcx +; SSE2-NEXT: movq 24(%rdi), %rdx +; SSE2-NEXT: movq 16(%rdi), %rdi +; SSE2-NEXT: xorq 16(%rsi), %rdi +; SSE2-NEXT: orq %rax, %rdi +; SSE2-NEXT: xorq 24(%rsi), %rdx +; SSE2-NEXT: orq %rcx, %rdx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: orq %rdi, %rdx +; SSE2-NEXT: setne %al +; SSE2-NEXT: retq +; +; AVX2-LABEL: ne_i128_pair: +; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rax +; AVX2-NEXT: movq 8(%rdi), %rcx +; AVX2-NEXT: xorq (%rsi), %rax +; AVX2-NEXT: xorq 8(%rsi), %rcx +; AVX2-NEXT: movq 24(%rdi), %rdx +; AVX2-NEXT: movq 16(%rdi), %rdi +; AVX2-NEXT: xorq 16(%rsi), %rdi +; AVX2-NEXT: orq %rax, %rdi +; AVX2-NEXT: xorq 24(%rsi), %rdx +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: orq %rdi, %rdx +; AVX2-NEXT: setne %al +; AVX2-NEXT: retq + %a0 = load i128, i128* %a + %b0 = load i128, i128* %b + %xor1 = xor i128 %a0, %b0 + %ap1 = getelementptr i128, i128* %a, i128 1 + %bp1 = getelementptr i128, i128* %b, i128 1 + %a1 = load i128, i128* %ap1 + %b1 = load i128, i128* %bp1 + %xor2 = xor i128 %a1, %b1 + %or = or i128 %xor1, %xor2 + %cmp = icmp ne i128 %or, 0 + %z = zext i1 %cmp to i32 + ret i32 %z +} + +; This test models the expansion of 'memcmp(a, b, 32) == 0' +; if we allowed 2 pairs of 16-byte loads per block. + +define i32 @eq_i128_pair(i128* %a, i128* %b) { +; SSE2-LABEL: eq_i128_pair: +; SSE2: # %bb.0: +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movq 8(%rdi), %rcx +; SSE2-NEXT: xorq (%rsi), %rax +; SSE2-NEXT: xorq 8(%rsi), %rcx +; SSE2-NEXT: movq 24(%rdi), %rdx +; SSE2-NEXT: movq 16(%rdi), %rdi +; SSE2-NEXT: xorq 16(%rsi), %rdi +; SSE2-NEXT: orq %rax, %rdi +; SSE2-NEXT: xorq 24(%rsi), %rdx +; SSE2-NEXT: orq %rcx, %rdx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: orq %rdi, %rdx +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; AVX2-LABEL: eq_i128_pair: +; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rax +; AVX2-NEXT: movq 8(%rdi), %rcx +; AVX2-NEXT: xorq (%rsi), %rax +; AVX2-NEXT: xorq 8(%rsi), %rcx +; AVX2-NEXT: movq 24(%rdi), %rdx +; AVX2-NEXT: movq 16(%rdi), %rdi +; AVX2-NEXT: xorq 16(%rsi), %rdi +; AVX2-NEXT: orq %rax, %rdi +; AVX2-NEXT: xorq 24(%rsi), %rdx +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: orq %rdi, %rdx +; AVX2-NEXT: sete %al +; AVX2-NEXT: retq + %a0 = load i128, i128* %a + %b0 = load i128, i128* %b + %xor1 = xor i128 %a0, %b0 + %ap1 = getelementptr i128, i128* %a, i128 1 + %bp1 = getelementptr i128, i128* %b, i128 1 + %a1 = load i128, i128* %ap1 + %b1 = load i128, i128* %bp1 + %xor2 = xor i128 %a1, %b1 + %or = or i128 %xor1, %xor2 + %cmp = icmp eq i128 %or, 0 + %z = zext i1 %cmp to i32 + ret i32 %z +} + +; This test models the expansion of 'memcmp(a, b, 64) != 0' +; if we allowed 2 pairs of 32-byte loads per block. + +define i32 @ne_i256_pair(i256* %a, i256* %b) { +; SSE2-LABEL: ne_i256_pair: +; SSE2: # %bb.0: +; SSE2-NEXT: movq 16(%rdi), %r9 +; SSE2-NEXT: movq 24(%rdi), %r11 +; SSE2-NEXT: movq (%rdi), %r8 +; SSE2-NEXT: movq 8(%rdi), %r10 +; SSE2-NEXT: xorq 8(%rsi), %r10 +; SSE2-NEXT: xorq 24(%rsi), %r11 +; SSE2-NEXT: xorq (%rsi), %r8 +; SSE2-NEXT: xorq 16(%rsi), %r9 +; SSE2-NEXT: movq 48(%rdi), %rdx +; SSE2-NEXT: movq 32(%rdi), %rax +; SSE2-NEXT: movq 56(%rdi), %rcx +; SSE2-NEXT: movq 40(%rdi), %rdi +; SSE2-NEXT: xorq 40(%rsi), %rdi +; SSE2-NEXT: xorq 56(%rsi), %rcx +; SSE2-NEXT: orq %r11, %rcx +; SSE2-NEXT: orq %rdi, %rcx +; SSE2-NEXT: orq %r10, %rcx +; SSE2-NEXT: xorq 32(%rsi), %rax +; SSE2-NEXT: xorq 48(%rsi), %rdx +; SSE2-NEXT: orq %r9, %rdx +; SSE2-NEXT: orq %rax, %rdx +; SSE2-NEXT: orq %r8, %rdx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: orq %rcx, %rdx +; SSE2-NEXT: setne %al +; SSE2-NEXT: retq +; +; AVX2-LABEL: ne_i256_pair: +; AVX2: # %bb.0: +; AVX2-NEXT: movq 16(%rdi), %r9 +; AVX2-NEXT: movq 24(%rdi), %r11 +; AVX2-NEXT: movq (%rdi), %r8 +; AVX2-NEXT: movq 8(%rdi), %r10 +; AVX2-NEXT: xorq 8(%rsi), %r10 +; AVX2-NEXT: xorq 24(%rsi), %r11 +; AVX2-NEXT: xorq (%rsi), %r8 +; AVX2-NEXT: xorq 16(%rsi), %r9 +; AVX2-NEXT: movq 48(%rdi), %rdx +; AVX2-NEXT: movq 32(%rdi), %rax +; AVX2-NEXT: movq 56(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %rdi +; AVX2-NEXT: xorq 40(%rsi), %rdi +; AVX2-NEXT: xorq 56(%rsi), %rcx +; AVX2-NEXT: orq %r11, %rcx +; AVX2-NEXT: orq %rdi, %rcx +; AVX2-NEXT: orq %r10, %rcx +; AVX2-NEXT: xorq 32(%rsi), %rax +; AVX2-NEXT: xorq 48(%rsi), %rdx +; AVX2-NEXT: orq %r9, %rdx +; AVX2-NEXT: orq %rax, %rdx +; AVX2-NEXT: orq %r8, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: setne %al +; AVX2-NEXT: retq + %a0 = load i256, i256* %a + %b0 = load i256, i256* %b + %xor1 = xor i256 %a0, %b0 + %ap1 = getelementptr i256, i256* %a, i256 1 + %bp1 = getelementptr i256, i256* %b, i256 1 + %a1 = load i256, i256* %ap1 + %b1 = load i256, i256* %bp1 + %xor2 = xor i256 %a1, %b1 + %or = or i256 %xor1, %xor2 + %cmp = icmp ne i256 %or, 0 + %z = zext i1 %cmp to i32 + ret i32 %z +} + +; This test models the expansion of 'memcmp(a, b, 64) == 0' +; if we allowed 2 pairs of 32-byte loads per block. + +define i32 @eq_i256_pair(i256* %a, i256* %b) { +; SSE2-LABEL: eq_i256_pair: +; SSE2: # %bb.0: +; SSE2-NEXT: movq 16(%rdi), %r9 +; SSE2-NEXT: movq 24(%rdi), %r11 +; SSE2-NEXT: movq (%rdi), %r8 +; SSE2-NEXT: movq 8(%rdi), %r10 +; SSE2-NEXT: xorq 8(%rsi), %r10 +; SSE2-NEXT: xorq 24(%rsi), %r11 +; SSE2-NEXT: xorq (%rsi), %r8 +; SSE2-NEXT: xorq 16(%rsi), %r9 +; SSE2-NEXT: movq 48(%rdi), %rdx +; SSE2-NEXT: movq 32(%rdi), %rax +; SSE2-NEXT: movq 56(%rdi), %rcx +; SSE2-NEXT: movq 40(%rdi), %rdi +; SSE2-NEXT: xorq 40(%rsi), %rdi +; SSE2-NEXT: xorq 56(%rsi), %rcx +; SSE2-NEXT: orq %r11, %rcx +; SSE2-NEXT: orq %rdi, %rcx +; SSE2-NEXT: orq %r10, %rcx +; SSE2-NEXT: xorq 32(%rsi), %rax +; SSE2-NEXT: xorq 48(%rsi), %rdx +; SSE2-NEXT: orq %r9, %rdx +; SSE2-NEXT: orq %rax, %rdx +; SSE2-NEXT: orq %r8, %rdx +; SSE2-NEXT: xorl %eax, %eax +; SSE2-NEXT: orq %rcx, %rdx +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; AVX2-LABEL: eq_i256_pair: +; AVX2: # %bb.0: +; AVX2-NEXT: movq 16(%rdi), %r9 +; AVX2-NEXT: movq 24(%rdi), %r11 +; AVX2-NEXT: movq (%rdi), %r8 +; AVX2-NEXT: movq 8(%rdi), %r10 +; AVX2-NEXT: xorq 8(%rsi), %r10 +; AVX2-NEXT: xorq 24(%rsi), %r11 +; AVX2-NEXT: xorq (%rsi), %r8 +; AVX2-NEXT: xorq 16(%rsi), %r9 +; AVX2-NEXT: movq 48(%rdi), %rdx +; AVX2-NEXT: movq 32(%rdi), %rax +; AVX2-NEXT: movq 56(%rdi), %rcx +; AVX2-NEXT: movq 40(%rdi), %rdi +; AVX2-NEXT: xorq 40(%rsi), %rdi +; AVX2-NEXT: xorq 56(%rsi), %rcx +; AVX2-NEXT: orq %r11, %rcx +; AVX2-NEXT: orq %rdi, %rcx +; AVX2-NEXT: orq %r10, %rcx +; AVX2-NEXT: xorq 32(%rsi), %rax +; AVX2-NEXT: xorq 48(%rsi), %rdx +; AVX2-NEXT: orq %r9, %rdx +; AVX2-NEXT: orq %rax, %rdx +; AVX2-NEXT: orq %r8, %rdx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: orq %rcx, %rdx +; AVX2-NEXT: sete %al +; AVX2-NEXT: retq + %a0 = load i256, i256* %a + %b0 = load i256, i256* %b + %xor1 = xor i256 %a0, %b0 + %ap1 = getelementptr i256, i256* %a, i256 1 + %bp1 = getelementptr i256, i256* %b, i256 1 + %a1 = load i256, i256* %ap1 + %b1 = load i256, i256* %bp1 + %xor2 = xor i256 %a1, %b1 + %or = or i256 %xor1, %xor2 + %cmp = icmp eq i256 %or, 0 + %z = zext i1 %cmp to i32 + ret i32 %z +} + diff --git a/test/CodeGen/X86/win32-eh-available-externally.ll b/test/CodeGen/X86/win32-eh-available-externally.ll new file mode 100644 index 000000000000..49da191de978 --- /dev/null +++ b/test/CodeGen/X86/win32-eh-available-externally.ll @@ -0,0 +1,28 @@ +; RUN: opt -S -x86-winehstate < %s | FileCheck %s --check-prefix=IR +; RUN: llc < %s | FileCheck %s --check-prefix=ASM + +; IR-NOT: define.*__ehhandler +; IR: define available_externally void @foo(void ()*) +; IR-NOT: define.*__ehhandler + +; No code should be emitted. +; ASM-NOT: __ehtable +; ASM-NOT: __ehhandler + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i686-pc-windows-msvc" + +declare i32 @__CxxFrameHandler3(...) unnamed_addr + +define available_externally void @foo(void ()*) personality i32 (...)* @__CxxFrameHandler3 { +start: + invoke void %0() + to label %good unwind label %bad + +good: ; preds = %start + ret void + +bad: ; preds = %start + %cleanuppad = cleanuppad within none [] + cleanupret from %cleanuppad unwind to caller +}