diff --git a/CMakeLists.txt b/CMakeLists.txt index 039315055444..b38f45d12689 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 9) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 0) + set(LLVM_VERSION_PATCH 1) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX "") diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index ceee1be5e1e7..619fab283102 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -334,9 +334,11 @@ public: struct PointerInfo { /// Holds the pointer value that we need to check. TrackingVH PointerValue; - /// Holds the pointer value at the beginning of the loop. + /// Holds the smallest byte address accessed by the pointer throughout all + /// iterations of the loop. const SCEV *Start; - /// Holds the pointer value at the end of the loop. + /// Holds the largest byte address accessed by the pointer throughout all + /// iterations of the loop, plus 1. const SCEV *End; /// Holds the information if this pointer is used for writing to memory. bool IsWritePtr; diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h index adcb063f4544..9451fa57c0f6 100644 --- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h +++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h @@ -72,7 +72,7 @@ public: } void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override { - registerEHFramesInProcess(Addr, Size); + deregisterEHFramesInProcess(Addr, Size); } /// This method returns the address of the specified function or variable in diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index 5ece731fa143..e9264ec59b55 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -668,13 +668,12 @@ def int_masked_gather: Intrinsic<[llvm_anyvector_ty], [LLVMVectorOfPointersToElt<0>, llvm_i32_ty, LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly]>; + [IntrReadMem]>; def int_masked_scatter: Intrinsic<[], [llvm_anyvector_ty, LLVMVectorOfPointersToElt<0>, llvm_i32_ty, - LLVMVectorSameWidth<0, llvm_i1_ty>], - [IntrArgMemOnly]>; + LLVMVectorSameWidth<0, llvm_i1_ty>]>; // Test whether a pointer is associated with a type metadata identifier. def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], diff --git a/include/llvm/IR/TypeFinder.h b/include/llvm/IR/TypeFinder.h index d5baf7ab0b9e..046f85caec9d 100644 --- a/include/llvm/IR/TypeFinder.h +++ b/include/llvm/IR/TypeFinder.h @@ -59,6 +59,8 @@ public: StructType *&operator[](unsigned Idx) { return StructTypes[Idx]; } + DenseSet &getVisitedMetadata() { return VisitedMetadata; } + private: /// incorporateType - This method adds the type to the list of used /// structures if it's not in there already. diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 0d774cf08e2f..5214eb7c051c 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -148,6 +148,19 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, return OrigSCEV; } +/// Calculate Start and End points of memory access. +/// Let's assume A is the first access and B is a memory access on N-th loop +/// iteration. Then B is calculated as: +/// B = A + Step*N . +/// Step value may be positive or negative. +/// N is a calculated back-edge taken count: +/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0 +/// Start and End points are calculated in the following way: +/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt, +/// where SizeOfElt is the size of single memory access in bytes. +/// +/// There is no conflict when the intervals are disjoint: +/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End) void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, @@ -176,12 +189,17 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, if (CStep->getValue()->isNegative()) std::swap(ScStart, ScEnd); } else { - // Fallback case: the step is not constant, but the we can still + // Fallback case: the step is not constant, but we can still // get the upper and lower bounds of the interval by using min/max // expressions. ScStart = SE->getUMinExpr(ScStart, ScEnd); ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); } + // Add the size of the pointed element to ScEnd. + unsigned EltSize = + Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8; + const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize); + ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); } Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); @@ -1863,9 +1881,17 @@ std::pair LoopAccessInfo::addRuntimeChecks( Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); - Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + // [A|B].Start points to the first accessed byte under base [A|B]. + // [A|B].End points to the last accessed byte, plus one. + // There is no conflict when the intervals are disjoint: + // NoConflict = (B.Start >= A.End) || (A.Start >= B.End) + // + // bound0 = (B.Start < A.End) + // bound1 = (A.Start < B.End) + // IsConflict = bound0 & bound1 + Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0"); FirstInst = getFirstInst(FirstInst, Cmp0, Loc); - Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1"); FirstInst = getFirstInst(FirstInst, Cmp1, Loc); Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); FirstInst = getFirstInst(FirstInst, IsConflict, Loc); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 23e2aa70d0c7..5dacbf9e6b02 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -776,9 +776,8 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, } static void -mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, - MachineBasicBlock &MBBCommon) { - // Merge MMOs from memory operations in the common block. +mergeOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { MachineBasicBlock *MBB = MBBIStartPos->getParent(); // Note CommonTailLen does not necessarily matches the size of // the common BB nor all its instructions because of debug @@ -808,8 +807,18 @@ mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, "Reached BB end within common tail length!"); assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!"); + // Merge MMOs from memory operations in the common block. if (MBBICommon->mayLoad() || MBBICommon->mayStore()) MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI)); + // Drop undef flags if they aren't present in all merged instructions. + for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) { + MachineOperand &MO = MBBICommon->getOperand(I); + if (MO.isReg() && MO.isUndef()) { + const MachineOperand &OtherMO = MBBI->getOperand(I); + if (!OtherMO.isUndef()) + MO.setIsUndef(false); + } + } ++MBBI; ++MBBICommon; @@ -928,8 +937,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, continue; DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() << (i == e-1 ? "" : ", ")); - // Merge MMOs from memory operations as needed. - mergeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); + // Merge operations (MMOs, undef flags) + mergeOperations(SameTails[i].getTailStartPos(), *MBB); // Hack the end off BB i, making it jump to BB commonTailIndex instead. ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); // BB i is no longer a predecessor of SuccBB; remove it from the worklist. diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index 4935868c00f4..09c67bc47863 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -694,6 +694,14 @@ void IRLinker::computeTypeMapping() { if (!ST->hasName()) continue; + if (TypeMap.DstStructTypesSet.hasType(ST)) { + // This is actually a type from the destination module. + // getIdentifiedStructTypes() can have found it by walking debug info + // metadata nodes, some of which get linked by name when ODR Type Uniquing + // is enabled on the Context, from the source to the destination module. + continue; + } + // Check to see if there is a dot in the name followed by a digit. size_t DotPos = ST->getName().rfind('.'); if (DotPos == 0 || DotPos == StringRef::npos || @@ -1336,13 +1344,19 @@ bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) { IRMover::IRMover(Module &M) : Composite(M) { TypeFinder StructTypes; - StructTypes.run(M, true); + StructTypes.run(M, /* OnlyNamed */ false); for (StructType *Ty : StructTypes) { if (Ty->isOpaque()) IdentifiedStructTypes.addOpaque(Ty); else IdentifiedStructTypes.addNonOpaque(Ty); } + // Self-map metadatas in the destination module. This is needed when + // DebugTypeODRUniquing is enabled on the LLVMContext, since metadata in the + // destination module may be reached from the source module. + for (auto *MD : StructTypes.getVisitedMetadata()) { + SharedMDs[MD].reset(const_cast(MD)); + } } Error IRMover::move( diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 117d4e8bcb52..55fd76d375a2 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -412,7 +412,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) { if (printSymbolizedStackTrace(Argv0, StackTrace, depth, OS)) return; -#if HAVE_DLFCN_H && __GNUG__ +#if HAVE_DLFCN_H && __GNUG__ && !defined(__CYGWIN__) int width = 0; for (int i = 0; i < depth; ++i) { Dl_info dlinfo; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index fe699b284882..db8b9fb923bf 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4819,6 +4819,10 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm", def t2LDRConstPool : t2AsmPseudo<"ldr${p} $Rt, $immediate", (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; +// Version w/ the .w suffix. +def : t2InstAlias<"ldr${p}.w $Rt, $immediate", + (t2LDRConstPool GPRnopc:$Rt, + const_pool_asm_imm:$immediate, pred:$p)>; // PLD/PLDW/PLI with alternate literal form. def : t2InstAlias<"pld${p} $addr", diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 7d49302f9a96..f5de8a3cd25e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6933,6 +6933,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, else if (Inst.getOpcode() == ARM::t2LDRConstPool) TmpInst.setOpcode(ARM::t2LDRpci); const ARMOperand &PoolOperand = + (static_cast(*Operands[2]).isToken() && + static_cast(*Operands[2]).getToken() == ".w") ? + static_cast(*Operands[4]) : static_cast(*Operands[3]); const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm(); // If SubExprVal is a constant we may be able to use a MOV diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3d06de804200..6dd73174565a 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -665,9 +665,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); } + if (Subtarget.hasP9Vector()) { - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); } } @@ -7846,6 +7847,17 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo()); } +SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && + "Should only be called for ISD::INSERT_VECTOR_ELT"); + ConstantSDNode *C = dyn_cast(Op.getOperand(2)); + // We have legal lowering for constant indices but not for variable ones. + if (C) + return Op; + return SDValue(); +} + SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -8248,6 +8260,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); // For counter-based loop handling. @@ -8372,7 +8385,9 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned AtomicSize, - unsigned BinOpcode) const { + unsigned BinOpcode, + unsigned CmpOpcode, + unsigned CmpPred) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -8412,8 +8427,12 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = + CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); + if (CmpOpcode) + F->insert(It, loop2MBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); @@ -8435,11 +8454,40 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, // st[wd]cx. r0, ptr // bne- loopMBB // fallthrough --> exitMBB + + // For max/min... + // loopMBB: + // l[wd]arx dest, ptr + // cmpl?[wd] incr, dest + // bgt exitMBB + // loop2MBB: + // st[wd]cx. dest, ptr + // bne- loopMBB + // fallthrough --> exitMBB + BB = loopMBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); + if (CmpOpcode) { + // Signed comparisons of byte or halfword values must be sign-extended. + if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { + unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), + ExtReg).addReg(dest); + BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) + .addReg(incr).addReg(ExtReg); + } else + BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) + .addReg(incr).addReg(dest); + + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + BB = loop2MBB; + } BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) @@ -8457,10 +8505,13 @@ MachineBasicBlock * PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation - unsigned BinOpcode) const { + unsigned BinOpcode, + unsigned CmpOpcode, + unsigned CmpPred) const { // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) - return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); + return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, + CmpOpcode, CmpPred); // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -8482,8 +8533,12 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *loop2MBB = + CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); + if (CmpOpcode) + F->insert(It, loop2MBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); @@ -8568,6 +8623,32 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI, .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) .addReg(TmpReg).addReg(MaskReg); + if (CmpOpcode) { + // For unsigned comparisons, we can directly compare the shifted values. + // For signed comparisons we shift and sign extend. + unsigned SReg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg) + .addReg(TmpDestReg).addReg(MaskReg); + unsigned ValueReg = SReg; + unsigned CmpReg = Incr2Reg; + if (CmpOpcode == PPC::CMPW) { + ValueReg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) + .addReg(SReg).addReg(ShiftReg); + unsigned ValueSReg = RegInfo.createVirtualRegister(RC); + BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) + .addReg(ValueReg); + ValueReg = ValueSReg; + CmpReg = incr; + } + BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) + .addReg(CmpReg).addReg(ValueReg); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); + BB->addSuccessor(loop2MBB); + BB->addSuccessor(exitMBB); + BB = loop2MBB; + } BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) @@ -9074,6 +9155,42 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) + BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) + BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); + + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) + BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) + BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); + + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) + BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) + BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); + + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) + BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16) + BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) + BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) + BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); + else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index e3be8074e62e..e60504507d32 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -585,11 +585,15 @@ namespace llvm { MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, - unsigned BinOpcode) const; + unsigned BinOpcode, + unsigned CmpOpcode = 0, + unsigned CmpPred = 0) const; MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, - unsigned Opcode) const; + unsigned Opcode, + unsigned CmpOpcode = 0, + unsigned CmpPred = 0) const; MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const; @@ -824,6 +828,7 @@ namespace llvm { SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index e7eb8a16180a..5e514c8e8cf6 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -224,6 +224,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I64 : Pseudo< (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64", [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_MIN_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64", + [(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_MAX_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64", + [(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_UMIN_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64", + [(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>; + def ATOMIC_LOAD_UMAX_I64 : Pseudo< + (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64", + [(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64", diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 4a42a947c6cb..a40d4e1a4a69 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1509,6 +1509,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I8 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8", [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8", + [(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8", + [(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMIN_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8", + [(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I8 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8", + [(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16", [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; @@ -1527,6 +1539,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I16 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16", [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16", + [(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16", + [(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMIN_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16", + [(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I16 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16", + [(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32", [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; @@ -1545,6 +1569,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I32 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32", [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MIN_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32", + [(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_MAX_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32", + [(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMIN_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32", + [(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>; + def ATOMIC_LOAD_UMAX_I32 : Pseudo< + (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32", + [(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8", diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2c548384f1cb..ca2053350138 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8656,6 +8656,17 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); + + // Make sure the newly-created LOAD is in the same position as Ld in + // terms of dependency. We create a TokenFactor for Ld and V, + // and update uses of Ld's output chain to use the TokenFactor. + if (Ld->hasAnyUseOfValue(1)) { + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + SDValue(Ld, 1), SDValue(V.getNode(), 1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain); + DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1), + SDValue(V.getNode(), 1)); + } } else if (!BroadcastFromReg) { // We can't broadcast from a vector register. return SDValue(); @@ -27516,7 +27527,8 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // pmulld is supported since SSE41. It is better to use pmulld // instead of pmullw+pmulhw. - if (Subtarget.hasSSE41()) + // pmullw/pmulhw are not supported by SSE. + if (Subtarget.hasSSE41() || !Subtarget.hasSSE2()) return SDValue(); ShrinkMode Mode; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index de4129f86541..803a7e35c209 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2124,7 +2124,7 @@ let Predicates = [HasAVX512] in { (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>; def : Pat<(i1 (trunc (i8 GR8:$src))), - (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri8 $src, (i8 1)), + (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri $src, (i8 1)), sub_8bit)), VK1)>; def : Pat<(i1 (trunc (i8 (assertzext_i1 GR8:$src)))), diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index d1769fc3ebb3..55ffc23e1308 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1322,6 +1322,10 @@ bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) { if (!isa(BB->front())) return false; + // If this BB is a landing pad, we won't be able to split the edge into it. + if (BB->isEHPad()) + return false; + // If we have a xor as the branch input to this block, and we know that the // LHS or RHS of the xor in any predecessor is true/false, then we can clone // the condition into the predecessor and fix that value to true, saving some diff --git a/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll b/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll new file mode 100644 index 000000000000..01813c8a8104 --- /dev/null +++ b/test/Analysis/LoopAccessAnalysis/memcheck-off-by-one-error.ll @@ -0,0 +1,51 @@ +; RUN: opt -analyze --loop-accesses %s | FileCheck %s + +; This test verifies run-time boundary check of memory accesses. +; The original loop: +; void fastCopy(const char* src, char* op) { +; int len = 32; +; while (len > 0) { +; *(reinterpret_cast(op)) = *(reinterpret_cast(src)); +; src += 8; +; op += 8; +; len -= 8; +; } +; } +; Boundaries calculations before this patch: +; (Low: %src High: (24 + %src)) +; and the actual distance between two pointers was 31, (%op - %src = 31) +; IsConflict = (24 > 31) = false -> execution is directed to the vectorized loop. +; The loop was vectorized to 4, 32 byte memory access ( <4 x i64> ), +; store a value at *%op touched memory under *%src. + +;CHECK: Printing analysis 'Loop Access Analysis' for function 'fastCopy' +;CHECK: (Low: %op High: (32 + %op)) +;CHECK: (Low: %src High: (32 + %src)) + +define void @fastCopy(i8* nocapture readonly %src, i8* nocapture %op) { +entry: + br label %while.body.preheader + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %len.addr.07 = phi i32 [ %sub, %while.body ], [ 32, %while.body.preheader ] + %op.addr.06 = phi i8* [ %add.ptr1, %while.body ], [ %op, %while.body.preheader ] + %src.addr.05 = phi i8* [ %add.ptr, %while.body ], [ %src, %while.body.preheader ] + %0 = bitcast i8* %src.addr.05 to i64* + %1 = load i64, i64* %0, align 8 + %2 = bitcast i8* %op.addr.06 to i64* + store i64 %1, i64* %2, align 8 + %add.ptr = getelementptr inbounds i8, i8* %src.addr.05, i64 8 + %add.ptr1 = getelementptr inbounds i8, i8* %op.addr.06, i64 8 + %sub = add nsw i32 %len.addr.07, -8 + %cmp = icmp sgt i32 %len.addr.07, 8 + br i1 %cmp, label %while.body, label %while.end.loopexit + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + ret void +} diff --git a/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll b/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll index a9626f4dc710..2bae4867870a 100644 --- a/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll +++ b/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll @@ -96,15 +96,15 @@ for.end: ; preds = %for.body ; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group {{.*}}[[ZERO]]: -; CHECK-NEXT: (Low: %c High: (78 + %c)) +; CHECK-NEXT: (Low: %c High: (80 + %c)) ; CHECK-NEXT: Member: {(2 + %c),+,4} ; CHECK-NEXT: Member: {%c,+,4} ; CHECK-NEXT: Group {{.*}}[[ONE]]: -; CHECK-NEXT: (Low: %a High: (40 + %a)) +; CHECK-NEXT: (Low: %a High: (42 + %a)) ; CHECK-NEXT: Member: {(2 + %a),+,2} ; CHECK-NEXT: Member: {%a,+,2} ; CHECK-NEXT: Group {{.*}}[[TWO]]: -; CHECK-NEXT: (Low: %b High: (38 + %b)) +; CHECK-NEXT: (Low: %b High: (40 + %b)) ; CHECK-NEXT: Member: {%b,+,2} define void @testg(i16* %a, @@ -168,15 +168,15 @@ for.end: ; preds = %for.body ; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group {{.*}}[[ZERO]]: -; CHECK-NEXT: (Low: %c High: (78 + %c)) +; CHECK-NEXT: (Low: %c High: (80 + %c)) ; CHECK-NEXT: Member: {(2 + %c),+,4} ; CHECK-NEXT: Member: {%c,+,4} ; CHECK-NEXT: Group {{.*}}[[ONE]]: -; CHECK-NEXT: (Low: %a High: (40 + %a)) +; CHECK-NEXT: (Low: %a High: (42 + %a)) ; CHECK-NEXT: Member: {(2 + %a),+,2} ; CHECK-NEXT: Member: {%a,+,2} ; CHECK-NEXT: Group {{.*}}[[TWO]]: -; CHECK-NEXT: (Low: %b High: (38 + %b)) +; CHECK-NEXT: (Low: %b High: (40 + %b)) ; CHECK-NEXT: Member: {%b,+,2} define void @testh(i16* %a, @@ -247,13 +247,13 @@ for.end: ; preds = %for.body ; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2 ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group {{.*}}[[ZERO]]: -; CHECK-NEXT: (Low: ((2 * %offset) + %a) High: (9998 + (2 * %offset) + %a)) +; CHECK-NEXT: (Low: ((2 * %offset) + %a) High: (10000 + (2 * %offset) + %a)) ; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<%for.body> ; CHECK-NEXT: Group {{.*}}[[ONE]]: -; CHECK-NEXT: (Low: %a High: (9998 + %a)) +; CHECK-NEXT: (Low: %a High: (10000 + %a)) ; CHECK-NEXT: Member: {%a,+,2}<%for.body> ; CHECK-NEXT: Group {{.*}}[[TWO]]: -; CHECK-NEXT: (Low: (20000 + %a) High: (29998 + %a)) +; CHECK-NEXT: (Low: (20000 + %a) High: (30000 + %a)) ; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body> define void @testi(i16* %a, diff --git a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll index 607e007f7a2d..405a47554e4e 100644 --- a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +++ b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll @@ -16,7 +16,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnueabi" ; CHECK: function 'f': -; CHECK: (Low: (20000 + %a) High: (60000 + %a)) +; CHECK: (Low: (20000 + %a) High: (60004 + %a)) @B = common global i32* null, align 8 @A = common global i32* null, align 8 @@ -59,7 +59,7 @@ for.end: ; preds = %for.body ; Here it is not obvious what the limits are, since 'step' could be negative. ; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a))))) -; CHECK: High: ((60000 + %a) umax (60000 + (-40000 * %step) + %a)) +; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a))) define void @g(i64 %step) { entry: diff --git a/test/CodeGen/PowerPC/atomic-minmax.ll b/test/CodeGen/PowerPC/atomic-minmax.ll new file mode 100644 index 000000000000..5b9a15331897 --- /dev/null +++ b/test/CodeGen/PowerPC/atomic-minmax.ll @@ -0,0 +1,435 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @a32min(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { +entry: + %0 = atomicrmw min i32* %minimum, i32 %val monotonic + ret void + +; CHECK-LABEL: @a32min +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stwcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a32max(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { +entry: + %0 = atomicrmw max i32* %minimum, i32 %val monotonic + ret void + +; CHECK-LABEL: @a32max +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stwcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a32umin(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { +entry: + %0 = atomicrmw umin i32* %minimum, i32 %val monotonic + ret void + +; CHECK-LABEL: @a32umin +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stwcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a32umax(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 { +entry: + %0 = atomicrmw umax i32* %minimum, i32 %val monotonic + ret void + +; CHECK-LABEL: @a32umax +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stwcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a16min(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { +entry: + %0 = atomicrmw min i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @a16min +; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: sthcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a16max(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { +entry: + %0 = atomicrmw max i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @a16max +; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: sthcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a16umin(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { +entry: + %0 = atomicrmw umin i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @a16umin +; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: sthcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a16umax(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 { +entry: + %0 = atomicrmw umax i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @a16umax +; CHECK: lharx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: sthcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a8min(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { +entry: + %0 = atomicrmw min i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @a8min +; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stbcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a8max(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { +entry: + %0 = atomicrmw max i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @a8max +; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stbcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a8umin(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { +entry: + %0 = atomicrmw umin i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @a8umin +; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stbcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a8umax(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 { +entry: + %0 = atomicrmw umax i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @a8umax +; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmplw 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stbcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a64min(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { +entry: + %0 = atomicrmw min i64* %minimum, i64 %val monotonic + ret void + +; CHECK-LABEL: @a64min +; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpd 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stdcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a64max(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { +entry: + %0 = atomicrmw max i64* %minimum, i64 %val monotonic + ret void + +; CHECK-LABEL: @a64max +; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpd 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stdcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a64umin(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { +entry: + %0 = atomicrmw umin i64* %minimum, i64 %val monotonic + ret void + +; CHECK-LABEL: @a64umin +; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpld 4, [[OLDV]] +; CHECK: bgelr 0 +; CHECK: stdcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @a64umax(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 { +entry: + %0 = atomicrmw umax i64* %minimum, i64 %val monotonic + ret void + +; CHECK-LABEL: @a64umax +; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: cmpld 4, [[OLDV]] +; CHECK: blelr 0 +; CHECK: stdcx. 4, 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae16min(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { +entry: + %0 = atomicrmw min i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @ae16min +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 +; CHECK-DAG: li [[M1:[0-9]+]], 0 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 +; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] +; CHECK: extsh [[SESMOLDV:[0-9]+]], [[SMOLDV]] +; CHECK: cmpw 0, 4, [[SESMOLDV]] +; CHECK: bgelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae16max(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { +entry: + %0 = atomicrmw max i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @ae16max +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 +; CHECK-DAG: li [[M1:[0-9]+]], 0 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 +; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] +; CHECK: extsh [[SESMOLDV:[0-9]+]], [[SMOLDV]] +; CHECK: cmpw 0, 4, [[SESMOLDV]] +; CHECK: blelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae16umin(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { +entry: + %0 = atomicrmw umin i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @ae16umin +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 +; CHECK-DAG: li [[M1:[0-9]+]], 0 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 +; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: cmplw 0, 4, [[MOLDV]] +; CHECK: bgelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae16umax(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 { +entry: + %0 = atomicrmw umax i16* %minimum, i16 %val monotonic + ret void + +; CHECK-LABEL: @ae16umax +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27 +; CHECK-DAG: li [[M1:[0-9]+]], 0 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16 +; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: cmplw 0, 4, [[MOLDV]] +; CHECK: blelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae8min(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { +entry: + %0 = atomicrmw min i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @ae8min +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 +; CHECK-DAG: li [[M1:[0-9]+]], 255 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] +; CHECK: extsb [[SESMOLDV:[0-9]+]], [[SMOLDV]] +; CHECK: cmpw 0, 4, [[SESMOLDV]] +; CHECK: bgelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae8max(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { +entry: + %0 = atomicrmw max i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @ae8max +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 +; CHECK-DAG: li [[M1:[0-9]+]], 255 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]] +; CHECK: extsb [[SESMOLDV:[0-9]+]], [[SMOLDV]] +; CHECK: cmpw 0, 4, [[SESMOLDV]] +; CHECK: blelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae8umin(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { +entry: + %0 = atomicrmw umin i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @ae8umin +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 +; CHECK-DAG: li [[M1:[0-9]+]], 255 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: cmplw 0, 4, [[MOLDV]] +; CHECK: bgelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +define void @ae8umax(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 { +entry: + %0 = atomicrmw umax i8* %minimum, i8 %val monotonic + ret void + +; CHECK-LABEL: @ae8umax +; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28 +; CHECK-DAG: li [[M1:[0-9]+]], 255 +; CHECK-DAG: rldicr 3, 3, 0, 61 +; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24 +; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]] +; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]] +; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]] +; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3 +; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: cmplw 0, 4, [[MOLDV]] +; CHECK: blelr 0 +; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]] +; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]] +; CHECK: stwcx. [[NEWV]], 0, 3 +; CHECK: bne 0, +; CHECK: blr +} + +attributes #0 = { nounwind "target-cpu"="ppc64" } +attributes #1 = { nounwind "target-cpu"="pwr8" } + diff --git a/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll index ac187e084257..fa2844b8d551 100644 --- a/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll +++ b/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll @@ -968,3 +968,25 @@ entry: %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> ret <4 x float> %vecins } +define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) { +entry: +; CHECK-LABEL: insertVarF +; CHECK: stxsspx 1, +; CHECK: lxvd2x +; CHECK-BE-LABEL: insertVarF +; CHECK-BE: stxsspx 1, +; CHECK-BE: lxvw4x + %vecins = insertelement <4 x float> %a, float %f, i32 %el + ret <4 x float> %vecins +} +define <4 x i32> @insertVarI(<4 x i32> %a, i32 %i, i32 %el) { +entry: +; CHECK-LABEL: insertVarI +; CHECK: stwx +; CHECK: lxvd2x +; CHECK-BE-LABEL: insertVarI +; CHECK-BE: stwx +; CHECK-BE: lxvw4x + %vecins = insertelement <4 x i32> %a, i32 %i, i32 %el + ret <4 x i32> %vecins +} diff --git a/test/CodeGen/PowerPC/pr30451.ll b/test/CodeGen/PowerPC/pr30451.ll new file mode 100644 index 000000000000..930553451cf8 --- /dev/null +++ b/test/CodeGen/PowerPC/pr30451.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s +define i8 @atomic_min_i8() { + top: + %0 = alloca i8, align 2 + %1 = bitcast i8* %0 to i8* + call void @llvm.lifetime.start(i64 2, i8* %1) + store i8 -1, i8* %0, align 2 + %2 = atomicrmw min i8* %0, i8 0 acq_rel + %3 = load atomic i8, i8* %0 acquire, align 8 + call void @llvm.lifetime.end(i64 2, i8* %1) + ret i8 %3 +; CHECK-LABEL: atomic_min_i8 +; CHECK: lbarx [[DST:[0-9]+]], +; CHECK-NEXT: extsb [[EXT:[0-9]+]], [[DST]] +; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]] +; CHECK-NEXT: bge 0 +} +define i16 @atomic_min_i16() { + top: + %0 = alloca i16, align 2 + %1 = bitcast i16* %0 to i8* + call void @llvm.lifetime.start(i64 2, i8* %1) + store i16 -1, i16* %0, align 2 + %2 = atomicrmw min i16* %0, i16 0 acq_rel + %3 = load atomic i16, i16* %0 acquire, align 8 + call void @llvm.lifetime.end(i64 2, i8* %1) + ret i16 %3 +; CHECK-LABEL: atomic_min_i16 +; CHECK: lharx [[DST:[0-9]+]], +; CHECK-NEXT: extsh [[EXT:[0-9]+]], [[DST]] +; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]] +; CHECK-NEXT: bge 0 +} + +define i8 @atomic_max_i8() { + top: + %0 = alloca i8, align 2 + %1 = bitcast i8* %0 to i8* + call void @llvm.lifetime.start(i64 2, i8* %1) + store i8 -1, i8* %0, align 2 + %2 = atomicrmw max i8* %0, i8 0 acq_rel + %3 = load atomic i8, i8* %0 acquire, align 8 + call void @llvm.lifetime.end(i64 2, i8* %1) + ret i8 %3 +; CHECK-LABEL: atomic_max_i8 +; CHECK: lbarx [[DST:[0-9]+]], +; CHECK-NEXT: extsb [[EXT:[0-9]+]], [[DST]] +; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]] +; CHECK-NEXT: ble 0 +} +define i16 @atomic_max_i16() { + top: + %0 = alloca i16, align 2 + %1 = bitcast i16* %0 to i8* + call void @llvm.lifetime.start(i64 2, i8* %1) + store i16 -1, i16* %0, align 2 + %2 = atomicrmw max i16* %0, i16 0 acq_rel + %3 = load atomic i16, i16* %0 acquire, align 8 + call void @llvm.lifetime.end(i64 2, i8* %1) + ret i16 %3 +; CHECK-LABEL: atomic_max_i16 +; CHECK: lharx [[DST:[0-9]+]], +; CHECK-NEXT: extsh [[EXT:[0-9]+]], [[DST]] +; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]] +; CHECK-NEXT: ble 0 +} + +declare void @llvm.lifetime.start(i64, i8*) +declare void @llvm.lifetime.end(i64, i8*) diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll index b312be9aa6b2..534670795894 100644 --- a/test/CodeGen/X86/avx-vbroadcast.ll +++ b/test/CodeGen/X86/avx-vbroadcast.ll @@ -546,3 +546,64 @@ define <4 x double> @splat_concat4(double* %p) { %6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> ret <4 x double> %6 } + +; +; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies. +; +define float @broadcast_lifetime() nounwind { +; X32-LABEL: broadcast_lifetime: +; X32: ## BB#0: +; X32-NEXT: pushl %esi +; X32-NEXT: subl $56, %esp +; X32-NEXT: leal {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl %esi, (%esp) +; X32-NEXT: calll _gfunc +; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 +; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) ## 16-byte Spill +; X32-NEXT: movl %esi, (%esp) +; X32-NEXT: calll _gfunc +; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 +; X32-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## 16-byte Folded Reload +; X32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) +; X32-NEXT: flds {{[0-9]+}}(%esp) +; X32-NEXT: addl $56, %esp +; X32-NEXT: popl %esi +; X32-NEXT: retl +; +; X64-LABEL: broadcast_lifetime: +; X64: ## BB#0: +; X64-NEXT: subq $40, %rsp +; X64-NEXT: leaq (%rsp), %rdi +; X64-NEXT: callq _gfunc +; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill +; X64-NEXT: leaq (%rsp), %rdi +; X64-NEXT: callq _gfunc +; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0 +; X64-NEXT: vsubss {{[0-9]+}}(%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload +; X64-NEXT: addq $40, %rsp +; X64-NEXT: retq + %1 = alloca <4 x float>, align 16 + %2 = alloca <4 x float>, align 16 + %3 = bitcast <4 x float>* %1 to i8* + %4 = bitcast <4 x float>* %2 to i8* + + call void @llvm.lifetime.start(i64 16, i8* %3) + call void @gfunc(<4 x float>* %1) + %5 = load <4 x float>, <4 x float>* %1, align 16 + call void @llvm.lifetime.end(i64 16, i8* %3) + + call void @llvm.lifetime.start(i64 16, i8* %4) + call void @gfunc(<4 x float>* %2) + %6 = load <4 x float>, <4 x float>* %2, align 16 + call void @llvm.lifetime.end(i64 16, i8* %4) + + %7 = extractelement <4 x float> %5, i32 1 + %8 = extractelement <4 x float> %6, i32 1 + %9 = fsub float %8, %7 + ret float %9 +} + +declare void @gfunc(<4 x float>*) +declare void @llvm.lifetime.start(i64, i8*) +declare void @llvm.lifetime.end(i64, i8*) diff --git a/test/CodeGen/X86/branchfolding-undef.mir b/test/CodeGen/X86/branchfolding-undef.mir new file mode 100644 index 000000000000..0da167b33257 --- /dev/null +++ b/test/CodeGen/X86/branchfolding-undef.mir @@ -0,0 +1,29 @@ +# RUN: llc -o - %s -march=x86 -run-pass branch-folder | FileCheck %s +# Test that tail merging drops undef flags that aren't present on all +# instructions to be merged. +--- | + define void @func() { ret void } +... +--- +# CHECK-LABEL: name: func +# CHECK: bb.1: +# CHECK: %eax = MOV32ri 2 +# CHECK-NOT: RET +# CHECK: bb.2: +# CHECK-NOT: RET 0, undef %eax +# CHECK: RET 0, %eax +name: func +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + JE_1 %bb.1, implicit undef %eflags + JMP_1 %bb.2 + + bb.1: + %eax = MOV32ri 2 + RET 0, %eax + + bb.2: + RET 0, undef %eax +... diff --git a/test/CodeGen/X86/no-and8ri8.ll b/test/CodeGen/X86/no-and8ri8.ll new file mode 100644 index 000000000000..57f33226602e --- /dev/null +++ b/test/CodeGen/X86/no-and8ri8.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=x86_64-pc-linux -mattr=+avx512f --show-mc-encoding < %s | FileCheck %s + +declare i1 @bar() + +; CHECK-LABEL: @foo +; CHECK-NOT: andb {{.*}} # encoding: [0x82, +define i1 @foo(i1 %i) nounwind { +entry: + br i1 %i, label %if, label %else + +if: + %r = call i1 @bar() + br label %else + +else: + %ret = phi i1 [%r, %if], [true, %entry] + ret i1 %ret +} diff --git a/test/CodeGen/X86/pr30298.ll b/test/CodeGen/X86/pr30298.ll new file mode 100644 index 000000000000..1e6dad0b20d1 --- /dev/null +++ b/test/CodeGen/X86/pr30298.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i386-pc-linux-gnu -mattr=+sse < %s | FileCheck %s + +@c = external global i32*, align 8 + +define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) nounwind { +; CHECK-LABEL: mul_2xi8: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl c, %esi +; CHECK-NEXT: movzbl 1(%edx,%ecx), %edi +; CHECK-NEXT: movzbl (%edx,%ecx), %edx +; CHECK-NEXT: movzbl 1(%eax,%ecx), %ebx +; CHECK-NEXT: movzbl (%eax,%ecx), %eax +; CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: imull %edi, %ebx +; CHECK-NEXT: movl %ebx, 4(%esi,%ecx,4) +; CHECK-NEXT: movl %eax, (%esi,%ecx,4) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl +entry: + %pre = load i32*, i32** @c + %tmp6 = getelementptr inbounds i8, i8* %a, i64 %index + %tmp7 = bitcast i8* %tmp6 to <2 x i8>* + %wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1 + %tmp8 = zext <2 x i8> %wide.load to <2 x i32> + %tmp10 = getelementptr inbounds i8, i8* %b, i64 %index + %tmp11 = bitcast i8* %tmp10 to <2 x i8>* + %wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1 + %tmp12 = zext <2 x i8> %wide.load17 to <2 x i32> + %tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8 + %tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index + %tmp15 = bitcast i32* %tmp14 to <2 x i32>* + store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4 + ret void +} diff --git a/test/LTO/X86/Inputs/type-mapping-src.ll b/test/LTO/X86/Inputs/type-mapping-src.ll new file mode 100644 index 000000000000..3a80560a4486 --- /dev/null +++ b/test/LTO/X86/Inputs/type-mapping-src.ll @@ -0,0 +1,20 @@ +target triple = "x86_64-pc-windows-msvc18.0.0" + +%SrcType = type { i8 } +@x = external global %SrcType + +%CommonStruct = type opaque +@bar = internal global %CommonStruct* null, !dbg !0 + + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!12} +!0 = distinct !DIGlobalVariable(name: "bar", linkageName: "bar", scope: !1, file: !2, line: 2, type: !5, isLocal: false, isDefinition: true) +!1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) +!2 = !DIFile(filename: "b", directory: "/") +!3 = !{} +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64) +!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !2, line: 1, flags: DIFlagFwdDecl, identifier: ".?AUS@@") +!12 = !{i32 2, !"Debug Info Version", i32 3} + diff --git a/test/LTO/X86/type-mapping-bug.ll b/test/LTO/X86/type-mapping-bug.ll new file mode 100644 index 000000000000..3a1891234c86 --- /dev/null +++ b/test/LTO/X86/type-mapping-bug.ll @@ -0,0 +1,50 @@ +; RUN: llvm-as -o %t.dst.bc %s +; RUN: llvm-as -o %t.src.bc %S/Inputs/type-mapping-src.ll +; RUN: llvm-lto %t.dst.bc %t.src.bc -o=/dev/null + +target triple = "x86_64-pc-windows-msvc18.0.0" + +; @x in Src will be linked with this @x, causing SrcType in Src to be mapped +; to %DstType. +%DstType = type { i8 } +@x = global %DstType zeroinitializer + +; The Src module will re-use our DINode for this type. +%CommonStruct = type { i32 } +@foo = internal global %CommonStruct zeroinitializer, !dbg !5 + +; That DINode will refer to this value, casted to %Tricky.1* (!11), +; which will then show up in Src's getIdentifiedStructTypes(). +@templateValueParam = global i8 zeroinitializer + +; Because of the names, we would try to map %Tricky.1 to %Tricky -- +; mapping a Dst type to another Dst type! This would assert when +; getting a mapping from %DstType, which has previously used as +; a destination type. Since these types are not in the source module, +; there should be no attempt to create a mapping involving them; +; both types should be left as they are. +%Tricky = type opaque +%Tricky.1 = type { %DstType* } + + +; Mark %Tricky used. +@use = global %Tricky* zeroinitializer + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!19} +!1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4) +!2 = !DIFile(filename: "a", directory: "/") +!3 = !{} +!4 = !{!5} +!5 = distinct !DIGlobalVariable(name: "foo", linkageName: "foo", scope: !1, file: !2, line: 5, type: !6, isLocal: false, isDefinition: true) +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !2, line: 5, size: 8, elements: !7, identifier: ".?AUS@@") +!7 = !{!8} +!8 = !DIDerivedType(tag: DW_TAG_inheritance, scope: !6, baseType: !9) +!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Template<&x>", file: !2, line: 3, size: 8, elements: !3, templateParams: !10, identifier: ".?AU?$Template@$1?x@@3UX@@A@@") +!10 = !{!11} + +!11 = !DITemplateValueParameter(type: !12, value: %Tricky.1* bitcast (i8* @templateValueParam to %Tricky.1*)) + +!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64) +!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "X", file: !2, line: 1, size: 8, elements: !3, identifier: ".?AUX@@") +!19 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/test/MC/ARM/ldr-pseudo-wide.s b/test/MC/ARM/ldr-pseudo-wide.s new file mode 100644 index 000000000000..ae1799f706ac --- /dev/null +++ b/test/MC/ARM/ldr-pseudo-wide.s @@ -0,0 +1,71 @@ +@ Test case for PR30352 +@ Check that ldr.w is: +@ accepted and ignored for ARM +@ accepted and propagated for Thumb2 +@ rejected as needing Thumb2 for Thumb + +@RUN: llvm-mc -triple armv5-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK %s +@RUN: llvm-mc -triple armv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK-DARWIN-ARM --check-prefix=CHECK-DARWIN %s +@RUN: llvm-mc -triple thumbv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK-THUMB2 --check-prefix=CHECK %s +@RUN: llvm-mc -triple thumbv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK-DARWIN-THUMB2 --check-prefix=CHECK-DARWIN %s +@RUN: not llvm-mc -triple thumbv6-unknown-linux-gnueabi %s 2>&1 | FileCheck --check-prefix=CHECK-THUMB %s +@RUN: not llvm-mc -triple thumbv6-base-apple-darwin %s 2>&1 | FileCheck --check-prefix=CHECK-THUMB %s +@ CHECK-LABEL: f1: +f1: + ldr r0, =0x10002 +@ CHECK-ARM: ldr r0, .Ltmp[[TMP0:[0-9]+]] +@ CHECK-DARWIN-ARM: ldr r0, Ltmp0 +@ CHECK-THUMB2: ldr r0, .Ltmp[[TMP0:[0-9]+]] +@ CHECK-DARWIN-THUMB2: ldr r0, Ltmp0 + + ldr.w r0, =0x10002 +@ CHECK-ARM: ldr r0, .Ltmp[[TMP1:[0-9]+]] +@ CHECK-DARWIN-ARM: ldr r0, Ltmp1 +@ CHECK-THUMB2: ldr.w r0, .Ltmp[[TMP1:[0-9]+]] +@ CHECK-DARWIN-THUMB2: ldr.w r0, Ltmp1 +@ CHECK-THUMB: error: instruction requires: thumb2 +@ CHECK-THUMB-NEXT: ldr.w r0, =0x10002 + +@ CHECK-LABEL: f2: +f2: + ldr r0, =foo +@ CHECK-ARM: ldr r0, .Ltmp[[TMP2:[0-9]+]] +@ CHECK-DARWIN-ARM: ldr r0, Ltmp2 +@ CHECK-THUMB2: ldr r0, .Ltmp[[TMP2:[0-9]+]] +@ CHECK-DARWIN-THUMB2: ldr r0, Ltmp2 + + ldr.w r0, =foo +@ CHECK-ARM: ldr r0, .Ltmp[[TMP3:[0-9]+]] +@ CHECK-DARWIN-ARM: ldr r0, Ltmp3 +@ CHECK-THUMB2: ldr.w r0, .Ltmp[[TMP3:[0-9]+]] +@ CHECK-DARWIN-THUMB2: ldr.w r0, Ltmp3 +@ CHECK-THUMB: error: instruction requires: thumb2 +@ CHECK-THUMB-NEXT: ldr.w r0, =foo + +@ CHECK-LABEL: f3: +f3: + ldr.w r1, =0x1 +@ CHECK-ARM: mov r1, #1 +@ CHECK-DARWIN-ARM: mov r1, #1 +@ CHECK-THUMB2: mov.w r1, #1 +@ CHECK-DARWIN-THUMB2: mov.w r1, #1 +@ CHECK-THUMB: error: instruction requires: thumb2 +@ CHECK-THUMB-NEXT: ldr.w r1, =0x1 + +@ CHECK: .Ltmp0: +@ CHECK-NEXT: .long 65538 +@ CHECK: .Ltmp1: +@ CHECK-NEXT: .long 65538 +@ CHECK: .Ltmp2: +@ CHECK-NEXT: .long foo +@ CHECK: .Ltmp3: +@ CHECK-NEXT: .long foo + +@ CHECK-DARWIN: Ltmp0: +@ CHECK-DARWIN-NEXT: .long 65538 +@ CHECK-DARWIN: Ltmp1: +@ CHECK-DARWIN-NEXT: .long 65538 +@ CHECK-DARWIN: Ltmp2: +@ CHECK-DARWIN-NEXT: .long foo +@ CHECK-DARWIN: Ltmp3: +@ CHECK-DARWIN-NEXT: .long foo diff --git a/test/ThinLTO/X86/Inputs/crash_debuginfo.ll b/test/ThinLTO/X86/Inputs/crash_debuginfo.ll new file mode 100644 index 000000000000..9bb9a2fb0c5c --- /dev/null +++ b/test/ThinLTO/X86/Inputs/crash_debuginfo.ll @@ -0,0 +1,33 @@ +; ModuleID = 'test/ThinLTO/X86/Inputs/crash_debuginfo.ll' +source_filename = "src.bc" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +%another_type = type { i32 } + +define void @bar(i32 %arg) { + %tmp = add i32 %arg, 0, !dbg !7 + unreachable +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "Apple LLVM version 8.0.0 (clang-800.0.25.1)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !3, imports: !2) +!1 = !DIFile(filename: "2.cpp", directory: "some_dir") +!2 = !{} +!3 = !{!4} +!4 = distinct !DIGlobalVariable(name: "a_global", linkageName: "a_global", scope: null, line: 52, type: !5, isLocal: true, isDefinition: true, variable: %another_type** undef) +!5 = !DISubroutineType(types: !2) +!6 = !{i32 2, !"Debug Info Version", i32 3} +!7 = distinct !DILocation(line: 728, column: 71, scope: !8, inlinedAt: !14) +!8 = distinct !DISubprogram(name: "baz", linkageName: "baz", scope: !9, file: !1, line: 726, type: !5, isLocal: false, isDefinition: true, scopeLine: 727, flags: DIFlagPrototyped, isOptimized: true, unit: !0, declaration: !10, variables: !11) +!9 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "some_other_class", scope: !1, file: !1, line: 197, size: 192, align: 64, elements: !2, templateParams: !2, identifier: "some_other_class") +!10 = !DISubprogram(name: "baz", linkageName: "baz", scope: !9, file: !1, line: 726, type: !5, isLocal: false, isDefinition: false, scopeLine: 726, flags: DIFlagPrototyped, isOptimized: true) +!11 = !{!12} +!12 = !DILocalVariable(name: "caster", scope: !8, file: !1, line: 728, type: !13) +!13 = distinct !DICompositeType(tag: DW_TAG_union_type, scope: !8, file: !1, line: 728, size: 64, align: 64, elements: !2, identifier: "someclass") +!14 = distinct !DILocation(line: 795, column: 16, scope: !15) +!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 794, column: 7) +!16 = distinct !DISubprogram(name: "operator()", linkageName: "some_special_function", scope: null, file: !1, line: 783, type: !5, isLocal: true, isDefinition: true, scopeLine: 784, flags: DIFlagPrototyped, isOptimized: true, unit: !0, declaration: !17, variables: !2) +!17 = !DISubprogram(name: "operator()", linkageName: "some_special_function", scope: null, file: !1, line: 783, type: !5, isLocal: false, isDefinition: false, scopeLine: 783, flags: DIFlagPrototyped, isOptimized: true) diff --git a/test/ThinLTO/X86/Inputs/import_opaque_type.ll b/test/ThinLTO/X86/Inputs/import_opaque_type.ll new file mode 100644 index 000000000000..fe2b2934724b --- /dev/null +++ b/test/ThinLTO/X86/Inputs/import_opaque_type.ll @@ -0,0 +1,15 @@ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +%0 = type { i8 } + +%a = type { %0 * } + +define void @bar(%a *) { + ret void +} + +define void @baz() { + call void @bar(%a *null) + ret void +} diff --git a/test/ThinLTO/X86/crash_debuginfo.ll b/test/ThinLTO/X86/crash_debuginfo.ll new file mode 100644 index 000000000000..b250afab1ed6 --- /dev/null +++ b/test/ThinLTO/X86/crash_debuginfo.ll @@ -0,0 +1,46 @@ +; RUN: opt -module-summary -o %t-dst.bc %s +; RUN: opt -module-summary -o %t-src.bc %p/Inputs/crash_debuginfo.ll +; RUN: llvm-lto -thinlto -o %t-index %t-dst.bc %t-src.bc +; RUN: opt -function-import -inline -summary-file %t-index.thinlto.bc %t-dst.bc -o %t.out +; RUN: llvm-nm %t.out | FileCheck %s + +; Verify that we import bar and inline it. It use to crash importing due to ODR type uniquing +; CHECK-NOT: bar +; CHECK: foo +; CHECK-NOT: bar + +; ModuleID = 'test/ThinLTO/X86/crash_debuginfo.ll' +source_filename = "test/ThinLTO/X86/crash_debuginfo.ll" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +%some_type = type { i32 } + +define void @foo(i32 %arg) { + call void @bar(i32 %arg), !dbg !7 + unreachable +} + +declare void @bar(i32) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "Apple LLVM version 8.0.0 (clang-800.0.24.1)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2) +!1 = !DIFile(filename: "1.cpp", directory: "/another_dir") +!2 = !{!3} +!3 = distinct !DIGlobalVariable(name: "_", linkageName: "some_global", scope: null, file: !1, line: 20, type: !4, isLocal: true, isDefinition: true, variable: %some_type* undef) +!4 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "slice_nil", file: !1, line: 13, size: 64, align: 64, elements: !5, identifier: "_ZTSN5boost6python3api9slice_nilE") +!5 = !{} +!6 = !{i32 2, !"Debug Info Version", i32 3} +!7 = distinct !DILocation(line: 728, column: 71, scope: !8, inlinedAt: !15) +!8 = distinct !DISubprogram(name: "baz", linkageName: "baz", scope: !9, file: !1, line: 726, type: !10, isLocal: false, isDefinition: true, scopeLine: 727, flags: DIFlagPrototyped, isOptimized: true, unit: !0, declaration: !11, variables: !12) +!9 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "some_other_class", file: !1, line: 197, size: 192, align: 64, elements: !5, templateParams: !5, identifier: "some_other_class") +!10 = !DISubroutineType(types: !5) +!11 = !DISubprogram(name: "baz", linkageName: "baz", scope: !9, file: !1, line: 726, type: !10, isLocal: false, isDefinition: false, scopeLine: 726, flags: DIFlagPrototyped, isOptimized: true) +!12 = !{!13} +!13 = !DILocalVariable(name: "caster", scope: !8, file: !1, line: 728, type: !14) +!14 = distinct !DICompositeType(tag: DW_TAG_union_type, scope: !8, file: !1, line: 728, size: 64, align: 64, elements: !5, identifier: "someclass") +!15 = distinct !DILocation(line: 87, column: 9, scope: !16) +!16 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !9, line: 73, type: !10, isLocal: false, isDefinition: true, scopeLine: 74, flags: DIFlagPrototyped, isOptimized: true, unit: !0, declaration: !17, variables: !5) +!17 = !DISubprogram(name: "foo", linkageName: "foo", scope: !9, file: !1, line: 83, type: !10, isLocal: false, isDefinition: false, scopeLine: 83, flags: DIFlagPrototyped, isOptimized: true) diff --git a/test/ThinLTO/X86/import_opaque_type.ll b/test/ThinLTO/X86/import_opaque_type.ll new file mode 100644 index 000000000000..bfa251abacab --- /dev/null +++ b/test/ThinLTO/X86/import_opaque_type.ll @@ -0,0 +1,27 @@ +; Do setup work for all below tests: generate bitcode and combined index +; RUN: opt -module-summary %s -o %t.bc +; RUN: opt -module-summary %p/Inputs/import_opaque_type.ll -o %t2.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t3.bc %t.bc %t2.bc + +; Check that we import correctly the imported type to replace the opaque one here +; RUN: llvm-lto -thinlto-action=import %t.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s + + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +; CHECK: %0 = type { i8 } +%0 = type opaque + +%a = type { %0 * } + +declare void @baz() +define void @foo(%a *) { + call void @baz() + ret void +} + +define i32 @main() { + call void @foo(%a *null) + ret i32 0 +} diff --git a/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll b/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll new file mode 100644 index 000000000000..3f8fdcc8eafb --- /dev/null +++ b/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -basicaa -gvn -S | FileCheck %s + +declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) +declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) + +; This test ensures that masked scatter and gather operations, which take vectors of pointers, +; do not have pointer aliasing ignored when being processed. +; No scatter/gather calls should end up eliminated +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.scatter +; CHECK: llvm.masked.gather +; CHECK: llvm.masked.scatter +; CHECK: llvm.masked.gather +define spir_kernel void @test(<2 x i32*> %in1, <2 x i32*> %in2, i32* %out) { +entry: + ; Just some temporary storage + %tmp.0 = alloca i32 + %tmp.1 = alloca i32 + %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0 + %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1 + ; Read from in1 and in2 + %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> , <2 x i32> undef) #1 + %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in1 to the allocas + call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> ); + ; Read in1 from the allocas + ; This gather should alias the scatter we just saw + %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in2 to the allocas + call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> ); + ; Read in2 from the allocas + ; This gather should alias the scatter we just saw, and not be eliminated + %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> , <2 x i32> undef) #1 + ; Store in2 to out for good measure + %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0 + %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1 + store i32 %tmp.v.1.0, i32* %out + %out.1 = getelementptr i32, i32* %out, i32 1 + store i32 %tmp.v.1.1, i32* %out.1 + ret void +} diff --git a/test/Transforms/JumpThreading/pr27840.ll b/test/Transforms/JumpThreading/pr27840.ll new file mode 100644 index 000000000000..cbee2af67fae --- /dev/null +++ b/test/Transforms/JumpThreading/pr27840.ll @@ -0,0 +1,33 @@ +; RUN: opt -jump-threading -S < %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +declare void @helper() +declare i32 @__gxx_personality_v0(...) + + +define void @pr27840(i8* %call, i1 %A) personality i32(...)* @__gxx_personality_v0 { +entry: + invoke void @helper() + to label %invoke.cont unwind label %lpad + +; Don't jump threading; we can't split the critical edge from entry to lpad. +; CHECK-LABEL: @pr27840 +; CHECK: invoke +; CHECK-NEXT: to label %invoke.cont unwind label %lpad + +invoke.cont: + invoke void @helper() + to label %nowhere unwind label %lpad + +lpad: + %b = phi i1 [ true, %invoke.cont ], [ false, %entry ] + landingpad { i8*, i32 } + cleanup + %xor = xor i1 %b, %A + br i1 %xor, label %nowhere, label %invoke.cont + +nowhere: + unreachable +} diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll index a3b5a598d220..e91d5b84324d 100644 --- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll +++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -8,10 +8,10 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK: br ;CHECK: getelementptr ;CHECK-DAG: getelementptr -;CHECK-DAG: icmp uge -;CHECK-DAG: icmp uge -;CHECK-DAG: icmp uge -;CHECK-DAG: icmp uge +;CHECK-DAG: icmp ugt +;CHECK-DAG: icmp ugt +;CHECK-DAG: icmp ugt +;CHECK-DAG: icmp ugt ;CHECK-DAG: and ;CHECK-DAG: and ;CHECK: br diff --git a/test/Transforms/LoopVectorize/tbaa-nodep.ll b/test/Transforms/LoopVectorize/tbaa-nodep.ll index 06d000230027..3e79d47de08c 100644 --- a/test/Transforms/LoopVectorize/tbaa-nodep.ll +++ b/test/Transforms/LoopVectorize/tbaa-nodep.ll @@ -36,7 +36,7 @@ for.end: ; preds = %for.body ; CHECK: ret i32 0 ; CHECK-NOTBAA-LABEL: @test1 -; CHECK-NOTBAA: icmp uge i32* +; CHECK-NOTBAA: icmp ugt i32* ; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa ; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa @@ -70,8 +70,8 @@ for.end: ; preds = %for.body ; required. Without TBAA, however, two checks are required. ; CHECK-LABEL: @test2 -; CHECK: icmp uge float* -; CHECK: icmp uge float* +; CHECK: icmp ugt float* +; CHECK: icmp ugt float* ; CHECK-NOT: icmp uge i32* ; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa @@ -80,10 +80,10 @@ for.end: ; preds = %for.body ; CHECK: ret i32 0 ; CHECK-NOTBAA-LABEL: @test2 -; CHECK-NOTBAA: icmp uge float* -; CHECK-NOTBAA: icmp uge float* -; CHECK-NOTBAA-DAG: icmp uge float* -; CHECK-NOTBAA-DAG: icmp uge i32* +; CHECK-NOTBAA: icmp ugt float* +; CHECK-NOTBAA: icmp ugt float* +; CHECK-NOTBAA-DAG: icmp ugt float* +; CHECK-NOTBAA-DAG: icmp ugt i32* ; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa ; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa diff --git a/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll b/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll index 9eacbde7710a..022ea734cec2 100644 --- a/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll +++ b/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll @@ -8,15 +8,15 @@ ; CHECK-NEXT: Loop Versioning found to be beneficial ; ; CHECK: for.body3: -; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ] +; CHECK-NEXT: %[[induction:.*]] = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ] ; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ] ; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64 ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom ; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !6, !noalias !6 -; CHECK-NEXT: %add8 = add nsw i32 %add86, %add +; CHECK-NEXT: %add8 = add nsw i32 %[[induction]], %add ; CHECK-NEXT: %inc = add nuw i32 %j.113, 1 ; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr -; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop !7 +; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit6, !llvm.loop !7 define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 { entry: %cmp14 = icmp eq i32 %itr, 0