Vendor import of llvm release_39 branch r287912:
https://llvm.org/svn/llvm-project/llvm/branches/release_39@287912
This commit is contained in:
parent
60a9e02f55
commit
6449741f4c
@ -27,7 +27,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
|
||||
set(LLVM_VERSION_MINOR 9)
|
||||
endif()
|
||||
# Default the patch component of the LLVM version unless the caller has
# already defined it.
if(NOT DEFINED LLVM_VERSION_PATCH)
  set(LLVM_VERSION_PATCH 1)
endif()
|
||||
if(NOT DEFINED LLVM_VERSION_SUFFIX)
|
||||
set(LLVM_VERSION_SUFFIX "")
|
||||
|
@ -334,9 +334,11 @@ class RuntimePointerChecking {
|
||||
struct PointerInfo {
|
||||
/// Holds the pointer value that we need to check.
|
||||
TrackingVH<Value> PointerValue;
|
||||
/// Holds the pointer value at the beginning of the loop.
|
||||
/// Holds the smallest byte address accessed by the pointer throughout all
|
||||
/// iterations of the loop.
|
||||
const SCEV *Start;
|
||||
/// Holds the pointer value at the end of the loop.
|
||||
/// Holds the largest byte address accessed by the pointer throughout all
|
||||
/// iterations of the loop, plus 1.
|
||||
const SCEV *End;
|
||||
/// Holds the information if this pointer is used for writing to memory.
|
||||
bool IsWritePtr;
|
||||
|
@ -72,7 +72,7 @@ class RTDyldMemoryManager : public MCJITMemoryManager,
|
||||
}
|
||||
|
||||
/// Deregisters the EH frame region of \p Size bytes starting at \p Addr by
/// forwarding to deregisterEHFramesInProcess(). \p LoadAddr is not used by
/// this implementation.
///
/// This must only deregister: calling registerEHFramesInProcess() here would
/// register the frames a second time instead of undoing the registration.
void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override {
  deregisterEHFramesInProcess(Addr, Size);
}
|
||||
|
||||
/// This method returns the address of the specified function or variable in
|
||||
|
@ -668,13 +668,12 @@ def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
|
||||
LLVMVectorSameWidth<0, llvm_i1_ty>,
|
||||
LLVMMatchType<0>],
|
||||
[IntrReadMem, IntrArgMemOnly]>;
|
||||
[IntrReadMem]>;
|
||||
|
||||
// Masked scatter: no results; operands are the vector to store, a vector of
// pointers to its elements, an i32, and an i1 mask vector of the same width.
// No IntrArgMemOnly property is attached: the addresses are carried in a
// vector of pointers rather than in a plain pointer argument.
def int_masked_scatter: Intrinsic<[],
                                  [llvm_anyvector_ty,
                                   LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
                                   LLVMVectorSameWidth<0, llvm_i1_ty>]>;
|
||||
|
||||
// Test whether a pointer is associated with a type metadata identifier.
|
||||
def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
|
||||
|
@ -59,6 +59,8 @@ class TypeFinder {
|
||||
|
||||
/// Returns a mutable reference to the entry of StructTypes at index \p Idx.
StructType *&operator[](unsigned Idx) { return StructTypes[Idx]; }
|
||||
|
||||
/// Returns a mutable reference to the VisitedMetadata set of MDNode pointers.
DenseSet<const MDNode *> &getVisitedMetadata() { return VisitedMetadata; }
|
||||
|
||||
private:
|
||||
/// incorporateType - This method adds the type to the list of used
|
||||
/// structures if it's not in there already.
|
||||
|
@ -148,6 +148,19 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
|
||||
return OrigSCEV;
|
||||
}
|
||||
|
||||
/// Calculate Start and End points of memory access.
|
||||
/// Let's assume A is the first access and B is a memory access on N-th loop
|
||||
/// iteration. Then B is calculated as:
|
||||
/// B = A + Step*N .
|
||||
/// Step value may be positive or negative.
|
||||
/// N is a calculated back-edge taken count:
|
||||
/// N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
|
||||
/// Start and End points are calculated in the following way:
|
||||
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
|
||||
/// where SizeOfElt is the size of single memory access in bytes.
|
||||
///
|
||||
/// There is no conflict when the intervals are disjoint:
|
||||
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
|
||||
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
|
||||
unsigned DepSetId, unsigned ASId,
|
||||
const ValueToValueMap &Strides,
|
||||
@ -176,12 +189,17 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
|
||||
if (CStep->getValue()->isNegative())
|
||||
std::swap(ScStart, ScEnd);
|
||||
} else {
|
||||
// Fallback case: the step is not constant, but the we can still
|
||||
// Fallback case: the step is not constant, but we can still
|
||||
// get the upper and lower bounds of the interval by using min/max
|
||||
// expressions.
|
||||
ScStart = SE->getUMinExpr(ScStart, ScEnd);
|
||||
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
|
||||
}
|
||||
// Add the size of the pointed element to ScEnd.
|
||||
unsigned EltSize =
|
||||
Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
|
||||
const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
|
||||
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
|
||||
}
|
||||
|
||||
Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
|
||||
@ -1863,9 +1881,17 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
|
||||
Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
|
||||
Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
|
||||
|
||||
Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
|
||||
// [A|B].Start points to the first accessed byte under base [A|B].
|
||||
// [A|B].End points to the last accessed byte, plus one.
|
||||
// There is no conflict when the intervals are disjoint:
|
||||
// NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
|
||||
//
|
||||
// bound0 = (B.Start < A.End)
|
||||
// bound1 = (A.Start < B.End)
|
||||
// IsConflict = bound0 & bound1
|
||||
Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
|
||||
FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
|
||||
Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
|
||||
Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
|
||||
FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
|
||||
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
|
||||
FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
|
||||
|
@ -776,9 +776,8 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
|
||||
}
|
||||
|
||||
static void
|
||||
mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
|
||||
mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
|
||||
MachineBasicBlock &MBBCommon) {
|
||||
// Merge MMOs from memory operations in the common block.
|
||||
MachineBasicBlock *MBB = MBBIStartPos->getParent();
|
||||
// Note CommonTailLen does not necessarily match the size of
|
||||
// the common BB nor all its instructions because of debug
|
||||
@ -808,8 +807,18 @@ mergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
|
||||
"Reached BB end within common tail length!");
|
||||
assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!");
|
||||
|
||||
// Merge MMOs from memory operations in the common block.
|
||||
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
|
||||
MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
|
||||
// Drop undef flags if they aren't present in all merged instructions.
|
||||
for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) {
|
||||
MachineOperand &MO = MBBICommon->getOperand(I);
|
||||
if (MO.isReg() && MO.isUndef()) {
|
||||
const MachineOperand &OtherMO = MBBI->getOperand(I);
|
||||
if (!OtherMO.isUndef())
|
||||
MO.setIsUndef(false);
|
||||
}
|
||||
}
|
||||
|
||||
++MBBI;
|
||||
++MBBICommon;
|
||||
@ -928,8 +937,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
|
||||
continue;
|
||||
DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
|
||||
<< (i == e-1 ? "" : ", "));
|
||||
// Merge MMOs from memory operations as needed.
|
||||
mergeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
|
||||
// Merge operations (MMOs, undef flags)
|
||||
mergeOperations(SameTails[i].getTailStartPos(), *MBB);
|
||||
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
|
||||
ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
|
||||
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
|
||||
|
@ -694,6 +694,14 @@ void IRLinker::computeTypeMapping() {
|
||||
if (!ST->hasName())
|
||||
continue;
|
||||
|
||||
if (TypeMap.DstStructTypesSet.hasType(ST)) {
|
||||
// This is actually a type from the destination module.
|
||||
// getIdentifiedStructTypes() can have found it by walking debug info
|
||||
// metadata nodes, some of which get linked by name when ODR Type Uniquing
|
||||
// is enabled on the Context, from the source to the destination module.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check to see if there is a dot in the name followed by a digit.
|
||||
size_t DotPos = ST->getName().rfind('.');
|
||||
if (DotPos == 0 || DotPos == StringRef::npos ||
|
||||
@ -1336,13 +1344,19 @@ bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) {
|
||||
|
||||
/// Constructs a mover whose composite (destination) module is \p M.
///
/// Collects the struct types found in \p M into IdentifiedStructTypes, split
/// into opaque and non-opaque sets, and self-maps every metadata node that the
/// type walk visited.
IRMover::IRMover(Module &M) : Composite(M) {
  TypeFinder StructTypes;
  // Run the finder exactly once, with OnlyNamed disabled.
  StructTypes.run(M, /* OnlyNamed */ false);
  for (StructType *Ty : StructTypes) {
    if (Ty->isOpaque())
      IdentifiedStructTypes.addOpaque(Ty);
    else
      IdentifiedStructTypes.addNonOpaque(Ty);
  }
  // Self-map metadatas in the destination module. This is needed when
  // DebugTypeODRUniquing is enabled on the LLVMContext, since metadata in the
  // destination module may be reached from the source module.
  for (auto *MD : StructTypes.getVisitedMetadata()) {
    SharedMDs[MD].reset(const_cast<MDNode *>(MD));
  }
}
|
||||
|
||||
Error IRMover::move(
|
||||
|
@ -412,7 +412,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
|
||||
|
||||
if (printSymbolizedStackTrace(Argv0, StackTrace, depth, OS))
|
||||
return;
|
||||
#if HAVE_DLFCN_H && __GNUG__
|
||||
#if HAVE_DLFCN_H && __GNUG__ && !defined(__CYGWIN__)
|
||||
int width = 0;
|
||||
for (int i = 0; i < depth; ++i) {
|
||||
Dl_info dlinfo;
|
||||
|
@ -4819,6 +4819,10 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm",
|
||||
def t2LDRConstPool
|
||||
: t2AsmPseudo<"ldr${p} $Rt, $immediate",
|
||||
(ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>;
|
||||
// Version w/ the .w suffix.
|
||||
def : t2InstAlias<"ldr${p}.w $Rt, $immediate",
|
||||
(t2LDRConstPool GPRnopc:$Rt,
|
||||
const_pool_asm_imm:$immediate, pred:$p)>;
|
||||
|
||||
// PLD/PLDW/PLI with alternate literal form.
|
||||
def : t2InstAlias<"pld${p} $addr",
|
||||
|
@ -6933,6 +6933,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
|
||||
else if (Inst.getOpcode() == ARM::t2LDRConstPool)
|
||||
TmpInst.setOpcode(ARM::t2LDRpci);
|
||||
const ARMOperand &PoolOperand =
|
||||
(static_cast<ARMOperand &>(*Operands[2]).isToken() &&
|
||||
static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w") ?
|
||||
static_cast<ARMOperand &>(*Operands[4]) :
|
||||
static_cast<ARMOperand &>(*Operands[3]);
|
||||
const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm();
|
||||
// If SubExprVal is a constant we may be able to use a MOV
|
||||
|
@ -665,9 +665,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
||||
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
|
||||
addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
|
||||
}
|
||||
|
||||
if (Subtarget.hasP9Vector()) {
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
|
||||
}
|
||||
}
|
||||
|
||||
@ -7846,6 +7847,17 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
|
||||
return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
|
||||
}
|
||||
|
||||
/// Lowering hook for ISD::INSERT_VECTOR_ELT nodes.
///
/// \returns \p Op unchanged when the index operand (operand 2) is a constant,
/// and an empty SDValue when the index is variable.
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  // Only constant indices have a legal lowering; variable ones do not.
  const bool HasConstantIndex =
      dyn_cast<ConstantSDNode>(Op.getOperand(2)) != nullptr;
  return HasConstantIndex ? Op : SDValue();
}
|
||||
|
||||
SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
SDLoc dl(Op);
|
||||
@ -8248,6 +8260,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
|
||||
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
|
||||
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
|
||||
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
|
||||
case ISD::MUL: return LowerMUL(Op, DAG);
|
||||
|
||||
// For counter-based loop handling.
|
||||
@ -8372,7 +8385,9 @@ Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
|
||||
MachineBasicBlock *
|
||||
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
|
||||
unsigned AtomicSize,
|
||||
unsigned BinOpcode) const {
|
||||
unsigned BinOpcode,
|
||||
unsigned CmpOpcode,
|
||||
unsigned CmpPred) const {
|
||||
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
|
||||
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
|
||||
|
||||
@ -8412,8 +8427,12 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
|
||||
DebugLoc dl = MI.getDebugLoc();
|
||||
|
||||
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||
MachineBasicBlock *loop2MBB =
|
||||
CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
|
||||
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||
F->insert(It, loopMBB);
|
||||
if (CmpOpcode)
|
||||
F->insert(It, loop2MBB);
|
||||
F->insert(It, exitMBB);
|
||||
exitMBB->splice(exitMBB->begin(), BB,
|
||||
std::next(MachineBasicBlock::iterator(MI)), BB->end());
|
||||
@ -8435,11 +8454,40 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
|
||||
// st[wd]cx. r0, ptr
|
||||
// bne- loopMBB
|
||||
// fallthrough --> exitMBB
|
||||
|
||||
// For max/min...
|
||||
// loopMBB:
|
||||
// l[wd]arx dest, ptr
|
||||
// cmpl?[wd] incr, dest
|
||||
// bgt exitMBB
|
||||
// loop2MBB:
|
||||
// st[wd]cx. dest, ptr
|
||||
// bne- loopMBB
|
||||
// fallthrough --> exitMBB
|
||||
|
||||
BB = loopMBB;
|
||||
BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
|
||||
.addReg(ptrA).addReg(ptrB);
|
||||
if (BinOpcode)
|
||||
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
|
||||
if (CmpOpcode) {
|
||||
// Signed comparisons of byte or halfword values must be sign-extended.
|
||||
if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
|
||||
unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
|
||||
BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
|
||||
ExtReg).addReg(dest);
|
||||
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
|
||||
.addReg(incr).addReg(ExtReg);
|
||||
} else
|
||||
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
|
||||
.addReg(incr).addReg(dest);
|
||||
|
||||
BuildMI(BB, dl, TII->get(PPC::BCC))
|
||||
.addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
|
||||
BB->addSuccessor(loop2MBB);
|
||||
BB->addSuccessor(exitMBB);
|
||||
BB = loop2MBB;
|
||||
}
|
||||
BuildMI(BB, dl, TII->get(StoreMnemonic))
|
||||
.addReg(TmpReg).addReg(ptrA).addReg(ptrB);
|
||||
BuildMI(BB, dl, TII->get(PPC::BCC))
|
||||
@ -8457,10 +8505,13 @@ MachineBasicBlock *
|
||||
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
|
||||
MachineBasicBlock *BB,
|
||||
bool is8bit, // operation
|
||||
unsigned BinOpcode) const {
|
||||
unsigned BinOpcode,
|
||||
unsigned CmpOpcode,
|
||||
unsigned CmpPred) const {
|
||||
// If we support part-word atomic mnemonics, just use them
|
||||
if (Subtarget.hasPartwordAtomics())
|
||||
return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);
|
||||
return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode,
|
||||
CmpOpcode, CmpPred);
|
||||
|
||||
// This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
|
||||
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
|
||||
@ -8482,8 +8533,12 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
|
||||
DebugLoc dl = MI.getDebugLoc();
|
||||
|
||||
MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||
MachineBasicBlock *loop2MBB =
|
||||
CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
|
||||
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
|
||||
F->insert(It, loopMBB);
|
||||
if (CmpOpcode)
|
||||
F->insert(It, loop2MBB);
|
||||
F->insert(It, exitMBB);
|
||||
exitMBB->splice(exitMBB->begin(), BB,
|
||||
std::next(MachineBasicBlock::iterator(MI)), BB->end());
|
||||
@ -8568,6 +8623,32 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
|
||||
.addReg(TmpDestReg).addReg(MaskReg);
|
||||
BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
|
||||
.addReg(TmpReg).addReg(MaskReg);
|
||||
if (CmpOpcode) {
|
||||
// For unsigned comparisons, we can directly compare the shifted values.
|
||||
// For signed comparisons we shift and sign extend.
|
||||
unsigned SReg = RegInfo.createVirtualRegister(RC);
|
||||
BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), SReg)
|
||||
.addReg(TmpDestReg).addReg(MaskReg);
|
||||
unsigned ValueReg = SReg;
|
||||
unsigned CmpReg = Incr2Reg;
|
||||
if (CmpOpcode == PPC::CMPW) {
|
||||
ValueReg = RegInfo.createVirtualRegister(RC);
|
||||
BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
|
||||
.addReg(SReg).addReg(ShiftReg);
|
||||
unsigned ValueSReg = RegInfo.createVirtualRegister(RC);
|
||||
BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
|
||||
.addReg(ValueReg);
|
||||
ValueReg = ValueSReg;
|
||||
CmpReg = incr;
|
||||
}
|
||||
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
|
||||
.addReg(CmpReg).addReg(ValueReg);
|
||||
BuildMI(BB, dl, TII->get(PPC::BCC))
|
||||
.addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
|
||||
BB->addSuccessor(loop2MBB);
|
||||
BB->addSuccessor(exitMBB);
|
||||
BB = loop2MBB;
|
||||
}
|
||||
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
|
||||
.addReg(Tmp3Reg).addReg(Tmp2Reg);
|
||||
BuildMI(BB, dl, TII->get(PPC::STWCX))
|
||||
@ -9074,6 +9155,42 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
|
||||
BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
|
||||
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
|
||||
BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
|
||||
BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
|
||||
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
|
||||
BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
|
||||
BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
|
||||
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
|
||||
BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
|
||||
BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
|
||||
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
|
||||
BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
|
||||
BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
|
||||
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
|
||||
BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
|
||||
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
|
||||
|
@ -585,11 +585,15 @@ namespace llvm {
|
||||
MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
|
||||
MachineBasicBlock *MBB,
|
||||
unsigned AtomicSize,
|
||||
unsigned BinOpcode) const;
|
||||
unsigned BinOpcode,
|
||||
unsigned CmpOpcode = 0,
|
||||
unsigned CmpPred = 0) const;
|
||||
MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
|
||||
MachineBasicBlock *MBB,
|
||||
bool is8bit,
|
||||
unsigned Opcode) const;
|
||||
unsigned Opcode,
|
||||
unsigned CmpOpcode = 0,
|
||||
unsigned CmpPred = 0) const;
|
||||
|
||||
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
|
||||
MachineBasicBlock *MBB) const;
|
||||
@ -824,6 +828,7 @@ namespace llvm {
|
||||
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -224,6 +224,18 @@ let usesCustomInserter = 1 in {
|
||||
def ATOMIC_LOAD_NAND_I64 : Pseudo<
|
||||
(outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
|
||||
[(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
|
||||
// Pseudo-instructions matching the 64-bit atomic signed/unsigned min and max
// nodes. $ptr is the memory operand, $incr the value operand, and $dst
// receives the result of the atomic node.
def ATOMIC_LOAD_MIN_I64 : Pseudo<
|
||||
(outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MIN_I64",
|
||||
[(set i64:$dst, (atomic_load_min_64 xoaddr:$ptr, i64:$incr))]>;
|
||||
def ATOMIC_LOAD_MAX_I64 : Pseudo<
|
||||
(outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_MAX_I64",
|
||||
[(set i64:$dst, (atomic_load_max_64 xoaddr:$ptr, i64:$incr))]>;
|
||||
def ATOMIC_LOAD_UMIN_I64 : Pseudo<
|
||||
(outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMIN_I64",
|
||||
[(set i64:$dst, (atomic_load_umin_64 xoaddr:$ptr, i64:$incr))]>;
|
||||
def ATOMIC_LOAD_UMAX_I64 : Pseudo<
|
||||
(outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_UMAX_I64",
|
||||
[(set i64:$dst, (atomic_load_umax_64 xoaddr:$ptr, i64:$incr))]>;
|
||||
|
||||
def ATOMIC_CMP_SWAP_I64 : Pseudo<
|
||||
(outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
|
||||
|
@ -1509,6 +1509,18 @@ let usesCustomInserter = 1 in {
|
||||
def ATOMIC_LOAD_NAND_I8 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
|
||||
[(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
|
||||
// Pseudo-instructions matching the 8-bit atomic signed/unsigned min and max
// nodes. The value operand and the result are carried in i32 registers (gprc).
def ATOMIC_LOAD_MIN_I8 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I8",
|
||||
[(set i32:$dst, (atomic_load_min_8 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_MAX_I8 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I8",
|
||||
[(set i32:$dst, (atomic_load_max_8 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_UMIN_I8 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I8",
|
||||
[(set i32:$dst, (atomic_load_umin_8 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_UMAX_I8 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I8",
|
||||
[(set i32:$dst, (atomic_load_umax_8 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_ADD_I16 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
|
||||
[(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
|
||||
@ -1527,6 +1539,18 @@ let usesCustomInserter = 1 in {
|
||||
def ATOMIC_LOAD_NAND_I16 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
|
||||
[(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
|
||||
// Pseudo-instructions matching the 16-bit atomic signed/unsigned min and max
// nodes. The value operand and the result are carried in i32 registers (gprc).
def ATOMIC_LOAD_MIN_I16 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I16",
|
||||
[(set i32:$dst, (atomic_load_min_16 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_MAX_I16 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I16",
|
||||
[(set i32:$dst, (atomic_load_max_16 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_UMIN_I16 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I16",
|
||||
[(set i32:$dst, (atomic_load_umin_16 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_UMAX_I16 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I16",
|
||||
[(set i32:$dst, (atomic_load_umax_16 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_ADD_I32 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
|
||||
[(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
|
||||
@ -1545,6 +1569,18 @@ let usesCustomInserter = 1 in {
|
||||
def ATOMIC_LOAD_NAND_I32 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
|
||||
[(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
|
||||
// Pseudo-instructions matching the 32-bit atomic signed/unsigned min and max
// nodes. $ptr is the memory operand, $incr the value operand, and $dst
// receives the result of the atomic node.
def ATOMIC_LOAD_MIN_I32 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MIN_I32",
|
||||
[(set i32:$dst, (atomic_load_min_32 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_MAX_I32 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_MAX_I32",
|
||||
[(set i32:$dst, (atomic_load_max_32 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_UMIN_I32 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMIN_I32",
|
||||
[(set i32:$dst, (atomic_load_umin_32 xoaddr:$ptr, i32:$incr))]>;
|
||||
def ATOMIC_LOAD_UMAX_I32 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_UMAX_I32",
|
||||
[(set i32:$dst, (atomic_load_umax_32 xoaddr:$ptr, i32:$incr))]>;
|
||||
|
||||
def ATOMIC_CMP_SWAP_I8 : Pseudo<
|
||||
(outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
|
||||
|
@ -8656,6 +8656,17 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
|
||||
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
|
||||
DAG.getMachineFunction().getMachineMemOperand(
|
||||
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
|
||||
|
||||
// Make sure the newly-created LOAD is in the same position as Ld in
|
||||
// terms of dependency. We create a TokenFactor for Ld and V,
|
||||
// and update uses of Ld's output chain to use the TokenFactor.
|
||||
if (Ld->hasAnyUseOfValue(1)) {
|
||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
|
||||
SDValue(Ld, 1), SDValue(V.getNode(), 1));
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
|
||||
DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
|
||||
SDValue(V.getNode(), 1));
|
||||
}
|
||||
} else if (!BroadcastFromReg) {
|
||||
// We can't broadcast from a vector register.
|
||||
return SDValue();
|
||||
@ -27516,7 +27527,8 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget &Subtarget) {
|
||||
// pmulld is supported since SSE41. It is better to use pmulld
|
||||
// instead of pmullw+pmulhw.
|
||||
if (Subtarget.hasSSE41())
|
||||
// pmullw/pmulhw are not supported by SSE.
|
||||
if (Subtarget.hasSSE41() || !Subtarget.hasSSE2())
|
||||
return SDValue();
|
||||
|
||||
ShrinkMode Mode;
|
||||
|
@ -2124,7 +2124,7 @@ let Predicates = [HasAVX512] in {
|
||||
(COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
|
||||
|
||||
// Truncate an 8-bit GPR value to i1: AND the source with 1, place the result
// in the sub_8bit lane of an i16 value, and copy that into the VK1 class.
// The AND uses AND8ri; only one result expression may follow the source
// pattern.
def : Pat<(i1 (trunc (i8 GR8:$src))),
          (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri $src, (i8 1)),
                                  sub_8bit)), VK1)>;
|
||||
|
||||
def : Pat<(i1 (trunc (i8 (assertzext_i1 GR8:$src)))),
|
||||
|
@ -1322,6 +1322,10 @@ bool JumpThreadingPass::ProcessBranchOnXOR(BinaryOperator *BO) {
|
||||
if (!isa<PHINode>(BB->front()))
|
||||
return false;
|
||||
|
||||
// If this BB is a landing pad, we won't be able to split the edge into it.
|
||||
if (BB->isEHPad())
|
||||
return false;
|
||||
|
||||
// If we have a xor as the branch input to this block, and we know that the
|
||||
// LHS or RHS of the xor in any predecessor is true/false, then we can clone
|
||||
// the condition into the predecessor and fix that value to true, saving some
|
||||
|
@ -0,0 +1,51 @@
|
||||
; RUN: opt -analyze --loop-accesses %s | FileCheck %s
|
||||
|
||||
; This test verifies run-time boundary check of memory accesses.
|
||||
; The original loop:
|
||||
; void fastCopy(const char* src, char* op) {
|
||||
; int len = 32;
|
||||
; while (len > 0) {
|
||||
; *(reinterpret_cast<long long*>(op)) = *(reinterpret_cast<const long long*>(src));
|
||||
; src += 8;
|
||||
; op += 8;
|
||||
; len -= 8;
|
||||
; }
|
||||
; }
|
||||
; Boundary calculation before this patch:
|
||||
; (Low: %src High: (24 + %src))
|
||||
; and the actual distance between two pointers was 31, (%op - %src = 31)
|
||||
; IsConflict = (24 > 31) = false -> execution is directed to the vectorized loop.
|
||||
; The loop was vectorized by 4, giving 32-byte memory accesses ( <4 x i64> ),
|
||||
; so the store through %op touched memory that is accessed through %src.
|
||||
|
||||
;CHECK: Printing analysis 'Loop Access Analysis' for function 'fastCopy'
|
||||
;CHECK: (Low: %op High: (32 + %op))
|
||||
;CHECK: (Low: %src High: (32 + %src))
|
||||
|
||||
define void @fastCopy(i8* nocapture readonly %src, i8* nocapture %op) {
|
||||
entry:
|
||||
br label %while.body.preheader
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
%len.addr.07 = phi i32 [ %sub, %while.body ], [ 32, %while.body.preheader ]
|
||||
%op.addr.06 = phi i8* [ %add.ptr1, %while.body ], [ %op, %while.body.preheader ]
|
||||
%src.addr.05 = phi i8* [ %add.ptr, %while.body ], [ %src, %while.body.preheader ]
|
||||
%0 = bitcast i8* %src.addr.05 to i64*
|
||||
%1 = load i64, i64* %0, align 8
|
||||
%2 = bitcast i8* %op.addr.06 to i64*
|
||||
store i64 %1, i64* %2, align 8
|
||||
%add.ptr = getelementptr inbounds i8, i8* %src.addr.05, i64 8
|
||||
%add.ptr1 = getelementptr inbounds i8, i8* %op.addr.06, i64 8
|
||||
%sub = add nsw i32 %len.addr.07, -8
|
||||
%cmp = icmp sgt i32 %len.addr.07, 8
|
||||
br i1 %cmp, label %while.body, label %while.end.loopexit
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
@ -96,15 +96,15 @@ for.end: ; preds = %for.body
|
||||
; CHECK-NEXT: %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
||||
; CHECK-NEXT: (Low: %c High: (78 + %c))
|
||||
; CHECK-NEXT: (Low: %c High: (80 + %c))
|
||||
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
|
||||
; CHECK-NEXT: Member: {%c,+,4}
|
||||
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
||||
; CHECK-NEXT: (Low: %a High: (40 + %a))
|
||||
; CHECK-NEXT: (Low: %a High: (42 + %a))
|
||||
; CHECK-NEXT: Member: {(2 + %a)<nsw>,+,2}
|
||||
; CHECK-NEXT: Member: {%a,+,2}
|
||||
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
||||
; CHECK-NEXT: (Low: %b High: (38 + %b))
|
||||
; CHECK-NEXT: (Low: %b High: (40 + %b))
|
||||
; CHECK-NEXT: Member: {%b,+,2}
|
||||
|
||||
define void @testg(i16* %a,
|
||||
@ -168,15 +168,15 @@ for.end: ; preds = %for.body
|
||||
; CHECK-NEXT: %arrayidxB = getelementptr i16, i16* %b, i64 %ind
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
||||
; CHECK-NEXT: (Low: %c High: (78 + %c))
|
||||
; CHECK-NEXT: (Low: %c High: (80 + %c))
|
||||
; CHECK-NEXT: Member: {(2 + %c)<nsw>,+,4}
|
||||
; CHECK-NEXT: Member: {%c,+,4}
|
||||
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
||||
; CHECK-NEXT: (Low: %a High: (40 + %a))
|
||||
; CHECK-NEXT: (Low: %a High: (42 + %a))
|
||||
; CHECK-NEXT: Member: {(2 + %a),+,2}
|
||||
; CHECK-NEXT: Member: {%a,+,2}
|
||||
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
||||
; CHECK-NEXT: (Low: %b High: (38 + %b))
|
||||
; CHECK-NEXT: (Low: %b High: (40 + %b))
|
||||
; CHECK-NEXT: Member: {%b,+,2}
|
||||
|
||||
define void @testh(i16* %a,
|
||||
@ -247,13 +247,13 @@ for.end: ; preds = %for.body
|
||||
; CHECK-NEXT: %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
|
||||
; CHECK-NEXT: Grouped accesses:
|
||||
; CHECK-NEXT: Group {{.*}}[[ZERO]]:
|
||||
; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (9998 + (2 * %offset) + %a))
|
||||
; CHECK-NEXT: (Low: ((2 * %offset) + %a)<nsw> High: (10000 + (2 * %offset) + %a))
|
||||
; CHECK-NEXT: Member: {((2 * %offset) + %a)<nsw>,+,2}<nsw><%for.body>
|
||||
; CHECK-NEXT: Group {{.*}}[[ONE]]:
|
||||
; CHECK-NEXT: (Low: %a High: (9998 + %a))
|
||||
; CHECK-NEXT: (Low: %a High: (10000 + %a))
|
||||
; CHECK-NEXT: Member: {%a,+,2}<%for.body>
|
||||
; CHECK-NEXT: Group {{.*}}[[TWO]]:
|
||||
; CHECK-NEXT: (Low: (20000 + %a) High: (29998 + %a))
|
||||
; CHECK-NEXT: (Low: (20000 + %a) High: (30000 + %a))
|
||||
; CHECK-NEXT: Member: {(20000 + %a),+,2}<%for.body>
|
||||
|
||||
define void @testi(i16* %a,
|
||||
|
@ -16,7 +16,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--linux-gnueabi"
|
||||
|
||||
; CHECK: function 'f':
|
||||
; CHECK: (Low: (20000 + %a) High: (60000 + %a)<nsw>)
|
||||
; CHECK: (Low: (20000 + %a) High: (60004 + %a))
|
||||
|
||||
@B = common global i32* null, align 8
|
||||
@A = common global i32* null, align 8
|
||||
@ -59,7 +59,7 @@ for.end: ; preds = %for.body
|
||||
; Here it is not obvious what the limits are, since 'step' could be negative.
|
||||
|
||||
; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
|
||||
; CHECK: High: ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a))
|
||||
; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
|
||||
|
||||
define void @g(i64 %step) {
|
||||
entry:
|
||||
|
435
test/CodeGen/PowerPC/atomic-minmax.ll
Normal file
435
test/CodeGen/PowerPC/atomic-minmax.ll
Normal file
@ -0,0 +1,435 @@
|
||||
; RUN: llc < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
define void @a32min(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw min i32* %minimum, i32 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a32min
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpw 4, [[OLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: stwcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a32max(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw max i32* %minimum, i32 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a32max
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpw 4, [[OLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: stwcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a32umin(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw umin i32* %minimum, i32 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a32umin
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmplw 4, [[OLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: stwcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a32umax(i32* nocapture dereferenceable(4) %minimum, i32 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw umax i32* %minimum, i32 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a32umax
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmplw 4, [[OLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: stwcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a16min(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 {
|
||||
entry:
|
||||
%0 = atomicrmw min i16* %minimum, i16 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a16min
|
||||
; CHECK: lharx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpw 4, [[OLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: sthcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a16max(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 {
|
||||
entry:
|
||||
%0 = atomicrmw max i16* %minimum, i16 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a16max
|
||||
; CHECK: lharx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpw 4, [[OLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: sthcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a16umin(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 {
|
||||
entry:
|
||||
%0 = atomicrmw umin i16* %minimum, i16 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a16umin
|
||||
; CHECK: lharx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmplw 4, [[OLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: sthcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a16umax(i16* nocapture dereferenceable(4) %minimum, i16 %val) #1 {
|
||||
entry:
|
||||
%0 = atomicrmw umax i16* %minimum, i16 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a16umax
|
||||
; CHECK: lharx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmplw 4, [[OLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: sthcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a8min(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 {
|
||||
entry:
|
||||
%0 = atomicrmw min i8* %minimum, i8 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a8min
|
||||
; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpw 4, [[OLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: stbcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a8max(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 {
|
||||
entry:
|
||||
%0 = atomicrmw max i8* %minimum, i8 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a8max
|
||||
; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpw 4, [[OLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: stbcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a8umin(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 {
|
||||
entry:
|
||||
%0 = atomicrmw umin i8* %minimum, i8 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a8umin
|
||||
; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmplw 4, [[OLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: stbcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a8umax(i8* nocapture dereferenceable(4) %minimum, i8 %val) #1 {
|
||||
entry:
|
||||
%0 = atomicrmw umax i8* %minimum, i8 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a8umax
|
||||
; CHECK: lbarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmplw 4, [[OLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: stbcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a64min(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw min i64* %minimum, i64 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a64min
|
||||
; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpd 4, [[OLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: stdcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a64max(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw max i64* %minimum, i64 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a64max
|
||||
; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpd 4, [[OLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: stdcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a64umin(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw umin i64* %minimum, i64 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a64umin
|
||||
; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpld 4, [[OLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: stdcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @a64umax(i64* nocapture dereferenceable(4) %minimum, i64 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw umax i64* %minimum, i64 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @a64umax
|
||||
; CHECK: ldarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: cmpld 4, [[OLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: stdcx. 4, 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @ae16min(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw min i16* %minimum, i16 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @ae16min
|
||||
; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27
|
||||
; CHECK-DAG: li [[M1:[0-9]+]], 0
|
||||
; CHECK-DAG: rldicr 3, 3, 0, 61
|
||||
; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16
|
||||
; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535
|
||||
; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]]
|
||||
; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]]
|
||||
; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]]
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]]
|
||||
; CHECK: extsh [[SESMOLDV:[0-9]+]], [[SMOLDV]]
|
||||
; CHECK: cmpw 0, 4, [[SESMOLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]]
|
||||
; CHECK: stwcx. [[NEWV]], 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @ae16max(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw max i16* %minimum, i16 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @ae16max
|
||||
; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27
|
||||
; CHECK-DAG: li [[M1:[0-9]+]], 0
|
||||
; CHECK-DAG: rldicr 3, 3, 0, 61
|
||||
; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16
|
||||
; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535
|
||||
; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]]
|
||||
; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]]
|
||||
; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]]
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]]
|
||||
; CHECK: extsh [[SESMOLDV:[0-9]+]], [[SMOLDV]]
|
||||
; CHECK: cmpw 0, 4, [[SESMOLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]]
|
||||
; CHECK: stwcx. [[NEWV]], 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @ae16umin(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw umin i16* %minimum, i16 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @ae16umin
|
||||
; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27
|
||||
; CHECK-DAG: li [[M1:[0-9]+]], 0
|
||||
; CHECK-DAG: rldicr 3, 3, 0, 61
|
||||
; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16
|
||||
; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535
|
||||
; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]]
|
||||
; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]]
|
||||
; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]]
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: cmplw 0, 4, [[MOLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]]
|
||||
; CHECK: stwcx. [[NEWV]], 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @ae16umax(i16* nocapture dereferenceable(4) %minimum, i16 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw umax i16* %minimum, i16 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @ae16umax
|
||||
; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 27
|
||||
; CHECK-DAG: li [[M1:[0-9]+]], 0
|
||||
; CHECK-DAG: rldicr 3, 3, 0, 61
|
||||
; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 16
|
||||
; CHECK-DAG: ori [[M2:[0-9]+]], [[M1]], 65535
|
||||
; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]]
|
||||
; CHECK-DAG: slw [[M:[0-9]+]], [[M2]], [[SA]]
|
||||
; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]]
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: cmplw 0, 4, [[MOLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]]
|
||||
; CHECK: stwcx. [[NEWV]], 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @ae8min(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw min i8* %minimum, i8 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @ae8min
|
||||
; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28
|
||||
; CHECK-DAG: li [[M1:[0-9]+]], 255
|
||||
; CHECK-DAG: rldicr 3, 3, 0, 61
|
||||
; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24
|
||||
; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]]
|
||||
; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]]
|
||||
; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]]
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]]
|
||||
; CHECK: extsb [[SESMOLDV:[0-9]+]], [[SMOLDV]]
|
||||
; CHECK: cmpw 0, 4, [[SESMOLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]]
|
||||
; CHECK: stwcx. [[NEWV]], 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @ae8max(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw max i8* %minimum, i8 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @ae8max
|
||||
; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28
|
||||
; CHECK-DAG: li [[M1:[0-9]+]], 255
|
||||
; CHECK-DAG: rldicr 3, 3, 0, 61
|
||||
; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24
|
||||
; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]]
|
||||
; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]]
|
||||
; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]]
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: srw [[SMOLDV:[0-9]+]], [[MOLDV]], [[SA]]
|
||||
; CHECK: extsb [[SESMOLDV:[0-9]+]], [[SMOLDV]]
|
||||
; CHECK: cmpw 0, 4, [[SESMOLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]]
|
||||
; CHECK: stwcx. [[NEWV]], 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @ae8umin(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw umin i8* %minimum, i8 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @ae8umin
|
||||
; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28
|
||||
; CHECK-DAG: li [[M1:[0-9]+]], 255
|
||||
; CHECK-DAG: rldicr 3, 3, 0, 61
|
||||
; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24
|
||||
; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]]
|
||||
; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]]
|
||||
; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]]
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: cmplw 0, 4, [[MOLDV]]
|
||||
; CHECK: bgelr 0
|
||||
; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]]
|
||||
; CHECK: stwcx. [[NEWV]], 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
define void @ae8umax(i8* nocapture dereferenceable(4) %minimum, i8 %val) #0 {
|
||||
entry:
|
||||
%0 = atomicrmw umax i8* %minimum, i8 %val monotonic
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @ae8umax
|
||||
; CHECK-DAG: rlwinm [[SA1:[0-9]+]], 3, 3, 27, 28
|
||||
; CHECK-DAG: li [[M1:[0-9]+]], 255
|
||||
; CHECK-DAG: rldicr 3, 3, 0, 61
|
||||
; CHECK-DAG: xori [[SA:[0-9]+]], [[SA1]], 24
|
||||
; CHECK-DAG: slw [[SV:[0-9]+]], 4, [[SA]]
|
||||
; CHECK-DAG: slw [[M:[0-9]+]], [[M1]], [[SA]]
|
||||
; CHECK-DAG: and [[SMV:[0-9]+]], [[SV]], [[M]]
|
||||
; CHECK: lwarx [[OLDV:[0-9]+]], 0, 3
|
||||
; CHECK: and [[MOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: cmplw 0, 4, [[MOLDV]]
|
||||
; CHECK: blelr 0
|
||||
; CHECK: andc [[NOLDV:[0-9]+]], [[OLDV]], [[M]]
|
||||
; CHECK: or [[NEWV:[0-9]+]], [[SMV]], [[NOLDV]]
|
||||
; CHECK: stwcx. [[NEWV]], 0, 3
|
||||
; CHECK: bne 0,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="ppc64" }
|
||||
attributes #1 = { nounwind "target-cpu"="pwr8" }
|
||||
|
@ -968,3 +968,25 @@ entry:
|
||||
%vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
|
||||
ret <4 x float> %vecins
|
||||
}
|
||||
; Inserts the scalar %f into the <4 x float> vector %a at the run-time lane
; index %el and returns the updated vector. The directives inside the body
; expect a stxsspx followed by a vector load (lxvd2x, or lxvw4x for the
; BE-prefixed run).
define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) {
|
||||
entry:
|
||||
; CHECK-LABEL: insertVarF
|
||||
; CHECK: stxsspx 1,
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-BE-LABEL: insertVarF
|
||||
; CHECK-BE: stxsspx 1,
|
||||
; CHECK-BE: lxvw4x
|
||||
%vecins = insertelement <4 x float> %a, float %f, i32 %el
|
||||
ret <4 x float> %vecins
|
||||
}
|
||||
; Inserts the scalar %i into the <4 x i32> vector %a at the run-time lane
; index %el and returns the updated vector. The directives inside the body
; expect a stwx followed by a vector load (lxvd2x, or lxvw4x for the
; BE-prefixed run).
define <4 x i32> @insertVarI(<4 x i32> %a, i32 %i, i32 %el) {
|
||||
entry:
|
||||
; CHECK-LABEL: insertVarI
|
||||
; CHECK: stwx
|
||||
; CHECK: lxvd2x
|
||||
; CHECK-BE-LABEL: insertVarI
|
||||
; CHECK-BE: stwx
|
||||
; CHECK-BE: lxvw4x
|
||||
%vecins = insertelement <4 x i32> %a, i32 %i, i32 %el
|
||||
ret <4 x i32> %vecins
|
||||
}
|
||||
|
69
test/CodeGen/PowerPC/pr30451.ll
Normal file
69
test/CodeGen/PowerPC/pr30451.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s
|
||||
define i8 @atomic_min_i8() {
|
||||
top:
|
||||
%0 = alloca i8, align 2
|
||||
%1 = bitcast i8* %0 to i8*
|
||||
call void @llvm.lifetime.start(i64 2, i8* %1)
|
||||
store i8 -1, i8* %0, align 2
|
||||
%2 = atomicrmw min i8* %0, i8 0 acq_rel
|
||||
%3 = load atomic i8, i8* %0 acquire, align 8
|
||||
call void @llvm.lifetime.end(i64 2, i8* %1)
|
||||
ret i8 %3
|
||||
; CHECK-LABEL: atomic_min_i8
|
||||
; CHECK: lbarx [[DST:[0-9]+]],
|
||||
; CHECK-NEXT: extsb [[EXT:[0-9]+]], [[DST]]
|
||||
; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]]
|
||||
; CHECK-NEXT: bge 0
|
||||
}
|
||||
define i16 @atomic_min_i16() {
|
||||
top:
|
||||
%0 = alloca i16, align 2
|
||||
%1 = bitcast i16* %0 to i8*
|
||||
call void @llvm.lifetime.start(i64 2, i8* %1)
|
||||
store i16 -1, i16* %0, align 2
|
||||
%2 = atomicrmw min i16* %0, i16 0 acq_rel
|
||||
%3 = load atomic i16, i16* %0 acquire, align 8
|
||||
call void @llvm.lifetime.end(i64 2, i8* %1)
|
||||
ret i16 %3
|
||||
; CHECK-LABEL: atomic_min_i16
|
||||
; CHECK: lharx [[DST:[0-9]+]],
|
||||
; CHECK-NEXT: extsh [[EXT:[0-9]+]], [[DST]]
|
||||
; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]]
|
||||
; CHECK-NEXT: bge 0
|
||||
}
|
||||
|
||||
define i8 @atomic_max_i8() {
|
||||
top:
|
||||
%0 = alloca i8, align 2
|
||||
%1 = bitcast i8* %0 to i8*
|
||||
call void @llvm.lifetime.start(i64 2, i8* %1)
|
||||
store i8 -1, i8* %0, align 2
|
||||
%2 = atomicrmw max i8* %0, i8 0 acq_rel
|
||||
%3 = load atomic i8, i8* %0 acquire, align 8
|
||||
call void @llvm.lifetime.end(i64 2, i8* %1)
|
||||
ret i8 %3
|
||||
; CHECK-LABEL: atomic_max_i8
|
||||
; CHECK: lbarx [[DST:[0-9]+]],
|
||||
; CHECK-NEXT: extsb [[EXT:[0-9]+]], [[DST]]
|
||||
; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]]
|
||||
; CHECK-NEXT: ble 0
|
||||
}
|
||||
define i16 @atomic_max_i16() {
|
||||
top:
|
||||
%0 = alloca i16, align 2
|
||||
%1 = bitcast i16* %0 to i8*
|
||||
call void @llvm.lifetime.start(i64 2, i8* %1)
|
||||
store i16 -1, i16* %0, align 2
|
||||
%2 = atomicrmw max i16* %0, i16 0 acq_rel
|
||||
%3 = load atomic i16, i16* %0 acquire, align 8
|
||||
call void @llvm.lifetime.end(i64 2, i8* %1)
|
||||
ret i16 %3
|
||||
; CHECK-LABEL: atomic_max_i16
|
||||
; CHECK: lharx [[DST:[0-9]+]],
|
||||
; CHECK-NEXT: extsh [[EXT:[0-9]+]], [[DST]]
|
||||
; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]]
|
||||
; CHECK-NEXT: ble 0
|
||||
}
|
||||
|
||||
declare void @llvm.lifetime.start(i64, i8*)
|
||||
declare void @llvm.lifetime.end(i64, i8*)
|
@ -546,3 +546,64 @@ define <4 x double> @splat_concat4(double* %p) {
|
||||
%6 = shufflevector <2 x double> %3, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
ret <4 x double> %6
|
||||
}
|
||||
|
||||
;
|
||||
; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
|
||||
;
|
||||
define float @broadcast_lifetime() nounwind {
|
||||
; X32-LABEL: broadcast_lifetime:
|
||||
; X32: ## BB#0:
|
||||
; X32-NEXT: pushl %esi
|
||||
; X32-NEXT: subl $56, %esp
|
||||
; X32-NEXT: leal {{[0-9]+}}(%esp), %esi
|
||||
; X32-NEXT: movl %esi, (%esp)
|
||||
; X32-NEXT: calll _gfunc
|
||||
; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) ## 16-byte Spill
|
||||
; X32-NEXT: movl %esi, (%esp)
|
||||
; X32-NEXT: calll _gfunc
|
||||
; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
|
||||
; X32-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## 16-byte Folded Reload
|
||||
; X32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: flds {{[0-9]+}}(%esp)
|
||||
; X32-NEXT: addl $56, %esp
|
||||
; X32-NEXT: popl %esi
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: broadcast_lifetime:
|
||||
; X64: ## BB#0:
|
||||
; X64-NEXT: subq $40, %rsp
|
||||
; X64-NEXT: leaq (%rsp), %rdi
|
||||
; X64-NEXT: callq _gfunc
|
||||
; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0
|
||||
; X64-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) ## 16-byte Spill
|
||||
; X64-NEXT: leaq (%rsp), %rdi
|
||||
; X64-NEXT: callq _gfunc
|
||||
; X64-NEXT: vbroadcastss {{[0-9]+}}(%rsp), %xmm0
|
||||
; X64-NEXT: vsubss {{[0-9]+}}(%rsp), %xmm0, %xmm0 ## 16-byte Folded Reload
|
||||
; X64-NEXT: addq $40, %rsp
|
||||
; X64-NEXT: retq
|
||||
%1 = alloca <4 x float>, align 16
|
||||
%2 = alloca <4 x float>, align 16
|
||||
%3 = bitcast <4 x float>* %1 to i8*
|
||||
%4 = bitcast <4 x float>* %2 to i8*
|
||||
|
||||
call void @llvm.lifetime.start(i64 16, i8* %3)
|
||||
call void @gfunc(<4 x float>* %1)
|
||||
%5 = load <4 x float>, <4 x float>* %1, align 16
|
||||
call void @llvm.lifetime.end(i64 16, i8* %3)
|
||||
|
||||
call void @llvm.lifetime.start(i64 16, i8* %4)
|
||||
call void @gfunc(<4 x float>* %2)
|
||||
%6 = load <4 x float>, <4 x float>* %2, align 16
|
||||
call void @llvm.lifetime.end(i64 16, i8* %4)
|
||||
|
||||
%7 = extractelement <4 x float> %5, i32 1
|
||||
%8 = extractelement <4 x float> %6, i32 1
|
||||
%9 = fsub float %8, %7
|
||||
ret float %9
|
||||
}
|
||||
|
||||
declare void @gfunc(<4 x float>*)
|
||||
declare void @llvm.lifetime.start(i64, i8*)
|
||||
declare void @llvm.lifetime.end(i64, i8*)
|
||||
|
29
test/CodeGen/X86/branchfolding-undef.mir
Normal file
29
test/CodeGen/X86/branchfolding-undef.mir
Normal file
@ -0,0 +1,29 @@
|
||||
# RUN: llc -o - %s -march=x86 -run-pass branch-folder | FileCheck %s
|
||||
# Test that tail merging drops undef flags that aren't present on all
|
||||
# instructions to be merged.
|
||||
--- |
|
||||
define void @func() { ret void }
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: name: func
|
||||
# CHECK: bb.1:
|
||||
# CHECK: %eax = MOV32ri 2
|
||||
# CHECK-NOT: RET
|
||||
# CHECK: bb.2:
|
||||
# CHECK-NOT: RET 0, undef %eax
|
||||
# CHECK: RET 0, %eax
|
||||
name: func
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1, %bb.2
|
||||
JE_1 %bb.1, implicit undef %eflags
|
||||
JMP_1 %bb.2
|
||||
|
||||
bb.1:
|
||||
%eax = MOV32ri 2
|
||||
RET 0, %eax
|
||||
|
||||
bb.2:
|
||||
RET 0, undef %eax
|
||||
...
|
18
test/CodeGen/X86/no-and8ri8.ll
Normal file
18
test/CodeGen/X86/no-and8ri8.ll
Normal file
@ -0,0 +1,18 @@
|
||||
; RUN: llc -mtriple=x86_64-pc-linux -mattr=+avx512f --show-mc-encoding < %s | FileCheck %s
|
||||
|
||||
declare i1 @bar()
|
||||
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK-NOT: andb {{.*}} # encoding: [0x82,
|
||||
; Returns the result of @bar() when %i is true, and the constant true when
; %i is false; the two paths are merged by the i1 phi in the else block.
define i1 @foo(i1 %i) nounwind {
|
||||
entry:
|
||||
br i1 %i, label %if, label %else
|
||||
|
||||
if:
|
||||
%r = call i1 @bar()
|
||||
br label %else
|
||||
|
||||
else:
|
||||
%ret = phi i1 [%r, %if], [true, %entry]
|
||||
ret i1 %ret
|
||||
}
|
43
test/CodeGen/X86/pr30298.ll
Normal file
43
test/CodeGen/X86/pr30298.ll
Normal file
@ -0,0 +1,43 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=i386-pc-linux-gnu -mattr=+sse < %s | FileCheck %s
|
||||
|
||||
@c = external global i32*, align 8
|
||||
|
||||
define void @mul_2xi8(i8* nocapture readonly %a, i8* nocapture readonly %b, i64 %index) nounwind {
|
||||
; CHECK-LABEL: mul_2xi8:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: pushl %ebx
|
||||
; CHECK-NEXT: pushl %edi
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
|
||||
; CHECK-NEXT: movl c, %esi
|
||||
; CHECK-NEXT: movzbl 1(%edx,%ecx), %edi
|
||||
; CHECK-NEXT: movzbl (%edx,%ecx), %edx
|
||||
; CHECK-NEXT: movzbl 1(%eax,%ecx), %ebx
|
||||
; CHECK-NEXT: movzbl (%eax,%ecx), %eax
|
||||
; CHECK-NEXT: imull %edx, %eax
|
||||
; CHECK-NEXT: imull %edi, %ebx
|
||||
; CHECK-NEXT: movl %ebx, 4(%esi,%ecx,4)
|
||||
; CHECK-NEXT: movl %eax, (%esi,%ecx,4)
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: popl %edi
|
||||
; CHECK-NEXT: popl %ebx
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
%pre = load i32*, i32** @c
|
||||
%tmp6 = getelementptr inbounds i8, i8* %a, i64 %index
|
||||
%tmp7 = bitcast i8* %tmp6 to <2 x i8>*
|
||||
%wide.load = load <2 x i8>, <2 x i8>* %tmp7, align 1
|
||||
%tmp8 = zext <2 x i8> %wide.load to <2 x i32>
|
||||
%tmp10 = getelementptr inbounds i8, i8* %b, i64 %index
|
||||
%tmp11 = bitcast i8* %tmp10 to <2 x i8>*
|
||||
%wide.load17 = load <2 x i8>, <2 x i8>* %tmp11, align 1
|
||||
%tmp12 = zext <2 x i8> %wide.load17 to <2 x i32>
|
||||
%tmp13 = mul nuw nsw <2 x i32> %tmp12, %tmp8
|
||||
%tmp14 = getelementptr inbounds i32, i32* %pre, i64 %index
|
||||
%tmp15 = bitcast i32* %tmp14 to <2 x i32>*
|
||||
store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
|
||||
ret void
|
||||
}
|
20
test/LTO/X86/Inputs/type-mapping-src.ll
Normal file
20
test/LTO/X86/Inputs/type-mapping-src.ll
Normal file
@ -0,0 +1,20 @@
|
||||
target triple = "x86_64-pc-windows-msvc18.0.0"
|
||||
|
||||
%SrcType = type { i8 }
|
||||
@x = external global %SrcType
|
||||
|
||||
%CommonStruct = type opaque
|
||||
@bar = internal global %CommonStruct* null, !dbg !0
|
||||
|
||||
|
||||
!llvm.dbg.cu = !{!1}
|
||||
!llvm.module.flags = !{!12}
|
||||
!0 = distinct !DIGlobalVariable(name: "bar", linkageName: "bar", scope: !1, file: !2, line: 2, type: !5, isLocal: false, isDefinition: true)
|
||||
!1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
|
||||
!2 = !DIFile(filename: "b", directory: "/")
|
||||
!3 = !{}
|
||||
!4 = !{!0}
|
||||
!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64)
|
||||
!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !2, line: 1, flags: DIFlagFwdDecl, identifier: ".?AUS@@")
|
||||
!12 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
|
50
test/LTO/X86/type-mapping-bug.ll
Normal file
50
test/LTO/X86/type-mapping-bug.ll
Normal file
@ -0,0 +1,50 @@
|
||||
; RUN: llvm-as -o %t.dst.bc %s
|
||||
; RUN: llvm-as -o %t.src.bc %S/Inputs/type-mapping-src.ll
|
||||
; RUN: llvm-lto %t.dst.bc %t.src.bc -o=/dev/null
|
||||
|
||||
target triple = "x86_64-pc-windows-msvc18.0.0"
|
||||
|
||||
; @x in Src will be linked with this @x, causing SrcType in Src to be mapped
|
||||
; to %DstType.
|
||||
%DstType = type { i8 }
|
||||
@x = global %DstType zeroinitializer
|
||||
|
||||
; The Src module will re-use our DINode for this type.
|
||||
%CommonStruct = type { i32 }
|
||||
@foo = internal global %CommonStruct zeroinitializer, !dbg !5
|
||||
|
||||
; That DINode will refer to this value, cast to %Tricky.1* (!11),
|
||||
; which will then show up in Src's getIdentifiedStructTypes().
|
||||
@templateValueParam = global i8 zeroinitializer
|
||||
|
||||
; Because of the names, we would try to map %Tricky.1 to %Tricky --
|
||||
; mapping a Dst type to another Dst type! This would assert when
|
||||
; getting a mapping from %DstType, which has previously been used as
|
||||
; a destination type. Since these types are not in the source module,
|
||||
; there should be no attempt to create a mapping involving them;
|
||||
; both types should be left as they are.
|
||||
%Tricky = type opaque
|
||||
%Tricky.1 = type { %DstType* }
|
||||
|
||||
|
||||
; Mark %Tricky used.
|
||||
@use = global %Tricky* zeroinitializer
|
||||
|
||||
!llvm.dbg.cu = !{!1}
|
||||
!llvm.module.flags = !{!19}
|
||||
!1 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !2, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
|
||||
!2 = !DIFile(filename: "a", directory: "/")
|
||||
!3 = !{}
|
||||
!4 = !{!5}
|
||||
!5 = distinct !DIGlobalVariable(name: "foo", linkageName: "foo", scope: !1, file: !2, line: 5, type: !6, isLocal: false, isDefinition: true)
|
||||
!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !2, line: 5, size: 8, elements: !7, identifier: ".?AUS@@")
|
||||
!7 = !{!8}
|
||||
!8 = !DIDerivedType(tag: DW_TAG_inheritance, scope: !6, baseType: !9)
|
||||
!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Template<&x>", file: !2, line: 3, size: 8, elements: !3, templateParams: !10, identifier: ".?AU?$Template@$1?x@@3UX@@A@@")
|
||||
!10 = !{!11}
|
||||
|
||||
!11 = !DITemplateValueParameter(type: !12, value: %Tricky.1* bitcast (i8* @templateValueParam to %Tricky.1*))
|
||||
|
||||
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64)
|
||||
!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "X", file: !2, line: 1, size: 8, elements: !3, identifier: ".?AUX@@")
|
||||
!19 = !{i32 2, !"Debug Info Version", i32 3}
|
71
test/MC/ARM/ldr-pseudo-wide.s
Normal file
71
test/MC/ARM/ldr-pseudo-wide.s
Normal file
@ -0,0 +1,71 @@
|
||||
@ Test case for PR30352
|
||||
@ Check that ldr.w is:
|
||||
@ accepted and ignored for ARM
|
||||
@ accepted and propagated for Thumb2
|
||||
@ rejected as needing Thumb2 for Thumb
|
||||
|
||||
@RUN: llvm-mc -triple armv5-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK-ARM --check-prefix=CHECK %s
|
||||
@RUN: llvm-mc -triple armv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK-DARWIN-ARM --check-prefix=CHECK-DARWIN %s
|
||||
@RUN: llvm-mc -triple thumbv7-unknown-linux-gnueabi %s | FileCheck --check-prefix=CHECK-THUMB2 --check-prefix=CHECK %s
|
||||
@RUN: llvm-mc -triple thumbv7-base-apple-darwin %s | FileCheck --check-prefix=CHECK-DARWIN-THUMB2 --check-prefix=CHECK-DARWIN %s
|
||||
@RUN: not llvm-mc -triple thumbv6-unknown-linux-gnueabi %s 2>&1 | FileCheck --check-prefix=CHECK-THUMB %s
|
||||
@RUN: not llvm-mc -triple thumbv6-base-apple-darwin %s 2>&1 | FileCheck --check-prefix=CHECK-THUMB %s
|
||||
@ CHECK-LABEL: f1:
|
||||
f1:
|
||||
ldr r0, =0x10002
|
||||
@ CHECK-ARM: ldr r0, .Ltmp[[TMP0:[0-9]+]]
|
||||
@ CHECK-DARWIN-ARM: ldr r0, Ltmp0
|
||||
@ CHECK-THUMB2: ldr r0, .Ltmp[[TMP0:[0-9]+]]
|
||||
@ CHECK-DARWIN-THUMB2: ldr r0, Ltmp0
|
||||
|
||||
ldr.w r0, =0x10002
|
||||
@ CHECK-ARM: ldr r0, .Ltmp[[TMP1:[0-9]+]]
|
||||
@ CHECK-DARWIN-ARM: ldr r0, Ltmp1
|
||||
@ CHECK-THUMB2: ldr.w r0, .Ltmp[[TMP1:[0-9]+]]
|
||||
@ CHECK-DARWIN-THUMB2: ldr.w r0, Ltmp1
|
||||
@ CHECK-THUMB: error: instruction requires: thumb2
|
||||
@ CHECK-THUMB-NEXT: ldr.w r0, =0x10002
|
||||
|
||||
@ CHECK-LABEL: f2:
|
||||
f2:
|
||||
ldr r0, =foo
|
||||
@ CHECK-ARM: ldr r0, .Ltmp[[TMP2:[0-9]+]]
|
||||
@ CHECK-DARWIN-ARM: ldr r0, Ltmp2
|
||||
@ CHECK-THUMB2: ldr r0, .Ltmp[[TMP2:[0-9]+]]
|
||||
@ CHECK-DARWIN-THUMB2: ldr r0, Ltmp2
|
||||
|
||||
ldr.w r0, =foo
|
||||
@ CHECK-ARM: ldr r0, .Ltmp[[TMP3:[0-9]+]]
|
||||
@ CHECK-DARWIN-ARM: ldr r0, Ltmp3
|
||||
@ CHECK-THUMB2: ldr.w r0, .Ltmp[[TMP3:[0-9]+]]
|
||||
@ CHECK-DARWIN-THUMB2: ldr.w r0, Ltmp3
|
||||
@ CHECK-THUMB: error: instruction requires: thumb2
|
||||
@ CHECK-THUMB-NEXT: ldr.w r0, =foo
|
||||
|
||||
@ CHECK-LABEL: f3:
|
||||
f3:
|
||||
ldr.w r1, =0x1
|
||||
@ CHECK-ARM: mov r1, #1
|
||||
@ CHECK-DARWIN-ARM: mov r1, #1
|
||||
@ CHECK-THUMB2: mov.w r1, #1
|
||||
@ CHECK-DARWIN-THUMB2: mov.w r1, #1
|
||||
@ CHECK-THUMB: error: instruction requires: thumb2
|
||||
@ CHECK-THUMB-NEXT: ldr.w r1, =0x1
|
||||
|
||||
@ CHECK: .Ltmp0:
|
||||
@ CHECK-NEXT: .long 65538
|
||||
@ CHECK: .Ltmp1:
|
||||
@ CHECK-NEXT: .long 65538
|
||||
@ CHECK: .Ltmp2:
|
||||
@ CHECK-NEXT: .long foo
|
||||
@ CHECK: .Ltmp3:
|
||||
@ CHECK-NEXT: .long foo
|
||||
|
||||
@ CHECK-DARWIN: Ltmp0:
|
||||
@ CHECK-DARWIN-NEXT: .long 65538
|
||||
@ CHECK-DARWIN: Ltmp1:
|
||||
@ CHECK-DARWIN-NEXT: .long 65538
|
||||
@ CHECK-DARWIN: Ltmp2:
|
||||
@ CHECK-DARWIN-NEXT: .long foo
|
||||
@ CHECK-DARWIN: Ltmp3:
|
||||
@ CHECK-DARWIN-NEXT: .long foo
|
33
test/ThinLTO/X86/Inputs/crash_debuginfo.ll
Normal file
33
test/ThinLTO/X86/Inputs/crash_debuginfo.ll
Normal file
@ -0,0 +1,33 @@
|
||||
; ModuleID = 'test/ThinLTO/X86/Inputs/crash_debuginfo.ll'
|
||||
source_filename = "src.bc"
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.7.0"
|
||||
|
||||
%another_type = type { i32 }
|
||||
|
||||
define void @bar(i32 %arg) {
|
||||
%tmp = add i32 %arg, 0, !dbg !7
|
||||
unreachable
|
||||
}
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!6}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "Apple LLVM version 8.0.0 (clang-800.0.25.1)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !3, imports: !2)
|
||||
!1 = !DIFile(filename: "2.cpp", directory: "some_dir")
|
||||
!2 = !{}
|
||||
!3 = !{!4}
|
||||
!4 = distinct !DIGlobalVariable(name: "a_global", linkageName: "a_global", scope: null, line: 52, type: !5, isLocal: true, isDefinition: true, variable: %another_type** undef)
|
||||
!5 = !DISubroutineType(types: !2)
|
||||
!6 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!7 = distinct !DILocation(line: 728, column: 71, scope: !8, inlinedAt: !14)
|
||||
!8 = distinct !DISubprogram(name: "baz", linkageName: "baz", scope: !9, file: !1, line: 726, type: !5, isLocal: false, isDefinition: true, scopeLine: 727, flags: DIFlagPrototyped, isOptimized: true, unit: !0, declaration: !10, variables: !11)
|
||||
!9 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "some_other_class", scope: !1, file: !1, line: 197, size: 192, align: 64, elements: !2, templateParams: !2, identifier: "some_other_class")
|
||||
!10 = !DISubprogram(name: "baz", linkageName: "baz", scope: !9, file: !1, line: 726, type: !5, isLocal: false, isDefinition: false, scopeLine: 726, flags: DIFlagPrototyped, isOptimized: true)
|
||||
!11 = !{!12}
|
||||
!12 = !DILocalVariable(name: "caster", scope: !8, file: !1, line: 728, type: !13)
|
||||
!13 = distinct !DICompositeType(tag: DW_TAG_union_type, scope: !8, file: !1, line: 728, size: 64, align: 64, elements: !2, identifier: "someclass")
|
||||
!14 = distinct !DILocation(line: 795, column: 16, scope: !15)
|
||||
!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 794, column: 7)
|
||||
!16 = distinct !DISubprogram(name: "operator()", linkageName: "some_special_function", scope: null, file: !1, line: 783, type: !5, isLocal: true, isDefinition: true, scopeLine: 784, flags: DIFlagPrototyped, isOptimized: true, unit: !0, declaration: !17, variables: !2)
|
||||
!17 = !DISubprogram(name: "operator()", linkageName: "some_special_function", scope: null, file: !1, line: 783, type: !5, isLocal: false, isDefinition: false, scopeLine: 783, flags: DIFlagPrototyped, isOptimized: true)
|
15
test/ThinLTO/X86/Inputs/import_opaque_type.ll
Normal file
15
test/ThinLTO/X86/Inputs/import_opaque_type.ll
Normal file
@ -0,0 +1,15 @@
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.11.0"
|
||||
|
||||
%0 = type { i8 }
|
||||
|
||||
%a = type { %0 * }
|
||||
|
||||
define void @bar(%a *) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @baz() {
|
||||
call void @bar(%a *null)
|
||||
ret void
|
||||
}
|
46
test/ThinLTO/X86/crash_debuginfo.ll
Normal file
46
test/ThinLTO/X86/crash_debuginfo.ll
Normal file
@ -0,0 +1,46 @@
|
||||
; RUN: opt -module-summary -o %t-dst.bc %s
|
||||
; RUN: opt -module-summary -o %t-src.bc %p/Inputs/crash_debuginfo.ll
|
||||
; RUN: llvm-lto -thinlto -o %t-index %t-dst.bc %t-src.bc
|
||||
; RUN: opt -function-import -inline -summary-file %t-index.thinlto.bc %t-dst.bc -o %t.out
|
||||
; RUN: llvm-nm %t.out | FileCheck %s
|
||||
|
||||
; Verify that we import bar and inline it. It used to crash while importing due to ODR type uniquing.
|
||||
; CHECK-NOT: bar
|
||||
; CHECK: foo
|
||||
; CHECK-NOT: bar
|
||||
|
||||
; ModuleID = 'test/ThinLTO/X86/crash_debuginfo.ll'
|
||||
source_filename = "test/ThinLTO/X86/crash_debuginfo.ll"
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.7.0"
|
||||
|
||||
%some_type = type { i32 }
|
||||
|
||||
define void @foo(i32 %arg) {
|
||||
call void @bar(i32 %arg), !dbg !7
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare void @bar(i32)
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!6}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "Apple LLVM version 8.0.0 (clang-800.0.24.1)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2)
|
||||
!1 = !DIFile(filename: "1.cpp", directory: "/another_dir")
|
||||
!2 = !{!3}
|
||||
!3 = distinct !DIGlobalVariable(name: "_", linkageName: "some_global", scope: null, file: !1, line: 20, type: !4, isLocal: true, isDefinition: true, variable: %some_type* undef)
|
||||
!4 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "slice_nil", file: !1, line: 13, size: 64, align: 64, elements: !5, identifier: "_ZTSN5boost6python3api9slice_nilE")
|
||||
!5 = !{}
|
||||
!6 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!7 = distinct !DILocation(line: 728, column: 71, scope: !8, inlinedAt: !15)
|
||||
!8 = distinct !DISubprogram(name: "baz", linkageName: "baz", scope: !9, file: !1, line: 726, type: !10, isLocal: false, isDefinition: true, scopeLine: 727, flags: DIFlagPrototyped, isOptimized: true, unit: !0, declaration: !11, variables: !12)
|
||||
!9 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "some_other_class", file: !1, line: 197, size: 192, align: 64, elements: !5, templateParams: !5, identifier: "some_other_class")
|
||||
!10 = !DISubroutineType(types: !5)
|
||||
!11 = !DISubprogram(name: "baz", linkageName: "baz", scope: !9, file: !1, line: 726, type: !10, isLocal: false, isDefinition: false, scopeLine: 726, flags: DIFlagPrototyped, isOptimized: true)
|
||||
!12 = !{!13}
|
||||
!13 = !DILocalVariable(name: "caster", scope: !8, file: !1, line: 728, type: !14)
|
||||
!14 = distinct !DICompositeType(tag: DW_TAG_union_type, scope: !8, file: !1, line: 728, size: 64, align: 64, elements: !5, identifier: "someclass")
|
||||
!15 = distinct !DILocation(line: 87, column: 9, scope: !16)
|
||||
!16 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !9, line: 73, type: !10, isLocal: false, isDefinition: true, scopeLine: 74, flags: DIFlagPrototyped, isOptimized: true, unit: !0, declaration: !17, variables: !5)
|
||||
!17 = !DISubprogram(name: "foo", linkageName: "foo", scope: !9, file: !1, line: 83, type: !10, isLocal: false, isDefinition: false, scopeLine: 83, flags: DIFlagPrototyped, isOptimized: true)
|
27
test/ThinLTO/X86/import_opaque_type.ll
Normal file
27
test/ThinLTO/X86/import_opaque_type.ll
Normal file
@ -0,0 +1,27 @@
|
||||
; Do setup work for all tests below: generate bitcode and combined index
|
||||
; RUN: opt -module-summary %s -o %t.bc
|
||||
; RUN: opt -module-summary %p/Inputs/import_opaque_type.ll -o %t2.bc
|
||||
; RUN: llvm-lto -thinlto-action=thinlink -o %t3.bc %t.bc %t2.bc
|
||||
|
||||
; Check that we correctly import the type definition to replace the opaque one here
|
||||
; RUN: llvm-lto -thinlto-action=import %t.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s
|
||||
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.11.0"
|
||||
|
||||
; CHECK: %0 = type { i8 }
|
||||
%0 = type opaque
|
||||
|
||||
%a = type { %0 * }
|
||||
|
||||
declare void @baz()
|
||||
define void @foo(%a *) {
|
||||
call void @baz()
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @main() {
|
||||
call void @foo(%a *null)
|
||||
ret i32 0
|
||||
}
|
42
test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
Normal file
42
test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
|
||||
|
||||
declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
|
||||
declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
|
||||
|
||||
; This test ensures that masked scatter and gather operations, which take vectors of pointers,
|
||||
; do not have pointer aliasing ignored when being processed.
|
||||
; No scatter/gather calls should end up eliminated
|
||||
; CHECK: llvm.masked.gather
|
||||
; CHECK: llvm.masked.gather
|
||||
; CHECK: llvm.masked.scatter
|
||||
; CHECK: llvm.masked.gather
|
||||
; CHECK: llvm.masked.scatter
|
||||
; CHECK: llvm.masked.gather
|
||||
define spir_kernel void @test(<2 x i32*> %in1, <2 x i32*> %in2, i32* %out) {
|
||||
entry:
|
||||
; Just some temporary storage
|
||||
%tmp.0 = alloca i32
|
||||
%tmp.1 = alloca i32
|
||||
%tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0
|
||||
%tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
|
||||
; Read from in1 and in2
|
||||
%in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
|
||||
%in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
|
||||
; Store in1 to the allocas
|
||||
call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
|
||||
; Read in1 from the allocas
|
||||
; This gather should alias the scatter we just saw
|
||||
%tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
|
||||
; Store in2 to the allocas
|
||||
call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
|
||||
; Read in2 from the allocas
|
||||
; This gather should alias the scatter we just saw, and not be eliminated
|
||||
%tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
|
||||
; Store in2 to out for good measure
|
||||
%tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
|
||||
%tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1
|
||||
store i32 %tmp.v.1.0, i32* %out
|
||||
%out.1 = getelementptr i32, i32* %out, i32 1
|
||||
store i32 %tmp.v.1.1, i32* %out.1
|
||||
ret void
|
||||
}
|
33
test/Transforms/JumpThreading/pr27840.ll
Normal file
33
test/Transforms/JumpThreading/pr27840.ll
Normal file
@ -0,0 +1,33 @@
|
||||
; RUN: opt -jump-threading -S < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.11.0"
|
||||
|
||||
declare void @helper()
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
|
||||
|
||||
define void @pr27840(i8* %call, i1 %A) personality i32(...)* @__gxx_personality_v0 {
|
||||
entry:
|
||||
invoke void @helper()
|
||||
to label %invoke.cont unwind label %lpad
|
||||
|
||||
; Don't jump-thread here; we can't split the critical edge from entry to lpad.
|
||||
; CHECK-LABEL: @pr27840
|
||||
; CHECK: invoke
|
||||
; CHECK-NEXT: to label %invoke.cont unwind label %lpad
|
||||
|
||||
invoke.cont:
|
||||
invoke void @helper()
|
||||
to label %nowhere unwind label %lpad
|
||||
|
||||
lpad:
|
||||
%b = phi i1 [ true, %invoke.cont ], [ false, %entry ]
|
||||
landingpad { i8*, i32 }
|
||||
cleanup
|
||||
%xor = xor i1 %b, %A
|
||||
br i1 %xor, label %nowhere, label %invoke.cont
|
||||
|
||||
nowhere:
|
||||
unreachable
|
||||
}
|
@ -8,10 +8,10 @@ target triple = "x86_64-apple-macosx10.8.0"
|
||||
;CHECK: br
|
||||
;CHECK: getelementptr
|
||||
;CHECK-DAG: getelementptr
|
||||
;CHECK-DAG: icmp uge
|
||||
;CHECK-DAG: icmp uge
|
||||
;CHECK-DAG: icmp uge
|
||||
;CHECK-DAG: icmp uge
|
||||
;CHECK-DAG: icmp ugt
|
||||
;CHECK-DAG: icmp ugt
|
||||
;CHECK-DAG: icmp ugt
|
||||
;CHECK-DAG: icmp ugt
|
||||
;CHECK-DAG: and
|
||||
;CHECK-DAG: and
|
||||
;CHECK: br
|
||||
|
@ -36,7 +36,7 @@ for.end: ; preds = %for.body
|
||||
; CHECK: ret i32 0
|
||||
|
||||
; CHECK-NOTBAA-LABEL: @test1
|
||||
; CHECK-NOTBAA: icmp uge i32*
|
||||
; CHECK-NOTBAA: icmp ugt i32*
|
||||
|
||||
; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
|
||||
; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
|
||||
@ -70,8 +70,8 @@ for.end: ; preds = %for.body
|
||||
; required. Without TBAA, however, two checks are required.
|
||||
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: icmp uge float*
|
||||
; CHECK: icmp uge float*
|
||||
; CHECK: icmp ugt float*
|
||||
; CHECK: icmp ugt float*
|
||||
; CHECK-NOT: icmp uge i32*
|
||||
|
||||
; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
|
||||
@ -80,10 +80,10 @@ for.end: ; preds = %for.body
|
||||
; CHECK: ret i32 0
|
||||
|
||||
; CHECK-NOTBAA-LABEL: @test2
|
||||
; CHECK-NOTBAA: icmp uge float*
|
||||
; CHECK-NOTBAA: icmp uge float*
|
||||
; CHECK-NOTBAA-DAG: icmp uge float*
|
||||
; CHECK-NOTBAA-DAG: icmp uge i32*
|
||||
; CHECK-NOTBAA: icmp ugt float*
|
||||
; CHECK-NOTBAA: icmp ugt float*
|
||||
; CHECK-NOTBAA-DAG: icmp ugt float*
|
||||
; CHECK-NOTBAA-DAG: icmp ugt i32*
|
||||
|
||||
; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
|
||||
; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
|
||||
|
@ -8,15 +8,15 @@
|
||||
; CHECK-NEXT: Loop Versioning found to be beneficial
|
||||
;
|
||||
; CHECK: for.body3:
|
||||
; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
|
||||
; CHECK-NEXT: %[[induction:.*]] = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
|
||||
; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ]
|
||||
; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64
|
||||
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
|
||||
; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !6, !noalias !6
|
||||
; CHECK-NEXT: %add8 = add nsw i32 %add86, %add
|
||||
; CHECK-NEXT: %add8 = add nsw i32 %[[induction]], %add
|
||||
; CHECK-NEXT: %inc = add nuw i32 %j.113, 1
|
||||
; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr
|
||||
; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop !7
|
||||
; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit6, !llvm.loop !7
|
||||
define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
|
||||
entry:
|
||||
%cmp14 = icmp eq i32 %itr, 0
|
||||
|
Loading…
Reference in New Issue
Block a user