Vendor import of llvm release_60 branch r323948:

https://llvm.org/svn/llvm-project/llvm/branches/release_60@323948
Dimitry Andric 2018-02-01 21:07:55 +00:00
parent a096e0bdf6
commit 4a6a1ccbec
46 changed files with 754 additions and 313 deletions


@@ -15,7 +15,7 @@ Introduction
 ============
 
 This document contains the release notes for the LLVM Compiler Infrastructure,
-release 5.0.0. Here we describe the status of LLVM, including major improvements
+release 6.0.0. Here we describe the status of LLVM, including major improvements
 from the previous release, improvements in various subprojects of LLVM, and
 some of the current users of the code. All LLVM releases may be downloaded
 from the `LLVM releases web site <http://llvm.org/releases/>`_.


@@ -508,7 +508,8 @@ class Value;
 ///  -> LHS = %a, RHS = i32 4, *CastOp = Instruction::SExt
 ///
 SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
-                                       Instruction::CastOps *CastOp = nullptr);
+                                       Instruction::CastOps *CastOp = nullptr,
+                                       unsigned Depth = 0);
 inline SelectPatternResult
 matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS,
                    Instruction::CastOps *CastOp = nullptr) {


@@ -422,14 +422,21 @@ class MCFillFragment : public MCFragment {
   uint8_t Value;
 
   /// The number of bytes to insert.
-  uint64_t Size;
+  const MCExpr &Size;
+
+  /// Source location of the directive that this fragment was created for.
+  SMLoc Loc;
 
 public:
-  MCFillFragment(uint8_t Value, uint64_t Size, MCSection *Sec = nullptr)
-      : MCFragment(FT_Fill, false, 0, Sec), Value(Value), Size(Size) {}
+  MCFillFragment(uint8_t Value, const MCExpr &Size, SMLoc Loc,
+                 MCSection *Sec = nullptr)
+      : MCFragment(FT_Fill, false, 0, Sec), Value(Value), Size(Size), Loc(Loc) {
+  }
 
   uint8_t getValue() const { return Value; }
-  uint64_t getSize() const { return Size; }
+  const MCExpr &getSize() const { return Size; }
+  SMLoc getLoc() const { return Loc; }
 
   static bool classof(const MCFragment *F) {
     return F->getKind() == MCFragment::FT_Fill;


@@ -161,7 +161,6 @@ class MCObjectStreamer : public MCStreamer {
   bool EmitRelocDirective(const MCExpr &Offset, StringRef Name,
                           const MCExpr *Expr, SMLoc Loc) override;
   using MCStreamer::emitFill;
-  void emitFill(uint64_t NumBytes, uint8_t FillValue) override;
   void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
                 SMLoc Loc = SMLoc()) override;
   void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr,


@@ -662,7 +662,7 @@ class MCStreamer {
   /// \brief Emit NumBytes bytes worth of the value specified by FillValue.
   /// This implements directives such as '.space'.
-  virtual void emitFill(uint64_t NumBytes, uint8_t FillValue);
+  void emitFill(uint64_t NumBytes, uint8_t FillValue);
 
   /// \brief Emit \p Size bytes worth of the value specified by \p FillValue.
   ///


@@ -4165,17 +4165,18 @@ static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
 /// a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
 static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
                                                Value *CmpLHS, Value *CmpRHS,
-                                               Value *TrueVal, Value *FalseVal) {
+                                               Value *TVal, Value *FVal,
+                                               unsigned Depth) {
   // TODO: Allow FP min/max with nnan/nsz.
   assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");
 
   Value *A, *B;
-  SelectPatternResult L = matchSelectPattern(TrueVal, A, B);
+  SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
   if (!SelectPatternResult::isMinOrMax(L.Flavor))
     return {SPF_UNKNOWN, SPNB_NA, false};
 
   Value *C, *D;
-  SelectPatternResult R = matchSelectPattern(FalseVal, C, D);
+  SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
   if (L.Flavor != R.Flavor)
     return {SPF_UNKNOWN, SPNB_NA, false};
@@ -4214,7 +4215,7 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
       break;
     return {SPF_UNKNOWN, SPNB_NA, false};
   default:
-    llvm_unreachable("Bad flavor while matching min/max");
+    return {SPF_UNKNOWN, SPNB_NA, false};
   }
 
   // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
@@ -4240,7 +4241,8 @@ static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
                                        Value *CmpLHS, Value *CmpRHS,
                                        Value *TrueVal, Value *FalseVal,
-                                       Value *&LHS, Value *&RHS) {
+                                       Value *&LHS, Value *&RHS,
+                                       unsigned Depth) {
   // Assume success. If there's no match, callers should not use these anyway.
   LHS = TrueVal;
   RHS = FalseVal;
@@ -4249,7 +4251,7 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
   if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
     return SPR;
 
-  SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
+  SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
   if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
     return SPR;
@@ -4313,7 +4315,8 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
                                               FastMathFlags FMF,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TrueVal, Value *FalseVal,
-                                              Value *&LHS, Value *&RHS) {
+                                              Value *&LHS, Value *&RHS,
+                                              unsigned Depth) {
   LHS = CmpLHS;
   RHS = CmpRHS;
@@ -4429,7 +4432,7 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
   }
 
   if (CmpInst::isIntPredicate(Pred))
-    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
+    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);
 
   // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
   // may return either -0.0 or 0.0, so fcmp/select pair has stricter
@@ -4550,7 +4553,11 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
 }
 
 SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
-                                             Instruction::CastOps *CastOp) {
+                                             Instruction::CastOps *CastOp,
+                                             unsigned Depth) {
+  if (Depth >= MaxDepth)
+    return {SPF_UNKNOWN, SPNB_NA, false};
+
   SelectInst *SI = dyn_cast<SelectInst>(V);
   if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
@@ -4579,7 +4586,7 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
       FMF.setNoSignedZeros();
     return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                 cast<CastInst>(TrueVal)->getOperand(0), C,
-                                LHS, RHS);
+                                LHS, RHS, Depth);
   }
   if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
     // If this is a potential fmin/fmax with a cast to integer, then ignore
@@ -4588,11 +4595,11 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
       FMF.setNoSignedZeros();
     return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                 C, cast<CastInst>(FalseVal)->getOperand(0),
-                                LHS, RHS);
+                                LHS, RHS, Depth);
     }
   }
   return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
-                              LHS, RHS);
+                              LHS, RHS, Depth);
 }
 
 /// Return true if "icmp Pred LHS RHS" is always true.

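An aside on the pattern these ValueTracking hunks introduce: matchSelectPattern now threads an explicit Depth parameter and bails out once it reaches MaxDepth, so IR in which a select transitively feeds itself (see the PR36045 test later in this commit) can no longer recurse without bound. A minimal stand-alone sketch of that guard, with hypothetical Node/analyze names rather than the LLVM API:

// depth_cap.cpp -- illustrative only; mirrors the Depth-cap idiom above.
#include <cstdio>

struct Node {
  const Node *Operand; // may point back at an ancestor, forming a cycle
  int Payload;
};

static const unsigned MaxDepth = 6; // same idea as ValueTracking's MaxDepth

// Walks the operand chain; returns -1 instead of recursing forever.
int analyze(const Node *N, unsigned Depth = 0) {
  if (Depth >= MaxDepth)
    return -1;                           // the guard the commit adds
  if (!N->Operand)
    return N->Payload;
  return analyze(N->Operand, Depth + 1); // every recursion passes Depth + 1
}

int main() {
  Node Self{nullptr, 0};
  Self.Operand = &Self;                  // self-referential, like PR36045
  std::printf("%d\n", analyze(&Self));   // prints -1 rather than overflowing
}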

@@ -812,6 +812,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   auto TII = MF->getTarget().getIntrinsicInfo();
   const Function *F = CI.getCalledFunction();
 
+  // FIXME: support Windows dllimport function calls.
+  if (F && F->hasDLLImportStorageClass())
+    return false;
+
   if (CI.isInlineAsm())
     return translateInlineAsm(CI, MIRBuilder);


@@ -661,7 +661,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
   }
   case TargetOpcode::G_FCONSTANT: {
     unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
-    MIRBuilder.buildFConstant(DstExt, *MI.getOperand(1).getFPImm());
+    const ConstantFP *CFP = MI.getOperand(1).getFPImm();
+    APFloat Val = CFP->getValueAPF();
+    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
+    auto LLT2Sem = [](LLT Ty) {
+      switch (Ty.getSizeInBits()) {
+      case 32:
+        return &APFloat::IEEEsingle();
+        break;
+      case 64:
+        return &APFloat::IEEEdouble();
+        break;
+      default:
+        llvm_unreachable("Unhandled fp widen type");
+      }
+    };
+    bool LosesInfo;
+    Val.convert(*LLT2Sem(WideTy), APFloat::rmTowardZero, &LosesInfo);
+    MIRBuilder.buildFConstant(DstExt, *ConstantFP::get(Ctx, Val));
     MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt);
     MI.eraseFromParent();
     return Legalized;

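Why widening a G_FCONSTANT this way is sound: the constant is re-encoded in the wider floating-point format and a G_FPTRUNC back to the original type is emitted, and every value of a narrower IEEE format is exactly representable in the wider one. A tiny illustration using native float/double (not the LLVM API):

// widen_roundtrip.cpp -- illustrative only.
#include <cstdio>

int main() {
  float Narrow = 0.1f;         // the narrow G_FCONSTANT
  double Widened = Narrow;     // re-encode in the wider semantics
  float Back = (float)Widened; // the G_FPTRUNC emitted afterwards
  std::printf("%s\n", Back == Narrow ? "round-trips exactly" : "lost bits");
}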

@@ -193,9 +193,10 @@ namespace {
     void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
 
     void usePhysReg(MachineOperand &MO);
-    void definePhysReg(MachineInstr &MI, MCPhysReg PhysReg, RegState NewState);
+    void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg,
+                       RegState NewState);
     unsigned calcSpillCost(MCPhysReg PhysReg) const;
-    void assignVirtToPhysReg(LiveReg&, MCPhysReg PhysReg);
+    void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
 
     LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
       return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
@@ -434,8 +435,8 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
 /// Mark PhysReg as reserved or free after spilling any virtregs. This is very
 /// similar to defineVirtReg except the physreg is reserved instead of
 /// allocated.
-void RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg PhysReg,
-                                 RegState NewState) {
+void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI,
+                                 MCPhysReg PhysReg, RegState NewState) {
   markRegUsedInInstr(PhysReg);
   switch (unsigned VirtReg = PhysRegState[PhysReg]) {
   case regDisabled:
@@ -857,7 +858,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
   // Add live-in registers as live.
   for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins())
     if (MRI->isAllocatable(LI.PhysReg))
-      definePhysReg(*MII, LI.PhysReg, regReserved);
+      definePhysReg(MII, LI.PhysReg, regReserved);
 
   VirtDead.clear();
   Coalesced.clear();

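The point of changing definePhysReg to take a MachineBasicBlock::iterator: the old MachineInstr& parameter forced the caller to write *MII, which is undefined behavior when the block is empty (the crash reproduced by the regalloc MIR test added later in this commit). Passing the iterator defers any dereference. A toy sketch of the same shape, with std::list standing in for a basic block (hypothetical names, not the LLVM API):

// iterator_param.cpp -- illustrative only.
#include <cstdio>
#include <list>

// Fixed shape: accepts an insertion point; never dereferences it here.
void defineAt(std::list<int>::iterator /*InsertPt*/) {}

int main() {
  std::list<int> EmptyBlock;         // a basic block with no instructions
  // defineAt(*EmptyBlock.begin());  // old shape: UB on an empty block
  defineAt(EmptyBlock.begin());      // new shape: fine even when empty
  std::printf("ok\n");
}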

@@ -1380,8 +1380,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
   FastISelFailed = false;
   // Initialize the Fast-ISel state, if needed.
   FastISel *FastIS = nullptr;
-  if (TM.Options.EnableFastISel)
+  if (TM.Options.EnableFastISel) {
+    DEBUG(dbgs() << "Enabling fast-isel\n");
     FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
+  }
 
   setupSwiftErrorVals(Fn, TLI, FuncInfo);


@@ -717,6 +717,8 @@ bool TargetPassConfig::addCoreISelPasses() {
   if (EnableGlobalISel == cl::BOU_TRUE ||
       (EnableGlobalISel == cl::BOU_UNSET && isGlobalISelEnabled() &&
        EnableFastISelOption != cl::BOU_TRUE)) {
+    TM->setFastISel(false);
+
     if (addIRTranslator())
       return true;


@@ -192,9 +192,6 @@ class MCAsmStreamer final : public MCStreamer {
   void EmitGPRel32Value(const MCExpr *Value) override;
 
-  void emitFill(uint64_t NumBytes, uint8_t FillValue) override;
-
   void emitFill(const MCExpr &NumBytes, uint64_t FillValue,
                 SMLoc Loc = SMLoc()) override;
@@ -965,17 +962,12 @@ void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
   EmitEOL();
 }
 
-/// emitFill - Emit NumBytes bytes worth of the value specified by
-/// FillValue. This implements directives such as '.space'.
-void MCAsmStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) {
-  if (NumBytes == 0) return;
-
-  const MCExpr *E = MCConstantExpr::create(NumBytes, getContext());
-  emitFill(*E, FillValue);
-}
-
 void MCAsmStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue,
                              SMLoc Loc) {
+  int64_t IntNumBytes;
+  if (NumBytes.evaluateAsAbsolute(IntNumBytes) && IntNumBytes == 0)
+    return;
+
   if (const char *ZeroDirective = MAI->getZeroDirective()) {
     // FIXME: Emit location directives
     OS << ZeroDirective;


@@ -281,8 +281,18 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
     return cast<MCRelaxableFragment>(F).getContents().size();
   case MCFragment::FT_CompactEncodedInst:
     return cast<MCCompactEncodedInstFragment>(F).getContents().size();
-  case MCFragment::FT_Fill:
-    return cast<MCFillFragment>(F).getSize();
+  case MCFragment::FT_Fill: {
+    auto &FF = cast<MCFillFragment>(F);
+    int64_t Size = 0;
+    if (!FF.getSize().evaluateAsAbsolute(Size, Layout))
+      getContext().reportError(FF.getLoc(),
+                               "expected assembly-time absolute expression");
+    if (Size < 0) {
+      getContext().reportError(FF.getLoc(), "invalid number of bytes");
+      return 0;
+    }
+    return Size;
+  }
   case MCFragment::FT_LEB:
     return cast<MCLEBFragment>(F).getContents().size();
@@ -540,7 +550,7 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
     for (unsigned I = 1; I < MaxChunkSize; ++I)
       Data[I] = Data[0];
 
-    uint64_t Size = FF.getSize();
+    uint64_t Size = FragmentSize;
    for (unsigned ChunkSize = MaxChunkSize; ChunkSize; ChunkSize /= 2) {
       StringRef Ref(Data, ChunkSize);
       for (uint64_t I = 0, E = Size / ChunkSize; I != E; ++I)

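The thread running through these MC changes: MCFillFragment now stores a const MCExpr& instead of a uint64_t, so a .space directive may reference symbols whose offsets are only known after layout, and computeFragmentSize evaluates the expression (and reports errors) at layout time. A minimal model of that deferral, with invented Layout/FillFragment types rather than the MC API:

// deferred_fill.cpp -- illustrative only.
#include <cstdio>
#include <functional>

struct Layout { long PrefixSize = 0; };      // bytes laid out before the gap

struct FillFragment {
  unsigned char Value;
  std::function<long(const Layout &)> Size;  // evaluated once layout is known
};

int main() {
  // Models ".space 4092 - (bar - foo)": keep the size symbolic until layout.
  FillFragment F{0, [](const Layout &L) { return 4092 - L.PrefixSize; }};
  Layout L;
  L.PrefixSize = 4;                           // (bar - foo) resolved to 4
  std::printf("fill %ld bytes\n", F.Size(L)); // fill 4088 bytes
}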

@@ -411,29 +411,19 @@ void MCMachOStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
 void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
                                    uint64_t Size, unsigned ByteAlignment) {
-  getAssembler().registerSection(*Section);
-
-  // The symbol may not be present, which only creates the section.
-  if (!Symbol)
-    return;
-
   // On darwin all virtual sections have zerofill type.
   assert(Section->isVirtualSection() && "Section does not have zerofill type!");
 
-  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+  PushSection();
+  SwitchSection(Section);
 
-  getAssembler().registerSymbol(*Symbol);
-
-  // Emit an align fragment if necessary.
-  if (ByteAlignment != 1)
-    new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section);
-
-  MCFragment *F = new MCFillFragment(0, Size, Section);
-  Symbol->setFragment(F);
-
-  // Update the maximum alignment on the zero fill section if necessary.
-  if (ByteAlignment > Section->getAlignment())
-    Section->setAlignment(ByteAlignment);
+  // The symbol may not be present, which only creates the section.
+  if (Symbol) {
+    EmitValueToAlignment(ByteAlignment, 0, 1, 0);
+    EmitLabel(Symbol);
+    EmitZeros(Size);
+  }
+  PopSection();
 }
 
 // This should always be called with the thread local bss section. Like the


@@ -577,28 +577,13 @@ bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name,
   return false;
 }
 
-void MCObjectStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) {
-  assert(getCurrentSectionOnly() && "need a section");
-  insert(new MCFillFragment(FillValue, NumBytes));
-}
-
 void MCObjectStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue,
                                 SMLoc Loc) {
   MCDataFragment *DF = getOrCreateDataFragment();
   flushPendingLabels(DF, DF->getContents().size());
 
-  int64_t IntNumBytes;
-  if (!NumBytes.evaluateAsAbsolute(IntNumBytes, getAssembler())) {
-    getContext().reportError(Loc, "expected absolute expression");
-    return;
-  }
-
-  if (IntNumBytes <= 0) {
-    getContext().reportError(Loc, "invalid number of bytes");
-    return;
-  }
-
-  emitFill(IntNumBytes, FillValue);
+  assert(getCurrentSectionOnly() && "need a section");
+  insert(new MCFillFragment(FillValue, NumBytes, Loc));
 }
 
 void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size,


@@ -184,8 +184,7 @@ void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
 /// Emit NumBytes bytes worth of the value specified by FillValue.
 /// This implements directives such as '.space'.
 void MCStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) {
-  for (uint64_t i = 0, e = NumBytes; i != e; ++i)
-    EmitIntValue(FillValue, 1);
+  emitFill(*MCConstantExpr::create(NumBytes, getContext()), FillValue);
 }
 
 void MCStreamer::emitFill(uint64_t NumValues, int64_t Size, int64_t Expr) {


@@ -257,20 +257,13 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
   auto *Symbol = cast<MCSymbolCOFF>(S);
 
   MCSection *Section = getContext().getObjectFileInfo()->getBSSSection();
-  getAssembler().registerSection(*Section);
-  if (Section->getAlignment() < ByteAlignment)
-    Section->setAlignment(ByteAlignment);
-
-  getAssembler().registerSymbol(*Symbol);
+  PushSection();
+  SwitchSection(Section);
+  EmitValueToAlignment(ByteAlignment, 0, 1, 0);
+  EmitLabel(Symbol);
   Symbol->setExternal(false);
-
-  if (ByteAlignment != 1)
-    new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0,
-                        ByteAlignment, Section);
-
-  MCFillFragment *Fragment = new MCFillFragment(
-      /*Value=*/0, Size, Section);
-  Symbol->setFragment(Fragment);
+  EmitZeros(Size);
+  PopSection();
 }
 
 void MCWinCOFFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,


@@ -528,7 +528,10 @@ static void addData(SmallVectorImpl<char> &DataBytes,
                           Align->getMaxBytesToEmit());
       DataBytes.resize(Size, Value);
     } else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) {
-      DataBytes.insert(DataBytes.end(), Fill->getSize(), Fill->getValue());
+      int64_t Size;
+      if (!Fill->getSize().evaluateAsAbsolute(Size))
+        llvm_unreachable("The fill should be an assembler constant");
+      DataBytes.insert(DataBytes.end(), Size, Fill->getValue());
     } else {
       const auto &DataFrag = cast<MCDataFragment>(Frag);
       const SmallVectorImpl<char> &Contents = DataFrag.getContents();


@@ -476,26 +476,27 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
     // ADRP + LDRX
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
             ADRPReg)
-        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
 
     ResultReg = createResultReg(&AArch64::GPR64RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
             ResultReg)
-        .addReg(ADRPReg)
-        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
-                                     AArch64II::MO_NC);
+        .addReg(ADRPReg)
+        .addGlobalAddress(GV, 0,
+                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
   } else {
     // ADRP + ADDX
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
             ADRPReg)
-        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
+        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
 
     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
             ResultReg)
-        .addReg(ADRPReg)
-        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
-        .addImm(0);
+        .addReg(ADRPReg)
+        .addGlobalAddress(GV, 0,
+                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
+        .addImm(0);
   }
   return ResultReg;
 }


@@ -929,6 +929,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       return false;
     }
 
+    // FIXME: PR36018: Volatile loads in some cases are incorrectly selected by
+    // folding with an extend. Until we have a G_SEXTLOAD solution bail out if
+    // we hit one.
+    if (Opcode == TargetOpcode::G_LOAD && MemOp.isVolatile())
+      return false;
+
     const unsigned PtrReg = I.getOperand(1).getReg();
 #ifndef NDEBUG
     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);


@@ -189,15 +189,18 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
     return AArch64II::MO_GOT;
 
+  unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
+                                                  : AArch64II::MO_NO_FLAG;
+
   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
-    return AArch64II::MO_GOT;
+    return AArch64II::MO_GOT | Flags;
 
   // The small code model's direct accesses use ADRP, which cannot
   // necessarily produce the value 0 (if the code is above 4GB).
   if (useSmallAddressing() && GV->hasExternalWeakLinkage())
-    return AArch64II::MO_GOT;
+    return AArch64II::MO_GOT | Flags;
 
-  return AArch64II::MO_NO_FLAG;
+  return Flags;
 }
 
 unsigned char AArch64Subtarget::classifyGlobalFunctionReference(


@@ -210,65 +210,73 @@ void SIInsertSkips::kill(MachineInstr &MI) {
     switch (MI.getOperand(2).getImm()) {
     case ISD::SETOEQ:
     case ISD::SETEQ:
-      Opcode = AMDGPU::V_CMPX_EQ_F32_e32;
+      Opcode = AMDGPU::V_CMPX_EQ_F32_e64;
       break;
     case ISD::SETOGT:
     case ISD::SETGT:
-      Opcode = AMDGPU::V_CMPX_LT_F32_e32;
+      Opcode = AMDGPU::V_CMPX_LT_F32_e64;
       break;
     case ISD::SETOGE:
     case ISD::SETGE:
-      Opcode = AMDGPU::V_CMPX_LE_F32_e32;
+      Opcode = AMDGPU::V_CMPX_LE_F32_e64;
       break;
     case ISD::SETOLT:
    case ISD::SETLT:
-      Opcode = AMDGPU::V_CMPX_GT_F32_e32;
+      Opcode = AMDGPU::V_CMPX_GT_F32_e64;
       break;
     case ISD::SETOLE:
     case ISD::SETLE:
-      Opcode = AMDGPU::V_CMPX_GE_F32_e32;
+      Opcode = AMDGPU::V_CMPX_GE_F32_e64;
       break;
     case ISD::SETONE:
     case ISD::SETNE:
-      Opcode = AMDGPU::V_CMPX_LG_F32_e32;
+      Opcode = AMDGPU::V_CMPX_LG_F32_e64;
       break;
     case ISD::SETO:
-      Opcode = AMDGPU::V_CMPX_O_F32_e32;
+      Opcode = AMDGPU::V_CMPX_O_F32_e64;
       break;
     case ISD::SETUO:
-      Opcode = AMDGPU::V_CMPX_U_F32_e32;
+      Opcode = AMDGPU::V_CMPX_U_F32_e64;
       break;
     case ISD::SETUEQ:
-      Opcode = AMDGPU::V_CMPX_NLG_F32_e32;
+      Opcode = AMDGPU::V_CMPX_NLG_F32_e64;
       break;
     case ISD::SETUGT:
-      Opcode = AMDGPU::V_CMPX_NGE_F32_e32;
+      Opcode = AMDGPU::V_CMPX_NGE_F32_e64;
       break;
     case ISD::SETUGE:
-      Opcode = AMDGPU::V_CMPX_NGT_F32_e32;
+      Opcode = AMDGPU::V_CMPX_NGT_F32_e64;
       break;
     case ISD::SETULT:
-      Opcode = AMDGPU::V_CMPX_NLE_F32_e32;
+      Opcode = AMDGPU::V_CMPX_NLE_F32_e64;
       break;
     case ISD::SETULE:
-      Opcode = AMDGPU::V_CMPX_NLT_F32_e32;
+      Opcode = AMDGPU::V_CMPX_NLT_F32_e64;
       break;
     case ISD::SETUNE:
-      Opcode = AMDGPU::V_CMPX_NEQ_F32_e32;
+      Opcode = AMDGPU::V_CMPX_NEQ_F32_e64;
       break;
     default:
       llvm_unreachable("invalid ISD:SET cond code");
     }
 
-    // TODO: Allow this:
-    if (!MI.getOperand(0).isReg() ||
-        !TRI->isVGPR(MBB.getParent()->getRegInfo(),
-                     MI.getOperand(0).getReg()))
-      llvm_unreachable("SI_KILL operand should be a VGPR");
+    assert(MI.getOperand(0).isReg());
 
-    BuildMI(MBB, &MI, DL, TII->get(Opcode))
-        .add(MI.getOperand(1))
-        .add(MI.getOperand(0));
+    if (TRI->isVGPR(MBB.getParent()->getRegInfo(),
+                    MI.getOperand(0).getReg())) {
+      Opcode = AMDGPU::getVOPe32(Opcode);
+      BuildMI(MBB, &MI, DL, TII->get(Opcode))
+          .add(MI.getOperand(1))
+          .add(MI.getOperand(0));
+    } else {
+      BuildMI(MBB, &MI, DL, TII->get(Opcode))
+          .addReg(AMDGPU::VCC, RegState::Define)
+          .addImm(0)  // src0 modifiers
+          .add(MI.getOperand(1))
+          .addImm(0)  // src1 modifiers
+          .add(MI.getOperand(0))
+          .addImm(0); // omod
+    }
     break;
   }
   case AMDGPU::SI_KILL_I1_TERMINATOR: {


@@ -39,11 +39,11 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
                                     const MCSubtargetInfo &STI) {
   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
   uint64_t TSFlags = Desc.TSFlags;
+  unsigned Flags = MI->getFlags();
 
-  if (TSFlags & X86II::LOCK)
+  if ((TSFlags & X86II::LOCK) || (Flags & X86::IP_HAS_LOCK))
     OS << "\tlock\t";
 
-  unsigned Flags = MI->getFlags();
   if (Flags & X86::IP_HAS_REPEAT_NE)
     OS << "\trepne\t";
   else if (Flags & X86::IP_HAS_REPEAT)


@@ -31776,9 +31776,10 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
     // Check all uses of the condition operand to check whether it will be
     // consumed by non-BLEND instructions. Those may require that all bits
     // are set properly.
-    for (SDNode *U : Cond->uses()) {
+    for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
+         UI != UE; ++UI) {
       // TODO: Add other opcodes eventually lowered into BLEND.
-      if (U->getOpcode() != ISD::VSELECT)
+      if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0)
         return SDValue();
     }

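What the combineSelect change tightens: it is no longer enough that every user of the condition is a VSELECT; the condition must be used as operand 0 specifically, since a use as a data operand still needs all bits set. A sketch of the predicate with toy types (not the SelectionDAG API):

// cond_use_check.cpp -- illustrative only.
#include <cstdio>
#include <vector>

enum Opcode { VSELECT, OTHER };
struct Use { Opcode UserOp; unsigned OperandNo; };

bool onlyUsedAsBlendCondition(const std::vector<Use> &Uses) {
  for (const Use &U : Uses)
    if (U.UserOp != VSELECT || U.OperandNo != 0)
      return false; // non-blend user, or used as data: keep all bits intact
  return true;
}

int main() {
  std::printf("%d\n", onlyUsedAsBlendCondition({{VSELECT, 0}})); // 1: cond use
  std::printf("%d\n", onlyUsedAsBlendCondition({{VSELECT, 2}})); // 0: data use
}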

@@ -142,10 +142,11 @@ recordConditions(const CallSite &CS, BasicBlock *Pred,
   recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
   BasicBlock *From = Pred;
   BasicBlock *To = Pred;
-  SmallPtrSet<BasicBlock *, 4> Visited = {From};
+  SmallPtrSet<BasicBlock *, 4> Visited;
   while (!Visited.count(From->getSinglePredecessor()) &&
          (From = From->getSinglePredecessor())) {
     recordCondition(CS, From, To, Conditions);
+    Visited.insert(From);
     To = From;
   }
 }

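The CallSiteSplitting bug in miniature: the old code seeded Visited with only the start block and never added to it, so a single-predecessor cycle that does not pass through the start (like the unreachable %for.inc self-loop in the test added later in this commit) looped forever. Recording every block actually visited terminates the walk. A self-contained sketch with a toy Block type, not LLVM IR:

// visited_walk.cpp -- illustrative only.
#include <cstdio>
#include <set>

struct Block { Block *SinglePred = nullptr; };

int walkPreds(Block *From) {
  std::set<Block *> Visited; // starts empty, like the fixed code
  int Steps = 0;
  while (From->SinglePred && !Visited.count(From->SinglePred)) {
    From = From->SinglePred;
    Visited.insert(From);    // record *every* block we step to
    ++Steps;
  }
  return Steps;
}

int main() {
  Block A, B, Start;
  A.SinglePred = &B;
  B.SinglePred = &A;         // A <-> B: a cycle not containing Start
  Start.SinglePred = &A;
  std::printf("%d\n", walkPreds(&Start)); // 2, instead of never returning
}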

@@ -14,6 +14,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/RegionPass.h"
@@ -176,8 +177,9 @@ class StructurizeCFG : public RegionPass {
   Region *ParentRegion;
 
   DominatorTree *DT;
+  LoopInfo *LI;
 
-  std::deque<RegionNode *> Order;
+  SmallVector<RegionNode *, 8> Order;
   BBSet Visited;
 
   BBPhiMap DeletedPhis;
@@ -202,7 +204,7 @@ class StructurizeCFG : public RegionPass {
   void gatherPredicates(RegionNode *N);
 
-  void analyzeNode(RegionNode *N);
+  void collectInfos();
 
   void insertConditions(bool Loops);
@@ -256,6 +258,7 @@ class StructurizeCFG : public RegionPass {
       AU.addRequired<DivergenceAnalysis>();
     AU.addRequiredID(LowerSwitchID);
     AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
 
     AU.addPreserved<DominatorTreeWrapperPass>();
     RegionPass::getAnalysisUsage(AU);
@@ -289,17 +292,55 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
 /// \brief Build up the general order of nodes
 void StructurizeCFG::orderNodes() {
-  assert(Visited.empty());
-  assert(Predicates.empty());
-  assert(Loops.empty());
-  assert(LoopPreds.empty());
+  ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
+  SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
 
-  // This must be RPO order for the back edge detection to work
-  for (RegionNode *RN : ReversePostOrderTraversal<Region*>(ParentRegion)) {
-    // FIXME: Is there a better order to use for structurization?
-    Order.push_back(RN);
-    analyzeNode(RN);
+  // The reverse post-order traversal of the list gives us an ordering close
+  // to what we want.  The only problem with it is that sometimes backedges
+  // for outer loops will be visited before backedges for inner loops.
+  for (RegionNode *RN : RPOT) {
+    BasicBlock *BB = RN->getEntry();
+    Loop *Loop = LI->getLoopFor(BB);
+    ++LoopBlocks[Loop];
   }
+
+  unsigned CurrentLoopDepth = 0;
+  Loop *CurrentLoop = nullptr;
+  for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+    BasicBlock *BB = (*I)->getEntry();
+    unsigned LoopDepth = LI->getLoopDepth(BB);
+
+    if (is_contained(Order, *I))
+      continue;
+
+    if (LoopDepth < CurrentLoopDepth) {
+      // Make sure we have visited all blocks in this loop before moving back to
+      // the outer loop.
+
+      auto LoopI = I;
+      while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
+        LoopI++;
+        BasicBlock *LoopBB = (*LoopI)->getEntry();
+        if (LI->getLoopFor(LoopBB) == CurrentLoop) {
+          --BlockCount;
+          Order.push_back(*LoopI);
+        }
+      }
+    }
+
+    CurrentLoop = LI->getLoopFor(BB);
+    if (CurrentLoop)
+      LoopBlocks[CurrentLoop]--;
+    CurrentLoopDepth = LoopDepth;
+
+    Order.push_back(*I);
+  }
+
+  // This pass originally used a post-order traversal and then operated on
+  // the list in reverse. Now that we are using a reverse post-order traversal
+  // rather than re-working the whole pass to operate on the list in order,
+  // we just reverse the list and continue to operate on it in reverse.
+  std::reverse(Order.begin(), Order.end());
 }
 
 /// \brief Determine the end of the loops
@@ -425,19 +466,32 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
 }
 
 /// \brief Collect various loop and predicate infos
-void StructurizeCFG::analyzeNode(RegionNode *RN) {
-  DEBUG(dbgs() << "Visiting: "
-               << (RN->isSubRegion() ? "SubRegion with entry: " : "")
-               << RN->getEntry()->getName() << '\n');
+void StructurizeCFG::collectInfos() {
+  // Reset predicate
+  Predicates.clear();
 
-  // Analyze all the conditions leading to a node
-  gatherPredicates(RN);
+  // and loop infos
+  Loops.clear();
+  LoopPreds.clear();
 
-  // Remember that we've seen this node
-  Visited.insert(RN->getEntry());
+  // Reset the visited nodes
+  Visited.clear();
 
-  // Find the last back edges
-  analyzeLoops(RN);
+  for (RegionNode *RN : reverse(Order)) {
+    DEBUG(dbgs() << "Visiting: "
+                 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
+                 << RN->getEntry()->getName() << " Loop Depth: "
+                 << LI->getLoopDepth(RN->getEntry()) << "\n");
+
+    // Analyze all the conditions leading to a node
+    gatherPredicates(RN);
+
+    // Remember that we've seen this node
+    Visited.insert(RN->getEntry());
+
+    // Find the last back edges
+    analyzeLoops(RN);
+  }
 }
 
 /// \brief Insert the missing branch conditions
@@ -610,7 +664,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
   LLVMContext &Context = Func->getContext();
   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
-                       Order.front()->getEntry();
+                       Order.back()->getEntry();
   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
                                         Func, Insert);
   DT->addNewBlock(Flow, Dominator);
@@ -690,8 +744,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
 /// Take one node from the order vector and wire it up
 void StructurizeCFG::wireFlow(bool ExitUseAllowed,
                               BasicBlock *LoopEnd) {
-  RegionNode *Node = Order.front();
-  Order.pop_front();
+  RegionNode *Node = Order.pop_back_val();
   Visited.insert(Node->getEntry());
 
   if (isPredictableTrue(Node)) {
@@ -715,7 +768,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
     PrevNode = Node;
 
     while (!Order.empty() && !Visited.count(LoopEnd) &&
-           dominatesPredicates(Entry, Order.front())) {
+           dominatesPredicates(Entry, Order.back())) {
       handleLoops(false, LoopEnd);
     }
@@ -726,7 +779,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
 void StructurizeCFG::handleLoops(bool ExitUseAllowed,
                                  BasicBlock *LoopEnd) {
-  RegionNode *Node = Order.front();
+  RegionNode *Node = Order.back();
   BasicBlock *LoopStart = Node->getEntry();
 
   if (!Loops.count(LoopStart)) {
@@ -871,9 +924,10 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
   ParentRegion = R;
 
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 
   orderNodes();
+  collectInfos();
   createFlow();
   insertConditions(false);
   insertConditions(true);


@@ -25,6 +25,7 @@
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalAlias.h"
@@ -536,13 +537,23 @@ Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) {
   return None;
 }
 
+static Metadata *cloneOrBuildODR(const MDNode &N) {
+  auto *CT = dyn_cast<DICompositeType>(&N);
+  // If ODR type uniquing is enabled, we would have uniqued composite types
+  // with identifiers during bitcode reading, so we can just use CT.
+  if (CT && CT->getContext().isODRUniquingDebugTypes() &&
+      CT->getIdentifier() != "")
+    return const_cast<DICompositeType *>(CT);
+  return MDNode::replaceWithDistinct(N.clone());
+}
+
 MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) {
   assert(N.isDistinct() && "Expected a distinct node");
   assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node");
-  DistinctWorklist.push_back(cast<MDNode>(
-      (M.Flags & RF_MoveDistinctMDs)
-          ? M.mapToSelf(&N)
-          : M.mapToMetadata(&N, MDNode::replaceWithDistinct(N.clone()))));
+  DistinctWorklist.push_back(
+      cast<MDNode>((M.Flags & RF_MoveDistinctMDs)
+                       ? M.mapToSelf(&N)
+                       : M.mapToMetadata(&N, cloneOrBuildODR(N))));
   return DistinctWorklist.back();
 }

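The effect of cloneOrBuildODR: when the context already uniques composite types by ODR identifier (as during ThinLTO bitcode reading), mapping a distinct DICompositeType returns the canonical node instead of minting a fresh clone, which is what keeps the dicompositetype-unique-alias test later in this commit down to a single DICompositeType. A rough model with a hypothetical TypeNode and map, not the Metadata API:

// odr_unique.cpp -- illustrative only.
#include <cstdio>
#include <map>
#include <string>

struct TypeNode { std::string Identifier; };

std::map<std::string, const TypeNode *> ODRMap; // stands in for the context

const TypeNode *mapType(const TypeNode *N, bool ODRUniquing) {
  if (ODRUniquing && !N->Identifier.empty()) {
    auto It = ODRMap.emplace(N->Identifier, N).first;
    return It->second;     // one canonical node per ODR identifier
  }
  return new TypeNode(*N); // otherwise clone, as before
}

int main() {
  TypeNode A{"_ZTS4CFVS"}, B{"_ZTS4CFVS"}; // the same ODR name, twice
  bool Uniqued = mapType(&A, true) == mapType(&B, true);
  std::printf("%s\n", Uniqued ? "uniqued" : "cloned"); // prints "uniqued"
}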

@@ -0,0 +1,46 @@
; RUN: opt -simplifycfg < %s -S | FileCheck %s
; The dead code would cause a select that had itself
; as an operand to be analyzed. This would then cause
; infinite recursion and eventual crash.
define void @PR36045(i1 %t, i32* %b) {
; CHECK-LABEL: @PR36045(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
entry:
br i1 %t, label %if, label %end
if:
br i1 %t, label %unreach, label %pre
unreach:
unreachable
pre:
%p = phi i32 [ 70, %if ], [ %sel, %for ]
br label %for
for:
%cmp = icmp sgt i32 %p, 8
%add = add i32 %p, 2
%sel = select i1 %cmp, i32 %p, i32 %add
%cmp21 = icmp ult i32 %sel, 21
br i1 %cmp21, label %pre, label %for.end
for.end:
br i1 %t, label %unreach2, label %then12
then12:
store i32 0, i32* %b
br label %unreach2
unreach2:
%spec = phi i32 [ %sel, %for.end ], [ 42, %then12 ]
unreachable
end:
ret void
}


@@ -0,0 +1,11 @@
; RUN: llc -mtriple=aarch64_be-- %s -o /dev/null -debug-only=isel -O0 2>&1 | FileCheck %s
; REQUIRES: asserts
; This test uses big endian in order to force an abort since it's not currently supported for GISel.
; The purpose is to check that we don't fall back to FastISel. Checking the pass structure is insufficient
; because FastISel is set up inside SelectionDAGISel, so it does not show up in the pass structure.
; CHECK-NOT: Enabling fast-isel
define void @empty() {
ret void
}


@@ -0,0 +1,14 @@
; RUN: llc -O0 -mtriple=aarch64-apple-ios -o - %s | FileCheck %s
@g = global i16 0, align 2
declare void @bar(i32)
; Check that only one load is generated. We fall back to SelectionDAG for the
; volatile load, so the load and the sign extension are selected together (ldrsh).
define hidden void @foo() {
; CHECK-NOT: ldrh
; CHECK: ldrsh
%1 = load volatile i16, i16* @g, align 2
%2 = sext i16 %1 to i32
call void @bar(i32 %2)
ret void
}


@@ -75,7 +75,7 @@
 ; CHECK: %w0 = COPY [[C]](s32)
 ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
 ; CHECK: %x0 = COPY [[C1]](s64)
-; CHECK: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT half 0xH0000
+; CHECK: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
 ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[C2]](s32)
 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
 ; CHECK: %w0 = COPY [[ANYEXT]](s32)


@@ -1,4 +1,6 @@
-; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s
+; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,DAG-ISEL
+; RUN: llc -mtriple aarch64-unknown-windows-msvc -fast-isel -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,FAST-ISEL
+; RUN: llc -mtriple aarch64-unknown-windows-msvc -O0 -filetype asm -o - %s | FileCheck %s -check-prefixes=CHECK,GLOBAL-ISEL,GLOBAL-ISEL-FALLBACK
 
 @var = external dllimport global i32
 @ext = external global i32
@@ -23,7 +25,11 @@ define i32 @get_ext() {
 ; CHECK-LABEL: get_ext
 ; CHECK: adrp x8, ext
-; CHECK: ldr w0, [x8, ext]
+; DAG-ISEL: ldr w0, [x8, ext]
+; FAST-ISEL: add x8, x8, ext
+; FAST-ISEL: ldr w0, [x8]
+; GLOBAL-ISEL-FALLBACK: add x8, x8, ext
+; GLOBAL-ISEL-FALLBACK: ldr w0, [x8]
 ; CHECK: ret
 
 define i32* @get_var_pointer() {
@@ -31,8 +37,8 @@ define i32* @get_var_pointer() {
 }
 
 ; CHECK-LABEL: get_var_pointer
-; CHECK: adrp x0, __imp_var
-; CHECK: ldr x0, [x0, __imp_var]
+; CHECK: adrp [[REG1:x[0-9]+]], __imp_var
+; CHECK: ldr {{x[0-9]+}}, {{\[}}[[REG1]], __imp_var]
 ; CHECK: ret
 
 define i32 @call_external() {
@@ -51,4 +57,6 @@ define i32 @call_internal() {
 }
 
 ; CHECK-LABEL: call_internal
-; CHECK: b internal
+; DAG-ISEL: b internal
+; FAST-ISEL: b internal
+; GLOBAL-ISEL: bl internal


@@ -0,0 +1,26 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-apple-ios -run-pass regallocfast -o - %s | FileCheck %s
# This test used to crash the fast register alloc.
# Basically, when a basic block has live-ins, the fast regalloc
# was dereferencing the begin iterator of this block. However,
# when this block is empty, that iterator cannot be dereferenced,
# and the pass would simply crash!
---
name: crashing
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: crashing
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: %x0, %x1
; CHECK: bb.1:
; CHECK: renamable %w0 = MOVi32imm -1
; CHECK: RET_ReallyLR implicit killed %w0
bb.1:
liveins: %x0, %x1
bb.2:
%0:gpr32 = MOVi32imm -1
%w0 = COPY %0
RET_ReallyLR implicit %w0
...


@@ -234,6 +234,23 @@ define amdgpu_ps void @wqm(float %a) {
   ret void
 }
 
+; This checks that we use the 64-bit encoding when the operand is a SGPR.
+; SI-LABEL: {{^}}test_sgpr:
+; SI: v_cmpx_ge_f32_e64
+define amdgpu_ps void @test_sgpr(float inreg %a) #0 {
+  %c = fcmp ole float %a, 1.000000e+00
+  call void @llvm.amdgcn.kill(i1 %c) #1
+  ret void
+}
+
+; SI-LABEL: {{^}}test_non_inline_imm_sgpr:
+; SI-NOT: v_cmpx_ge_f32_e64
+define amdgpu_ps void @test_non_inline_imm_sgpr(float inreg %a) #0 {
+  %c = fcmp ole float %a, 1.500000e+00
+  call void @llvm.amdgcn.kill(i1 %c) #1
+  ret void
+}
+
 declare void @llvm.amdgcn.kill(i1) #0
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 declare i1 @llvm.amdgcn.wqm.vote(i1)


@@ -66,10 +66,9 @@ ENDIF: ; preds = %LOOP
; OPT-LABEL: define amdgpu_kernel void @multi_if_break_loop(
; OPT: llvm.amdgcn.break
; OPT: llvm.amdgcn.break
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.loop
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.if.break
; OPT: llvm.amdgcn.end.cf
; GCN-LABEL: {{^}}multi_if_break_loop:


@@ -124,100 +124,55 @@ bb23: ; preds = %bb10
; Earlier version of above, before a run of the structurizer.
; IR-LABEL: @nested_loop_conditions(
; IR: %tmp1235 = icmp slt i32 %tmp1134, 9
; IR: br i1 %tmp1235, label %bb14.lr.ph, label %Flow
; IR: bb14.lr.ph:
; IR: br label %bb14
; IR: Flow3:
; IR: call void @llvm.amdgcn.end.cf(i64 %18)
; IR: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %17)
; IR: %1 = extractvalue { i1, i64 } %0, 0
; IR: %2 = extractvalue { i1, i64 } %0, 1
; IR: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4
; IR: bb4.bb13_crit_edge:
; IR: br label %Flow4
; IR: Flow4:
; IR: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ]
; IR: call void @llvm.amdgcn.end.cf(i64 %2)
; IR: br label %Flow
; IR: bb13:
; IR: br label %bb31
; IR: Flow:
; IR: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ]
; IR: %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4)
; IR: %6 = extractvalue { i1, i64 } %5, 0
; IR: %7 = extractvalue { i1, i64 } %5, 1
; IR: br i1 %6, label %bb13, label %bb31
; IR: bb14:
; IR: %phi.broken = phi i64 [ %18, %Flow2 ], [ 0, %bb14.lr.ph ]
; IR: %tmp1037 = phi i32 [ %tmp1033, %bb14.lr.ph ], [ %16, %Flow2 ]
; IR: %tmp936 = phi <4 x i32> [ %tmp932, %bb14.lr.ph ], [ %15, %Flow2 ]
; IR: %tmp15 = icmp eq i32 %tmp1037, 1
; IR: %8 = xor i1 %tmp15, true
; IR: %9 = call { i1, i64 } @llvm.amdgcn.if(i1 %8)
; IR: %10 = extractvalue { i1, i64 } %9, 0
; IR: %11 = extractvalue { i1, i64 } %9, 1
; IR: br i1 %10, label %bb31.loopexit, label %Flow1
; IR: Flow7:
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %17)
; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %15)
; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0
; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1
; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow8
; IR: Flow1:
; IR: %12 = call { i1, i64 } @llvm.amdgcn.else(i64 %11)
; IR: %13 = extractvalue { i1, i64 } %12, 0
; IR: %14 = extractvalue { i1, i64 } %12, 1
; IR: br i1 %13, label %bb16, label %Flow2
; IR: bb16:
; IR: %tmp17 = bitcast i64 %tmp3 to <2 x i32>
; IR: br label %bb18
; IR-NEXT: %loop.phi = phi i64 [ %loop.phi9, %Flow6 ], [ %phi.broken, %bb14 ]
; IR-NEXT: %13 = phi <4 x i32> [ %29, %Flow6 ], [ undef, %bb14 ]
; IR-NEXT: %14 = phi i32 [ %30, %Flow6 ], [ undef, %bb14 ]
; IR-NEXT: %15 = phi i1 [ %31, %Flow6 ], [ false, %bb14 ]
; IR-NEXT: %16 = phi i1 [ false, %Flow6 ], [ %8, %bb14 ]
; IR-NEXT: %17 = call i64 @llvm.amdgcn.else.break(i64 %11, i64 %loop.phi)
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %11)
; IR-NEXT: %18 = call i1 @llvm.amdgcn.loop(i64 %17)
; IR-NEXT: br i1 %18, label %Flow7, label %bb14
; IR: Flow2:
; IR: %loop.phi = phi i64 [ %21, %bb21 ], [ %phi.broken, %Flow1 ]
; IR: %15 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %Flow1 ]
; IR: %16 = phi i32 [ %tmp10, %bb21 ], [ undef, %Flow1 ]
; IR: %17 = phi i1 [ %20, %bb21 ], [ false, %Flow1 ]
; IR: %18 = call i64 @llvm.amdgcn.else.break(i64 %14, i64 %loop.phi)
; IR: call void @llvm.amdgcn.end.cf(i64 %14)
; IR: %19 = call i1 @llvm.amdgcn.loop(i64 %18)
; IR: br i1 %19, label %Flow3, label %bb14
; IR: bb18:
; IR: %tmp19 = load volatile i32, i32 addrspace(1)* undef
; IR: %tmp20 = icmp slt i32 %tmp19, 9
; IR: br i1 %tmp20, label %bb21, label %bb18
; IR-NEXT: %loop.phi10 = phi i64 [ %loop.phi11, %Flow5 ], [ %12, %bb16 ]
; IR-NEXT: %19 = phi <4 x i32> [ %29, %Flow5 ], [ undef, %bb16 ]
; IR-NEXT: %20 = phi i32 [ %30, %Flow5 ], [ undef, %bb16 ]
; IR-NEXT: %21 = phi i1 [ %31, %Flow5 ], [ false, %bb16 ]
; IR-NEXT: %22 = phi i1 [ false, %Flow5 ], [ false, %bb16 ]
; IR-NEXT: %23 = phi i1 [ false, %Flow5 ], [ %8, %bb16 ]
; IR-NEXT: %24 = call { i1, i64 } @llvm.amdgcn.if(i1 %23)
; IR-NEXT: %25 = extractvalue { i1, i64 } %24, 0
; IR-NEXT: %26 = extractvalue { i1, i64 } %24, 1
; IR-NEXT: br i1 %25, label %bb21, label %Flow3
; IR: bb21:
; IR: %tmp22 = extractelement <2 x i32> %tmp17, i64 1
; IR: %tmp23 = lshr i32 %tmp22, 16
; IR: %tmp24 = select i1 undef, i32 undef, i32 %tmp23
; IR: %tmp25 = uitofp i32 %tmp24 to float
; IR: %tmp26 = fmul float %tmp25, 0x3EF0001000000000
; IR: %tmp27 = fsub float %tmp26, undef
; IR: %tmp28 = fcmp olt float %tmp27, 5.000000e-01
; IR: %tmp29 = select i1 %tmp28, i64 1, i64 2
; IR: %tmp30 = extractelement <4 x i32> %tmp936, i64 %tmp29
; IR: %tmp7 = zext i32 %tmp30 to i64
; IR: %tmp8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* undef, i64 %tmp7
; IR: %tmp9 = load <4 x i32>, <4 x i32> addrspace(1)* %tmp8, align 16
; IR: %tmp10 = extractelement <4 x i32> %tmp9, i64 0
; IR: %tmp11 = load volatile i32, i32 addrspace(1)* undef
; IR: %tmp12 = icmp slt i32 %tmp11, 9
; IR: %20 = xor i1 %tmp12, true
; IR: %21 = call i64 @llvm.amdgcn.if.break(i1 %20, i64 %phi.broken)
; IR: br label %Flow2
; IR: %tmp12 = icmp slt i32 %tmp11, 9
; IR-NEXT: %27 = xor i1 %tmp12, true
; IR-NEXT: %28 = call i64 @llvm.amdgcn.if.break(i1 %27, i64 %phi.broken)
; IR-NEXT: br label %Flow3
; IR: bb31.loopexit:
; IR: br label %Flow1
; IR: Flow3:
; IR-NEXT: %loop.phi11 = phi i64 [ %phi.broken, %bb21 ], [ %phi.broken, %Flow2 ]
; IR-NEXT: %loop.phi9 = phi i64 [ %28, %bb21 ], [ %loop.phi10, %Flow2 ]
; IR-NEXT: %29 = phi <4 x i32> [ %tmp9, %bb21 ], [ %19, %Flow2 ]
; IR-NEXT: %30 = phi i32 [ %tmp10, %bb21 ], [ %20, %Flow2 ]
; IR-NEXT: %31 = phi i1 [ %27, %bb21 ], [ %21, %Flow2 ]
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %26)
; IR-NEXT: br i1 %22, label %bb31.loopexit, label %Flow4
; IR: bb31:
; IR: call void @llvm.amdgcn.end.cf(i64 %7)
; IR: store volatile i32 0, i32 addrspace(1)* undef
; IR: ret void
; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7)
; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
; IR-NEXT: ret void
; GCN-LABEL: {{^}}nested_loop_conditions:


@@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O0 | FileCheck %s
define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <16 x i64> %arg3, <16 x i64> %arg4) {
; CHECK-LABEL: pluto:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $352, %rsp # imm = 0x160
; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
; CHECK-NEXT: vmovaps 144(%rbp), %ymm11
; CHECK-NEXT: vmovaps 112(%rbp), %ymm12
; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,18446744071562067968,18446744071562067968]
; CHECK-NEXT: vblendvpd %ymm0, %ymm2, %ymm6, %ymm0
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm2[0,1],ymm13[2,3],ymm2[4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm2[0,1],ymm8[2,3,4,5,6,7]
; CHECK-NEXT: vmovaps {{.*#+}} ymm13 = [18446744071562067968,18446744071562067968,0,0]
; CHECK-NEXT: vblendvpd %ymm13, %ymm9, %ymm6, %ymm6
; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm0[0,1,2,3],ymm11[4,5],ymm0[6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[3,2,2,1]
; CHECK-NEXT: vmovaps %xmm6, %xmm11
; CHECK-NEXT: # implicit-def: %ymm13
; CHECK-NEXT: vinserti128 $1, %xmm11, %ymm13, %ymm13
; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm13[4,5],ymm9[6,7]
; CHECK-NEXT: vmovaps %xmm0, %xmm11
; CHECK-NEXT: # implicit-def: %ymm0
; CHECK-NEXT: vinserti128 $1, %xmm11, %ymm0, %ymm0
; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm8[0,1,2,3],ymm7[4,5],ymm8[6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm13 = ymm8[2,0,2,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm13[0,1,2,3],ymm0[4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm7[0,1,2,3],ymm2[4,5],ymm7[6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,0,2,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm5[4,5],ymm6[6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[0,1,1,2]
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm6[4,5,6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm8[2,1,1,3]
; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3,4,5],ymm5[6,7]
; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm9, %ymm0
; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm5, %ymm1
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm5 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm2, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm5, %ymm2
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm6 # 32-byte Reload
; CHECK-NEXT: vmovaps %ymm3, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm6, %ymm3
; CHECK-NEXT: vmovaps %ymm15, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm10, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm4, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
bb:
%tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1
%tmp5 = select <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg2, <16 x i64> zeroinitializer
%tmp6 = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <16 x i64> %arg3, <16 x i64> %tmp5
%tmp7 = shufflevector <16 x i64> %tmp, <16 x i64> %tmp6, <16 x i32> <i32 11, i32 18, i32 24, i32 9, i32 14, i32 29, i32 29, i32 6, i32 14, i32 28, i32 8, i32 9, i32 22, i32 12, i32 25, i32 6>
ret <16 x i64> %tmp7
}

test/MC/X86/eval-fill.s (new file)

@@ -0,0 +1,17 @@
// RUN: llvm-mc -filetype=obj %s -o - -triple x86_64-pc-linux | llvm-readobj -s | FileCheck %s
// CHECK: Name: .text
// CHECK-NEXT: Type: SHT_PROGBITS
// CHECK-NEXT: Flags [
// CHECK-NEXT: SHF_ALLOC
// CHECK-NEXT: SHF_EXECINSTR
// CHECK-NEXT: ]
// CHECK-NEXT: Address:
// CHECK-NEXT: Offset:
// CHECK-NEXT: Size: 4092
.globl foo
foo:
.space 4
bar:
.space 4092 - (bar - foo)

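Worth spelling out why the test expects Size: 4092: foo occupies bytes 0 through 3, so when layout resolves (bar - foo) it equals 4; the second .space therefore contributes 4092 - 4 = 4088 bytes, and 4 + 4088 = 4092, which is exactly the section size the CHECK lines verify. The old integer-only emitFill could not have represented this symbolic size at parse time.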

@@ -10774,3 +10774,9 @@ btcl $4, (%eax)
 // CHECK: clzero
 // CHECK: encoding: [0x0f,0x01,0xfc]
 clzero
 
+// CHECK: lock addl %esi, (%edi)
+// INTEL: lock add dword ptr [edi], esi
+// CHECK: encoding: [0xf0,0x01,0x37]
+lock add %esi, (%edi)


@@ -0,0 +1,39 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-scei-ps4"
%struct.CFVS = type { %struct.Vec }
%struct.Vec = type { i8 }
%struct.S = type { i8 }
@_ZN4CFVSD1Ev = alias void (%struct.CFVS*), void (%struct.CFVS*)* @_ZN4CFVSD2Ev
define void @_ZN4CFVSD2Ev(%struct.CFVS* %this) unnamed_addr align 2 !dbg !8 {
entry:
%this.addr = alloca %struct.CFVS*, align 8
store %struct.CFVS* %this, %struct.CFVS** %this.addr, align 8
%this1 = load %struct.CFVS*, %struct.CFVS** %this.addr, align 8
%m_val = getelementptr inbounds %struct.CFVS, %struct.CFVS* %this1, i32 0, i32 0
ret void
}
declare dereferenceable(1) %struct.S* @_Z3Getv()
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "bz188598-b.cpp", directory: "")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 2}
!6 = !{i32 7, !"PIC Level", i32 2}
!8 = distinct !DISubprogram(name: "~CFVS", linkageName: "_ZN4CFVSD2Ev", scope: !9, file: !1, line: 2, type: !28, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, declaration: !27, variables: !2)
!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "CFVS", file: !10, line: 7, size: 8, elements: !11, identifier: "_ZTS4CFVS")
!10 = !DIFile(filename: "./bz188598.h", directory: "")
!11 = !{!35}
!27 = !DISubprogram(name: "~CFVS", scope: !9, file: !10, line: 8, type: !28, isLocal: false, isDefinition: false, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false)
!28 = !DISubroutineType(types: !29)
!29 = !{null, !30}
!30 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
!35 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)


@@ -0,0 +1,62 @@
; RUN: opt -module-summary -o %t1.bc %s
; RUN: opt -module-summary -o %t2.bc %S/Inputs/dicompositetype-unique-alias.ll
; RUN: llvm-lto --thinlto-action=run %t1.bc %t2.bc -thinlto-save-temps=%t3.
; RUN: llvm-dis %t3.0.3.imported.bc -o - | FileCheck %s
; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t --save-temps \
; RUN: -r %t1.bc,_ZN1CD2Ev,pl \
; RUN: -r %t1.bc,_ZN4CFVSD1Ev,l \
; RUN: -r %t1.bc,_ZN4CFVSD2Ev,l \
; RUN: -r %t1.bc,_Z3Getv,l \
; RUN: -r %t2.bc,_ZN4CFVSD1Ev,pl \
; RUN: -r %t2.bc,_ZN4CFVSD2Ev,pl \
; RUN: -r %t2.bc,_Z3Getv,l
; RUN: llvm-dis %t.1.3.import.bc -o - | FileCheck %s
; Only llvm-lto2 adds the dso_local keyword, hence the {{.*}}
; CHECK: define available_externally{{.*}} void @_ZN4CFVSD1Ev
; Confirm that we only have a single DICompositeType after importing
; both an alias and its aliasee, since ODR Type Uniquing is enabled.
; CHECK: DICompositeType
; CHECK-NOT: DICompositeType
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-scei-ps4"
%class.C = type <{ i32 (...)**, %class.A, %struct.CFVS, [6 x i8] }>
%class.A = type { %struct.Vec }
%struct.Vec = type { i8 }
%struct.CFVS = type { %struct.Vec }
%struct.S = type { i8 }
define void @_ZN1CD2Ev(%class.C* %this) unnamed_addr align 2 {
entry:
%this.addr = alloca %class.C*, align 8
%this1 = load %class.C*, %class.C** %this.addr, align 8
%m = getelementptr inbounds %class.C, %class.C* %this1, i32 0, i32 2
call void @_ZN4CFVSD1Ev(%struct.CFVS* %m), !dbg !50
call void @_ZN4CFVSD2Ev(%struct.CFVS* %m), !dbg !50
ret void
}
declare void @_ZN4CFVSD1Ev(%struct.CFVS*) unnamed_addr
declare void @_ZN4CFVSD2Ev(%struct.CFVS*) unnamed_addr
declare dereferenceable(1) %struct.S* @_Z3Getv()
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 6.0.0 (trunk 321360) (llvm/trunk 321359)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "bz188598-a.cpp", directory: ".")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 2}
!6 = !{i32 7, !"PIC Level", i32 2}
!8 = distinct !DISubprogram(name: "~C", linkageName: "_ZN1CD2Ev", scope: !1, file: !1, line: 9, type: !47, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!47 = !DISubroutineType(types: !48)
!48 = !{!55}
!50 = !DILocation(line: 9, scope: !51)
!51 = distinct !DILexicalBlock(scope: !8, file: !1, line: 9)
!55 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)


@@ -16,3 +16,27 @@ Tail:
   %r = call i32 @callee(i32* %a, i32 %v, i32 %p)
   ret i32 %r
 }
+
+define void @fn1(i16 %p1) {
+entry:
+  ret void
+}
+
+define void @fn2() {
+  ret void
+
+; Unreachable code below
+
+for.inc: ; preds = %for.inc
+  br i1 undef, label %for.end6, label %for.inc
+
+for.end6: ; preds = %for.inc
+  br i1 undef, label %lor.rhs, label %lor.end
+
+lor.rhs: ; preds = %for.end6
+  br label %lor.end
+
+lor.end: ; preds = %for.end6, %lor.rhs
+  call void @fn1(i16 0)
+  ret void
+}


@@ -899,3 +899,24 @@ define i32 @common_factor_umax_extra_use_both(i32 %a, i32 %b, i32 %c) {
   ret i32 %max_abc
 }
+
+; This would assert. Don't assume that earlier min/max types match a possible later min/max.
+define float @not_min_of_min(i8 %i, float %x) {
+; CHECK-LABEL: @not_min_of_min(
+; CHECK-NEXT: [[CMP1_INV:%.*]] = fcmp fast oge float [[X:%.*]], 1.000000e+00
+; CHECK-NEXT: [[MIN1:%.*]] = select i1 [[CMP1_INV]], float 1.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP2_INV:%.*]] = fcmp fast oge float [[X]], 2.000000e+00
+; CHECK-NEXT: [[MIN2:%.*]] = select i1 [[CMP2_INV]], float 2.000000e+00, float [[X]]
+; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i8 [[I:%.*]], 16
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP3]], float [[MIN1]], float [[MIN2]]
+; CHECK-NEXT: ret float [[R]]
+;
+  %cmp1 = fcmp fast ult float %x, 1.0
+  %min1 = select i1 %cmp1, float %x, float 1.0
+  %cmp2 = fcmp fast ult float %x, 2.0
+  %min2 = select i1 %cmp2, float %x, float 2.0
+  %cmp3 = icmp ult i8 %i, 16
+  %r = select i1 %cmp3, float %min1, float %min2
+  ret float %r
+}


@@ -1,3 +1,4 @@
+; XFAIL: *
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg %s | FileCheck %s


@@ -0,0 +1,53 @@
; RUN: opt -S -structurizecfg %s | FileCheck %s
; r321751 introduced a bug where control flow branching from if to exit was
; not handled properly and instead ended up in an infinite loop.
define void @bug36015(i32 %cmp0, i32 %count) {
entry:
br label %loop.outer
loop.outer:
%ctr.loop.outer = phi i32 [ 0, %entry ], [ %ctr.else, %else ]
call void @foo(i32 0)
br label %loop.inner
loop.inner:
%ctr.loop.inner = phi i32 [ %ctr.loop.outer, %loop.outer ], [ %ctr.if, %if ]
call void @foo(i32 1)
%cond.inner = icmp eq i32 %cmp0, %ctr.loop.inner
br i1 %cond.inner, label %if, label %else
; CHECK: if:
; CHECK: %0 = xor i1 %cond.if, true
; CHECK: br label %Flow
if:
%ctr.if = add i32 %ctr.loop.inner, 1
call void @foo(i32 2)
%cond.if = icmp slt i32 %ctr.if, %count
br i1 %cond.if, label %loop.inner, label %exit
; CHECK: Flow:
; CHECK: %2 = phi i1 [ %0, %if ], [ true, %loop.inner ]
; CHECK: %3 = phi i1 [ false, %if ], [ true, %loop.inner ]
; CHECK: br i1 %2, label %Flow1, label %loop.inner
; CHECK: Flow1:
; CHECK: br i1 %3, label %else, label %Flow2
; CHECK: else:
; CHECK: br label %Flow2
else:
%ctr.else = add i32 %ctr.loop.inner, 1
call void @foo(i32 3)
%cond.else = icmp slt i32 %ctr.else, %count
br i1 %cond.else, label %loop.outer, label %exit
; CHECK: Flow2:
; CHECK: %6 = phi i1 [ %4, %else ], [ true, %Flow1 ]
; CHECK: br i1 %6, label %exit, label %loop.outer
exit:
ret void
}
declare void @foo(i32)


@@ -1,76 +1,32 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
 
 define void @main(float addrspace(1)* %out) {
-; CHECK-LABEL: @main(
-; CHECK-NEXT: main_body:
-; CHECK-NEXT: br label [[LOOP_OUTER:%.*]]
-; CHECK: LOOP.outer:
-; CHECK-NEXT: [[TEMP8_0_PH:%.*]] = phi float [ 0.000000e+00, [[MAIN_BODY:%.*]] ], [ [[TMP13:%.*]], [[FLOW3:%.*]] ]
-; CHECK-NEXT: [[TEMP4_0_PH:%.*]] = phi i32 [ 0, [[MAIN_BODY]] ], [ [[TMP12:%.*]], [[FLOW3]] ]
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: LOOP:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ undef, [[LOOP_OUTER]] ], [ [[TMP12]], [[FLOW:%.*]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi float [ undef, [[LOOP_OUTER]] ], [ [[TMP13]], [[FLOW]] ]
-; CHECK-NEXT: [[TEMP4_0:%.*]] = phi i32 [ [[TEMP4_0_PH]], [[LOOP_OUTER]] ], [ [[TMP15:%.*]], [[FLOW]] ]
-; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TEMP4_0]], 1
-; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], 3
-; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[TMP22]], true
-; CHECK-NEXT: br i1 [[TMP2]], label [[ENDIF:%.*]], label [[FLOW]]
-; CHECK: Flow2:
-; CHECK-NEXT: [[TMP3:%.*]] = phi float [ [[TEMP8_0_PH]], [[IF29:%.*]] ], [ [[TMP9:%.*]], [[FLOW1:%.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP20]], [[IF29]] ], [ undef, [[FLOW1]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP32:%.*]], [[IF29]] ], [ true, [[FLOW1]] ]
-; CHECK-NEXT: br label [[FLOW]]
-; CHECK: Flow3:
-; CHECK-NEXT: br i1 [[TMP16:%.*]], label [[ENDLOOP:%.*]], label [[LOOP_OUTER]]
-; CHECK: ENDLOOP:
-; CHECK-NEXT: [[TEMP8_1:%.*]] = phi float [ [[TMP14:%.*]], [[FLOW3]] ]
-; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[TMP20]], 3
-; CHECK-NEXT: [[DOT45:%.*]] = select i1 [[TMP23]], float 0.000000e+00, float 1.000000e+00
-; CHECK-NEXT: store float [[DOT45]], float addrspace(1)* [[OUT:%.*]]
-; CHECK-NEXT: ret void
-; CHECK: ENDIF:
-; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP20]], 1
-; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[TMP31]], true
-; CHECK-NEXT: br i1 [[TMP6]], label [[ENDIF28:%.*]], label [[FLOW1]]
-; CHECK: Flow1:
-; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ [[TMP20]], [[ENDIF28]] ], [ [[TMP0]], [[ENDIF]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = phi float [ [[TMP35:%.*]], [[ENDIF28]] ], [ [[TMP1]], [[ENDIF]] ]
-; CHECK-NEXT: [[TMP9]] = phi float [ [[TMP35]], [[ENDIF28]] ], [ [[TEMP8_0_PH]], [[ENDIF]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = phi i1 [ [[TMP36:%.*]], [[ENDIF28]] ], [ true, [[ENDIF]] ]
-; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[ENDIF28]] ], [ true, [[ENDIF]] ]
-; CHECK-NEXT: br i1 [[TMP11]], label [[IF29]], label [[FLOW2:%.*]]
-; CHECK: IF29:
-; CHECK-NEXT: [[TMP32]] = icmp sgt i32 [[TMP20]], 2
-; CHECK-NEXT: br label [[FLOW2]]
-; CHECK: Flow:
-; CHECK-NEXT: [[TMP12]] = phi i32 [ [[TMP7]], [[FLOW2]] ], [ [[TMP0]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP13]] = phi float [ [[TMP8]], [[FLOW2]] ], [ [[TMP1]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP14]] = phi float [ [[TMP3]], [[FLOW2]] ], [ [[TEMP8_0_PH]], [[LOOP]] ]
-; CHECK-NEXT: [[TMP15]] = phi i32 [ [[TMP4]], [[FLOW2]] ], [ undef, [[LOOP]] ]
-; CHECK-NEXT: [[TMP16]] = phi i1 [ [[TMP10]], [[FLOW2]] ], [ true, [[LOOP]] ]
-; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[TMP5]], [[FLOW2]] ], [ true, [[LOOP]] ]
-; CHECK-NEXT: br i1 [[TMP17]], label [[FLOW3]], label [[LOOP]]
-; CHECK: ENDIF28:
-; CHECK-NEXT: [[TMP35]] = fadd float [[TEMP8_0_PH]], 1.000000e+00
-; CHECK-NEXT: [[TMP36]] = icmp sgt i32 [[TMP20]], 2
-; CHECK-NEXT: br label [[FLOW1]]
-;
+; CHECK: main_body:
+; CHECK: br label %LOOP.outer
 main_body:
   br label %LOOP.outer
 
+; CHECK: LOOP.outer:
+; CHECK: br label %LOOP
 LOOP.outer: ; preds = %ENDIF28, %main_body
   %temp8.0.ph = phi float [ 0.000000e+00, %main_body ], [ %tmp35, %ENDIF28 ]
   %temp4.0.ph = phi i32 [ 0, %main_body ], [ %tmp20, %ENDIF28 ]
   br label %LOOP
 
+; CHECK: LOOP:
+; br i1 %{{[0-9]+}}, label %ENDIF, label %Flow
 LOOP: ; preds = %IF29, %LOOP.outer
   %temp4.0 = phi i32 [ %temp4.0.ph, %LOOP.outer ], [ %tmp20, %IF29 ]
   %tmp20 = add i32 %temp4.0, 1
   %tmp22 = icmp sgt i32 %tmp20, 3
   br i1 %tmp22, label %ENDLOOP, label %ENDIF
 
+; CHECK: Flow3
+; CHECK: br i1 %{{[0-9]+}}, label %ENDLOOP, label %LOOP.outer
+; CHECK: ENDLOOP:
+; CHECK: ret void
 ENDLOOP: ; preds = %ENDIF28, %IF29, %LOOP
   %temp8.1 = phi float [ %temp8.0.ph, %LOOP ], [ %temp8.0.ph, %IF29 ], [ %tmp35, %ENDIF28 ]
   %tmp23 = icmp eq i32 %tmp20, 3
@@ -78,14 +34,29 @@ ENDLOOP: ; preds = %ENDIF28, %IF29, %LO
   store float %.45, float addrspace(1)* %out
   ret void
 
+; CHECK: ENDIF:
+; CHECK: br i1 %tmp31, label %IF29, label %Flow1
 ENDIF: ; preds = %LOOP
   %tmp31 = icmp sgt i32 %tmp20, 1
   br i1 %tmp31, label %IF29, label %ENDIF28
 
+; CHECK: Flow:
+; CHECK: br i1 %{{[0-9]+}}, label %Flow2, label %LOOP
+
+; CHECK: IF29:
+; CHECK: br label %Flow1
 IF29: ; preds = %ENDIF
   %tmp32 = icmp sgt i32 %tmp20, 2
   br i1 %tmp32, label %ENDLOOP, label %LOOP
 
+; CHECK: Flow1:
+; CHECK: br label %Flow
+
+; CHECK: Flow2:
+; CHECK: br i1 %{{[0-9]+}}, label %ENDIF28, label %Flow3
+
+; CHECK: ENDIF28:
+; CHECK: br label %Flow3
 ENDIF28: ; preds = %ENDIF
   %tmp35 = fadd float %temp8.0.ph, 1.0
   %tmp36 = icmp sgt i32 %tmp20, 2