Vendor import of llvm release_38 branch r258549:
https://llvm.org/svn/llvm-project/llvm/branches/release_38@258549
Parent commit: dfab1a98e0
This commit:  dadbdfff07
@@ -295,7 +295,7 @@ class MachineFunction {
  }

  /// Should we be emitting segmented stack stuff for the function
  bool shouldSplitStack();
  bool shouldSplitStack() const;

  /// getNumBlockIDs - Return the number of MBB ID's allocated.
  ///
@@ -369,6 +369,18 @@ struct SDNodeFlags {
           (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
           (NoSignedZeros << 6) | (AllowReciprocal << 7);
  }

  /// Clear any flags in this flag set that aren't also set in Flags.
  void intersectWith(const SDNodeFlags *Flags) {
    NoUnsignedWrap &= Flags->NoUnsignedWrap;
    NoSignedWrap &= Flags->NoSignedWrap;
    Exact &= Flags->Exact;
    UnsafeAlgebra &= Flags->UnsafeAlgebra;
    NoNaNs &= Flags->NoNaNs;
    NoInfs &= Flags->NoInfs;
    NoSignedZeros &= Flags->NoSignedZeros;
    AllowReciprocal &= Flags->AllowReciprocal;
  }
};

/// Represents one node in the SelectionDAG.
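The intersection above keeps a flag only when both flag sets carried it, which is what makes it safe to reuse a CSE'd node for two call sites. A minimal stand-alone sketch of that semantics (plain C++, not the real SDNodeFlags type, nothing assumed beyond the standard library):

#include <cassert>

// Simplified stand-in for SDNodeFlags::intersectWith: a flag survives only
// if it was set on both sides, so the merged result never claims a property
// that one of the original flag sets lacked.
struct ToyFlags {
  bool NoNaNs, NoInfs;
  void intersectWith(const ToyFlags &Other) {
    NoNaNs &= Other.NoNaNs;
    NoInfs &= Other.NoInfs;
  }
};

int main() {
  ToyFlags A = {true, true};
  ToyFlags B = {true, false};
  A.intersectWith(B);
  assert(A.NoNaNs && !A.NoInfs); // only the commonly-set flag remains
  return 0;
}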
@@ -682,6 +694,9 @@ class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
  /// and directly, but it is not to avoid creating a vtable for this class.
  const SDNodeFlags *getFlags() const;

  /// Clear any flags in this node that aren't also set in Flags.
  void intersectFlagsWith(const SDNodeFlags *Flags);

  /// Return the number of values defined/returned by this operator.
  unsigned getNumValues() const { return NumValues; }

@@ -346,6 +346,10 @@ class GlobalValue : public Constant {
    return !(isDeclarationForLinker() || isWeakForLinker());
  }

  // Returns true if the alignment of the value can be unilaterally
  // increased.
  bool canIncreaseAlignment() const;

  /// This method unlinks 'this' from the containing module, but does not delete
  /// it.
  virtual void removeFromParent() = 0;
@@ -331,6 +331,25 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
/// during lowering by the GC infrastructure.
bool callsGCLeafFunction(ImmutableCallSite CS);

//===----------------------------------------------------------------------===//
// Intrinsic pattern matching
//

/// Try and match a bitreverse or bswap idiom.
///
/// If an idiom is matched, an intrinsic call is inserted before \c I. Any added
/// instructions are returned in \c InsertedInsts. They will all have been added
/// to a basic block.
///
/// A bitreverse idiom normally requires around 2*BW nodes to be searched (where
/// BW is the bitwidth of the integer type). A bswap idiom requires anywhere up
/// to BW / 4 nodes to be searched, so is significantly faster.
///
/// This function returns true on a successful match or false otherwise.
bool recognizeBitReverseOrBSwapIdiom(
    Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
    SmallVectorImpl<Instruction *> &InsertedInsts);

} // End llvm namespace

#endif
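For a sense of what counts as an idiom here, the canonical bswap case is a byte swap hand-written with shifts and an or. The sketch below is illustrative only (plain C++, assuming nothing beyond <cstdint>); it shows the source-level shape whose IR this matcher is intended to recognize:

#include <cstdint>

// A hand-written 16-bit byte swap: the or-of-shifted-copies shape is the
// kind of expression the bswap idiom matcher looks for.
static uint16_t swap16(uint16_t X) {
  return static_cast<uint16_t>((X << 8) | (X >> 8));
}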
@@ -125,8 +125,6 @@ class LibCallSimplifier {
  Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B);

  // Math Library Optimizations
  Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, bool CheckRetType);
  Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B);
  Value *optimizeCos(CallInst *CI, IRBuilder<> &B);
  Value *optimizePow(CallInst *CI, IRBuilder<> &B);
  Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
@@ -93,18 +93,7 @@ class DebugLocEntry {
  /// variable, merge them by appending Next's values to the current
  /// list of values.
  /// Return true if the merge was successful.
  bool MergeValues(const DebugLocEntry &Next) {
    if (Begin == Next.Begin) {
      auto *Expr = cast_or_null<DIExpression>(Values[0].Expression);
      auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression);
      if (Expr->isBitPiece() && NextExpr->isBitPiece()) {
        addValues(Next.Values);
        End = Next.End;
        return true;
      }
    }
    return false;
  }
  bool MergeValues(const DebugLocEntry &Next);

  /// \brief Attempt to merge this DebugLocEntry with Next and return
  /// true if the merge was successful. Entries can be merged if they
@@ -805,6 +805,24 @@ static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
  return (l1 < r2) && (l2 < r1);
}

/// \brief If this and Next are describing different pieces of the same
/// variable, merge them by appending Next's values to the current
/// list of values.
/// Return true if the merge was successful.
bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
  if (Begin == Next.Begin) {
    auto *Expr = cast_or_null<DIExpression>(Values[0].Expression);
    auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression);
    if (Expr->isBitPiece() && NextExpr->isBitPiece() &&
        !piecesOverlap(Expr, NextExpr)) {
      addValues(Next.Values);
      End = Next.End;
      return true;
    }
  }
  return false;
}

/// Build the location list for all DBG_VALUEs in the function that
/// describe the same variable. If the ranges of several independent
/// pieces of the same variable overlap partially, split them up and
@@ -1742,8 +1742,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
    // over-aligning global variables that have an explicit section is
    // forbidden.
    GlobalVariable *GV;
    if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->hasUniqueInitializer() &&
        !GV->hasSection() && GV->getAlignment() < PrefAlign &&
    if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
        GV->getAlignment() < PrefAlign &&
        DL->getTypeAllocSize(GV->getType()->getElementType()) >=
            MinSize + Offset2)
      GV->setAlignment(PrefAlign);
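The replacement above funnels the ad-hoc checks (unique initializer, no explicit section) through the new GlobalValue::canIncreaseAlignment() predicate. A minimal sketch of the resulting guard pattern, assuming an existing GlobalVariable *GV and a target-preferred alignment PrefAlign:

#include "llvm/IR/GlobalVariable.h"

using namespace llvm;

// Sketch only: raise the alignment of a global when, and only when, the
// predicate introduced by this import says doing so is safe.
static void maybeRaiseAlignment(GlobalVariable *GV, unsigned PrefAlign) {
  if (GV->canIncreaseAlignment() && GV->getAlignment() < PrefAlign)
    GV->setAlignment(PrefAlign);
}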
@@ -5211,6 +5211,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
  return false;
}

/// Given an OR instruction, check to see if this is a bitreverse
/// idiom. If so, insert the new intrinsic and return true.
static bool makeBitReverse(Instruction &I, const DataLayout &DL,
                           const TargetLowering &TLI) {
  if (!I.getType()->isIntegerTy() ||
      !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
                                    TLI.getValueType(DL, I.getType(), true)))
    return false;

  SmallVector<Instruction*, 4> Insts;
  if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts))
    return false;
  Instruction *LastInst = Insts.back();
  I.replaceAllUsesWith(LastInst);
  RecursivelyDeleteTriviallyDeadInstructions(&I);
  return true;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
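For context, the code makeBitReverse is aimed at is a hand-rolled bit reversal built from shifts, masks and ors. The plain-C++ sketch below is illustrative only; whether a given function actually gets folded to llvm.bitreverse depends on the target reporting BITREVERSE as legal or custom, per the check above:

#include <cstdint>

// Classic three-step byte reversal: swap nibbles, then bit pairs, then
// adjacent bits. Every output bit traces back to exactly one input bit,
// which is the property the idiom recognizer verifies.
static uint8_t reverse8(uint8_t B) {
  B = static_cast<uint8_t>(((B & 0xF0u) >> 4) | ((B & 0x0Fu) << 4));
  B = static_cast<uint8_t>(((B & 0xCCu) >> 2) | ((B & 0x33u) << 2));
  B = static_cast<uint8_t>(((B & 0xAAu) >> 1) | ((B & 0x55u) << 1));
  return B;
}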
@@ -5224,8 +5242,19 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
      if (ModifiedDT)
        return true;
    }
  MadeChange |= dupRetToEnableTailCallOpts(&BB);

  bool MadeBitReverse = true;
  while (TLI && MadeBitReverse) {
    MadeBitReverse = false;
    for (auto &I : reverse(BB)) {
      if (makeBitReverse(I, *DL, *TLI)) {
        MadeBitReverse = MadeChange = true;
        break;
      }
    }
  }
  MadeChange |= dupRetToEnableTailCallOpts(&BB);

  return MadeChange;
}
@@ -163,7 +163,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
}

/// Should we be emitting segmented stack stuff for the function
bool MachineFunction::shouldSplitStack() {
bool MachineFunction::shouldSplitStack() const {
  return getFunction()->hasFnAttribute("split-stack");
}

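shouldSplitStack() keys off the "split-stack" string attribute on the IR function, so the const-qualification above is purely an interface fix. A sketch of how a frontend or pass opts a function in (F is assumed to be an already-created llvm::Function):

#include "llvm/IR/Function.h"

using namespace llvm;

// Sketch only: request segmented stacks for F by attaching the string
// attribute that MachineFunction::shouldSplitStack() tests for.
static void requestSegmentedStack(Function &F) {
  F.addFnAttr("split-stack");
}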
@ -377,22 +377,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add logical or fast math flag values to FoldingSetNodeID value.
|
||||
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
|
||||
const SDNodeFlags *Flags) {
|
||||
if (!isBinOpWithFlags(Opcode))
|
||||
return;
|
||||
|
||||
unsigned RawFlags = 0;
|
||||
if (Flags)
|
||||
RawFlags = Flags->getRawFlags();
|
||||
ID.AddInteger(RawFlags);
|
||||
}
|
||||
|
||||
static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
|
||||
AddNodeIDFlags(ID, N->getOpcode(), N->getFlags());
|
||||
}
|
||||
|
||||
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
|
||||
SDVTList VTList, ArrayRef<SDValue> OpList) {
|
||||
AddNodeIDOpcode(ID, OpC);
|
||||
@ -528,8 +512,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
|
||||
}
|
||||
} // end switch (N->getOpcode())
|
||||
|
||||
AddNodeIDFlags(ID, N);
|
||||
|
||||
// Target specific memory nodes could also have address spaces to check.
|
||||
if (N->isTargetMemoryOpcode())
|
||||
ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
|
||||
@ -851,6 +833,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
|
||||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||
AddNodeIDCustom(ID, N);
|
||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||
if (Node)
|
||||
if (const SDNodeFlags *Flags = N->getFlags())
|
||||
Node->intersectFlagsWith(Flags);
|
||||
return Node;
|
||||
}
|
||||
|
||||
@ -869,6 +854,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
|
||||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||
AddNodeIDCustom(ID, N);
|
||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||
if (Node)
|
||||
if (const SDNodeFlags *Flags = N->getFlags())
|
||||
Node->intersectFlagsWith(Flags);
|
||||
return Node;
|
||||
}
|
||||
|
||||
@ -886,6 +874,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
|
||||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||
AddNodeIDCustom(ID, N);
|
||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||
if (Node)
|
||||
if (const SDNodeFlags *Flags = N->getFlags())
|
||||
Node->intersectFlagsWith(Flags);
|
||||
return Node;
|
||||
}
|
||||
|
||||
@ -3892,10 +3883,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
|
||||
SDValue Ops[] = {N1, N2};
|
||||
FoldingSetNodeID ID;
|
||||
AddNodeIDNode(ID, Opcode, VTs, Ops);
|
||||
AddNodeIDFlags(ID, Opcode, Flags);
|
||||
void *IP = nullptr;
|
||||
if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
|
||||
if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) {
|
||||
if (Flags)
|
||||
E->intersectFlagsWith(Flags);
|
||||
return SDValue(E, 0);
|
||||
}
|
||||
|
||||
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
|
||||
|
||||
@ -6249,10 +6242,12 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
|
||||
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
|
||||
FoldingSetNodeID ID;
|
||||
AddNodeIDNode(ID, Opcode, VTList, Ops);
|
||||
AddNodeIDFlags(ID, Opcode, Flags);
|
||||
void *IP = nullptr;
|
||||
if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP))
|
||||
if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) {
|
||||
if (Flags)
|
||||
E->intersectFlagsWith(Flags);
|
||||
return E;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
@ -6948,6 +6943,11 @@ const SDNodeFlags *SDNode::getFlags() const {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) {
|
||||
if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
|
||||
FlagsNode->Flags.intersectWith(Flags);
|
||||
}
|
||||
|
||||
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
|
||||
assert(N->getNumValues() == 1 &&
|
||||
"Can't unroll a vector with multiple results!");
|
||||
|
@ -12,11 +12,12 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#include "llvm/IR/Constants.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/GlobalAlias.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/IR/GlobalVariable.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/Operator.h"
|
||||
@ -134,6 +135,47 @@ bool GlobalValue::isDeclaration() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool GlobalValue::canIncreaseAlignment() const {
|
||||
// Firstly, can only increase the alignment of a global if it
|
||||
// is a strong definition.
|
||||
if (!isStrongDefinitionForLinker())
|
||||
return false;
|
||||
|
||||
// It also has to either not have a section defined, or, not have
|
||||
// alignment specified. (If it is assigned a section, the global
|
||||
// could be densely packed with other objects in the section, and
|
||||
// increasing the alignment could cause padding issues.)
|
||||
if (hasSection() && getAlignment() > 0)
|
||||
return false;
|
||||
|
||||
// On ELF platforms, we're further restricted in that we can't
|
||||
// increase the alignment of any variable which might be emitted
|
||||
// into a shared library, and which is exported. If the main
|
||||
// executable accesses a variable found in a shared-lib, the main
|
||||
// exe actually allocates memory for and exports the symbol ITSELF,
|
||||
// overriding the symbol found in the library. That is, at link
|
||||
// time, the observed alignment of the variable is copied into the
|
||||
// executable binary. (A COPY relocation is also generated, to copy
|
||||
// the initial data from the shadowed variable in the shared-lib
|
||||
// into the location in the main binary, before running code.)
|
||||
//
|
||||
// And thus, even though you might think you are defining the
|
||||
// global, and allocating the memory for the global in your object
|
||||
// file, and thus should be able to set the alignment arbitrarily,
|
||||
// that's not actually true. Doing so can cause an ABI breakage; an
|
||||
// executable might have already been built with the previous
|
||||
// alignment of the variable, and then assuming an increased
|
||||
// alignment will be incorrect.
|
||||
|
||||
// Conservatively assume ELF if there's no parent pointer.
|
||||
bool isELF =
|
||||
(!Parent || Triple(Parent->getTargetTriple()).isOSBinFormatELF());
|
||||
if (isELF && hasDefaultVisibility() && !hasLocalLinkage())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GlobalVariable Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -10133,6 +10133,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
|
||||
|
||||
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
||||
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
||||
MachineBasicBlock::iterator MBBI = Entry->begin();
|
||||
for (const MCPhysReg *I = IStart; *I; ++I) {
|
||||
const TargetRegisterClass *RC = nullptr;
|
||||
if (AArch64::GPR64RegClass.contains(*I))
|
||||
@ -10152,13 +10153,13 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
|
||||
Attribute::NoUnwind) &&
|
||||
"Function should be nounwind in insertCopiesSplitCSR!");
|
||||
Entry->addLiveIn(*I);
|
||||
BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
||||
NewVR)
|
||||
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
|
||||
.addReg(*I);
|
||||
|
||||
// Insert the copy-back instructions right before the terminator.
|
||||
for (auto *Exit : Exits)
|
||||
BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
||||
*I)
|
||||
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), *I)
|
||||
.addReg(NewVR);
|
||||
}
|
||||
}
|
||||
|
@ -112,9 +112,21 @@ class AArch64ELFStreamer : public MCELFStreamer {
|
||||
MCELFStreamer::EmitInstruction(Inst, STI);
|
||||
}
|
||||
|
||||
/// Emit a 32-bit value as an instruction. This is only used for the .inst
|
||||
/// directive, EmitInstruction should be used in other cases.
|
||||
void emitInst(uint32_t Inst) {
|
||||
char Buffer[4];
|
||||
|
||||
// We can't just use EmitIntValue here, as that will emit a data mapping
|
||||
// symbol, and swap the endianness on big-endian systems (instructions are
|
||||
// always little-endian).
|
||||
for (unsigned I = 0; I < 4; ++I) {
|
||||
Buffer[I] = uint8_t(Inst);
|
||||
Inst >>= 8;
|
||||
}
|
||||
|
||||
EmitA64MappingSymbol();
|
||||
MCELFStreamer::EmitIntValue(Inst, 4);
|
||||
MCELFStreamer::EmitBytes(StringRef(Buffer, 4));
|
||||
}
|
||||
|
||||
/// This is one of the functions used to emit data into an ELF section, so the
|
||||
|
@ -12423,6 +12423,7 @@ void ARMTargetLowering::insertCopiesSplitCSR(
|
||||
|
||||
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
||||
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
||||
MachineBasicBlock::iterator MBBI = Entry->begin();
|
||||
for (const MCPhysReg *I = IStart; *I; ++I) {
|
||||
const TargetRegisterClass *RC = nullptr;
|
||||
if (ARM::GPRRegClass.contains(*I))
|
||||
@ -12442,13 +12443,13 @@ void ARMTargetLowering::insertCopiesSplitCSR(
|
||||
Attribute::NoUnwind) &&
|
||||
"Function should be nounwind in insertCopiesSplitCSR!");
|
||||
Entry->addLiveIn(*I);
|
||||
BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
||||
NewVR)
|
||||
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
|
||||
.addReg(*I);
|
||||
|
||||
// Insert the copy-back instructions right before the terminator.
|
||||
for (auto *Exit : Exits)
|
||||
BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
||||
*I)
|
||||
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), *I)
|
||||
.addReg(NewVR);
|
||||
}
|
||||
}
|
||||
|
@ -832,10 +832,10 @@ def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
|
||||
R8, R9, R10, R11)>;
|
||||
|
||||
// CSRs that are handled by prologue, epilogue.
|
||||
def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;
|
||||
def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add RBP)>;
|
||||
|
||||
// CSRs that are handled explicitly via copies.
|
||||
def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>;
|
||||
def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>;
|
||||
|
||||
// All GPRs - except r11
|
||||
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
|
||||
|
@ -2031,6 +2031,10 @@ void X86FrameLowering::adjustForSegmentedStacks(
|
||||
unsigned TlsReg, TlsOffset;
|
||||
DebugLoc DL;
|
||||
|
||||
// To support shrink-wrapping we would need to insert the new blocks
|
||||
// at the right place and update the branches to PrologueMBB.
|
||||
assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
|
||||
|
||||
unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
|
||||
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
|
||||
"Scratch register is live-in");
|
||||
@ -2271,6 +2275,11 @@ void X86FrameLowering::adjustForHiPEPrologue(
|
||||
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
DebugLoc DL;
|
||||
|
||||
// To support shrink-wrapping we would need to insert the new blocks
|
||||
// at the right place and update the branches to PrologueMBB.
|
||||
assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
|
||||
|
||||
// HiPE-specific values
|
||||
const unsigned HipeLeafWords = 24;
|
||||
const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
|
||||
@ -2584,7 +2593,14 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
|
||||
bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
|
||||
// If we may need to emit frameless compact unwind information, give
|
||||
// up as this is currently broken: PR25614.
|
||||
return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF);
|
||||
return (MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) &&
|
||||
// The lowering of segmented stack and HiPE only support entry blocks
|
||||
// as prologue blocks: PR26107.
|
||||
// This limitation may be lifted if we fix:
|
||||
// - adjustForSegmentedStacks
|
||||
// - adjustForHiPEPrologue
|
||||
MF.getFunction()->getCallingConv() != CallingConv::HiPE &&
|
||||
!MF.shouldSplitStack();
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
|
||||
|
@ -28908,6 +28908,7 @@ void X86TargetLowering::insertCopiesSplitCSR(
|
||||
|
||||
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
||||
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
||||
MachineBasicBlock::iterator MBBI = Entry->begin();
|
||||
for (const MCPhysReg *I = IStart; *I; ++I) {
|
||||
const TargetRegisterClass *RC = nullptr;
|
||||
if (X86::GR64RegClass.contains(*I))
|
||||
@ -28925,13 +28926,13 @@ void X86TargetLowering::insertCopiesSplitCSR(
|
||||
Attribute::NoUnwind) &&
|
||||
"Function should be nounwind in insertCopiesSplitCSR!");
|
||||
Entry->addLiveIn(*I);
|
||||
BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
||||
NewVR)
|
||||
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
|
||||
.addReg(*I);
|
||||
|
||||
// Insert the copy-back instructions right before the terminator.
|
||||
for (auto *Exit : Exits)
|
||||
BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
||||
*I)
|
||||
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), *I)
|
||||
.addReg(NewVR);
|
||||
}
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/PatternMatch.h"
|
||||
#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
using namespace llvm;
|
||||
using namespace PatternMatch;
|
||||
|
||||
@ -1565,190 +1566,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
|
||||
return Changed ? &I : nullptr;
|
||||
}
|
||||
|
||||
|
||||
/// Analyze the specified subexpression and see if it is capable of providing
|
||||
/// pieces of a bswap or bitreverse. The subexpression provides a potential
|
||||
/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
|
||||
/// the output of the expression came from a corresponding bit in some other
|
||||
/// value. This function is recursive, and the end result is a mapping of
|
||||
/// (value, bitnumber) to bitnumber. It is the caller's responsibility to
|
||||
/// validate that all `value`s are identical and that the bitnumber to bitnumber
|
||||
/// mapping is correct for a bswap or bitreverse.
|
||||
///
|
||||
/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know
|
||||
/// that the expression deposits the low byte of %X into the high byte of the
|
||||
/// result and that all other bits are zero. This expression is accepted,
|
||||
/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7].
|
||||
///
|
||||
/// This function returns true if the match was unsuccessful and false if so.
|
||||
/// On entry to the function the "OverallLeftShift" is a signed integer value
|
||||
/// indicating the number of bits that the subexpression is later shifted. For
|
||||
/// example, if the expression is later right shifted by 16 bits, the
|
||||
/// OverallLeftShift value would be -16 on entry. This is used to specify which
|
||||
/// bits of BitValues are actually being set.
|
||||
///
|
||||
/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding
|
||||
/// bit is masked to zero by a user. For example, in (X & 255), X will be
|
||||
/// processed with a bytemask of 255. BitMask is always in the local
|
||||
/// (OverallLeftShift) coordinate space.
|
||||
///
|
||||
static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask,
|
||||
SmallVectorImpl<Value *> &BitValues,
|
||||
SmallVectorImpl<int> &BitProvenance) {
|
||||
if (Instruction *I = dyn_cast<Instruction>(V)) {
|
||||
// If this is an or instruction, it may be an inner node of the bswap.
|
||||
if (I->getOpcode() == Instruction::Or)
|
||||
return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
|
||||
BitValues, BitProvenance) ||
|
||||
CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask,
|
||||
BitValues, BitProvenance);
|
||||
|
||||
// If this is a logical shift by a constant, recurse with OverallLeftShift
|
||||
// and BitMask adjusted.
|
||||
if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
|
||||
unsigned ShAmt =
|
||||
cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
|
||||
// Ensure the shift amount is defined.
|
||||
if (ShAmt > BitValues.size())
|
||||
return true;
|
||||
|
||||
unsigned BitShift = ShAmt;
|
||||
if (I->getOpcode() == Instruction::Shl) {
|
||||
// X << C -> collect(X, +C)
|
||||
OverallLeftShift += BitShift;
|
||||
BitMask = BitMask.lshr(BitShift);
|
||||
} else {
|
||||
// X >>u C -> collect(X, -C)
|
||||
OverallLeftShift -= BitShift;
|
||||
BitMask = BitMask.shl(BitShift);
|
||||
}
|
||||
|
||||
if (OverallLeftShift >= (int)BitValues.size())
|
||||
return true;
|
||||
if (OverallLeftShift <= -(int)BitValues.size())
|
||||
return true;
|
||||
|
||||
return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
|
||||
BitValues, BitProvenance);
|
||||
}
|
||||
|
||||
// If this is a logical 'and' with a mask that clears bits, clear the
|
||||
// corresponding bits in BitMask.
|
||||
if (I->getOpcode() == Instruction::And &&
|
||||
isa<ConstantInt>(I->getOperand(1))) {
|
||||
unsigned NumBits = BitValues.size();
|
||||
APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
|
||||
const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
|
||||
|
||||
for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) {
|
||||
// If this bit is masked out by a later operation, we don't care what
|
||||
// the and mask is.
|
||||
if (BitMask[i] == 0)
|
||||
continue;
|
||||
|
||||
// If the AndMask is zero for this bit, clear the bit.
|
||||
APInt MaskB = AndMask & Bit;
|
||||
if (MaskB == 0) {
|
||||
BitMask.clearBit(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, this bit is kept.
|
||||
}
|
||||
|
||||
return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
|
||||
BitValues, BitProvenance);
|
||||
}
|
||||
}
|
||||
|
||||
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
|
||||
// the input value to the bswap/bitreverse. To be part of a bswap or
|
||||
// bitreverse we must be demanding a contiguous range of bits from it.
|
||||
unsigned InputBitLen = BitMask.countPopulation();
|
||||
unsigned InputBitNo = BitMask.countTrailingZeros();
|
||||
if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo !=
|
||||
InputBitLen)
|
||||
// Not a contiguous set range of bits!
|
||||
return true;
|
||||
|
||||
// We know we're moving a contiguous range of bits from the input to the
|
||||
// output. Record which bits in the output came from which bits in the input.
|
||||
unsigned DestBitNo = InputBitNo + OverallLeftShift;
|
||||
for (unsigned I = 0; I < InputBitLen; ++I)
|
||||
BitProvenance[DestBitNo + I] = InputBitNo + I;
|
||||
|
||||
// If the destination bit value is already defined, the values are or'd
|
||||
// together, which isn't a bswap/bitreverse (unless it's an or of the same
|
||||
// bits).
|
||||
if (BitValues[DestBitNo] && BitValues[DestBitNo] != V)
|
||||
return true;
|
||||
for (unsigned I = 0; I < InputBitLen; ++I)
|
||||
BitValues[DestBitNo + I] = V;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
|
||||
unsigned BitWidth) {
|
||||
if (From % 8 != To % 8)
|
||||
return false;
|
||||
// Convert from bit indices to byte indices and check for a byte reversal.
|
||||
From >>= 3;
|
||||
To >>= 3;
|
||||
BitWidth >>= 3;
|
||||
return From == BitWidth - To - 1;
|
||||
}
|
||||
|
||||
static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
|
||||
unsigned BitWidth) {
|
||||
return From == BitWidth - To - 1;
|
||||
}
|
||||
|
||||
/// Given an OR instruction, check to see if this is a bswap or bitreverse
|
||||
/// idiom. If so, insert the new intrinsic and return it.
|
||||
Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) {
|
||||
IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
|
||||
if (!ITy)
|
||||
return nullptr; // Can't do vectors.
|
||||
unsigned BW = ITy->getBitWidth();
|
||||
|
||||
/// We keep track of which bit (BitProvenance) inside which value (BitValues)
|
||||
/// defines each bit in the result.
|
||||
SmallVector<Value *, 8> BitValues(BW, nullptr);
|
||||
SmallVector<int, 8> BitProvenance(BW, -1);
|
||||
|
||||
// Try to find all the pieces corresponding to the bswap.
|
||||
APInt BitMask = APInt::getAllOnesValue(BitValues.size());
|
||||
if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance))
|
||||
SmallVector<Instruction*, 4> Insts;
|
||||
if (!recognizeBitReverseOrBSwapIdiom(&I, true, false, Insts))
|
||||
return nullptr;
|
||||
Instruction *LastInst = Insts.pop_back_val();
|
||||
LastInst->removeFromParent();
|
||||
|
||||
// Check to see if all of the bits come from the same value.
|
||||
Value *V = BitValues[0];
|
||||
if (!V) return nullptr; // Didn't find a bit? Must be zero.
|
||||
|
||||
if (!std::all_of(BitValues.begin(), BitValues.end(),
|
||||
[&](const Value *X) { return X == V; }))
|
||||
return nullptr;
|
||||
|
||||
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
|
||||
// only byteswap values with an even number of bytes.
|
||||
bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;;
|
||||
for (unsigned i = 0, e = BitValues.size(); i != e; ++i) {
|
||||
OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
|
||||
OKForBitReverse &=
|
||||
bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
|
||||
}
|
||||
|
||||
Intrinsic::ID Intrin;
|
||||
if (OKForBSwap)
|
||||
Intrin = Intrinsic::bswap;
|
||||
else if (OKForBitReverse)
|
||||
Intrin = Intrinsic::bitreverse;
|
||||
else
|
||||
return nullptr;
|
||||
|
||||
Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy);
|
||||
return CallInst::Create(F, V);
|
||||
for (auto *Inst : Insts)
|
||||
Worklist.Add(Inst);
|
||||
return LastInst;
|
||||
}
|
||||
|
||||
/// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0)
|
||||
|
@ -179,13 +179,244 @@ void LandingPadInliningInfo::forwardResume(
|
||||
RI->eraseFromParent();
|
||||
}
|
||||
|
||||
/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
|
||||
static Value *getParentPad(Value *EHPad) {
|
||||
if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
|
||||
return FPI->getParentPad();
|
||||
return cast<CatchSwitchInst>(EHPad)->getParentPad();
|
||||
}
|
||||
|
||||
typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy;
|
||||
|
||||
/// Helper for getUnwindDestToken that does the descendant-ward part of
|
||||
/// the search.
|
||||
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
|
||||
UnwindDestMemoTy &MemoMap) {
|
||||
SmallVector<Instruction *, 8> Worklist(1, EHPad);
|
||||
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *CurrentPad = Worklist.pop_back_val();
|
||||
// We only put pads on the worklist that aren't in the MemoMap. When
|
||||
// we find an unwind dest for a pad we may update its ancestors, but
|
||||
// the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
|
||||
// so they should never get updated while queued on the worklist.
|
||||
assert(!MemoMap.count(CurrentPad));
|
||||
Value *UnwindDestToken = nullptr;
|
||||
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
|
||||
if (CatchSwitch->hasUnwindDest()) {
|
||||
UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
|
||||
} else {
|
||||
// Catchswitch doesn't have a 'nounwind' variant, and one might be
|
||||
// annotated as "unwinds to caller" when really it's nounwind (see
|
||||
// e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
|
||||
// parent's unwind dest from this. We can check its catchpads'
|
||||
// descendants, since they might include a cleanuppad with an
|
||||
// "unwinds to caller" cleanupret, which can be trusted.
|
||||
for (auto HI = CatchSwitch->handler_begin(),
|
||||
HE = CatchSwitch->handler_end();
|
||||
HI != HE && !UnwindDestToken; ++HI) {
|
||||
BasicBlock *HandlerBlock = *HI;
|
||||
auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
|
||||
for (User *Child : CatchPad->users()) {
|
||||
// Intentionally ignore invokes here -- since the catchswitch is
|
||||
// marked "unwind to caller", it would be a verifier error if it
|
||||
// contained an invoke which unwinds out of it, so any invoke we'd
|
||||
// encounter must unwind to some child of the catch.
|
||||
if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
|
||||
continue;
|
||||
|
||||
Instruction *ChildPad = cast<Instruction>(Child);
|
||||
auto Memo = MemoMap.find(ChildPad);
|
||||
if (Memo == MemoMap.end()) {
|
||||
// Haven't figured out this child pad yet; queue it.
|
||||
Worklist.push_back(ChildPad);
|
||||
continue;
|
||||
}
|
||||
// We've already checked this child, but might have found that
|
||||
// it offers no proof either way.
|
||||
Value *ChildUnwindDestToken = Memo->second;
|
||||
if (!ChildUnwindDestToken)
|
||||
continue;
|
||||
// We already know the child's unwind dest, which can either
|
||||
// be ConstantTokenNone to indicate unwind to caller, or can
|
||||
// be another child of the catchpad. Only the former indicates
|
||||
// the unwind dest of the catchswitch.
|
||||
if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
|
||||
UnwindDestToken = ChildUnwindDestToken;
|
||||
break;
|
||||
}
|
||||
assert(getParentPad(ChildUnwindDestToken) == CatchPad);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
|
||||
for (User *U : CleanupPad->users()) {
|
||||
if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
|
||||
if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
|
||||
UnwindDestToken = RetUnwindDest->getFirstNonPHI();
|
||||
else
|
||||
UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
|
||||
break;
|
||||
}
|
||||
Value *ChildUnwindDestToken;
|
||||
if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
|
||||
ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
|
||||
} else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
|
||||
Instruction *ChildPad = cast<Instruction>(U);
|
||||
auto Memo = MemoMap.find(ChildPad);
|
||||
if (Memo == MemoMap.end()) {
|
||||
// Haven't resolved this child yet; queue it and keep searching.
|
||||
Worklist.push_back(ChildPad);
|
||||
continue;
|
||||
}
|
||||
// We've checked this child, but still need to ignore it if it
|
||||
// had no proof either way.
|
||||
ChildUnwindDestToken = Memo->second;
|
||||
if (!ChildUnwindDestToken)
|
||||
continue;
|
||||
} else {
|
||||
// Not a relevant user of the cleanuppad
|
||||
continue;
|
||||
}
|
||||
// In a well-formed program, the child/invoke must either unwind to
|
||||
// an(other) child of the cleanup, or exit the cleanup. In the
|
||||
// first case, continue searching.
|
||||
if (isa<Instruction>(ChildUnwindDestToken) &&
|
||||
getParentPad(ChildUnwindDestToken) == CleanupPad)
|
||||
continue;
|
||||
UnwindDestToken = ChildUnwindDestToken;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If we haven't found an unwind dest for CurrentPad, we may have queued its
|
||||
// children, so move on to the next in the worklist.
|
||||
if (!UnwindDestToken)
|
||||
continue;
|
||||
|
||||
// Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
|
||||
// any ancestors of CurrentPad up to but not including UnwindDestToken's
|
||||
// parent pad. Record this in the memo map, and check to see if the
|
||||
// original EHPad being queried is one of the ones exited.
|
||||
Value *UnwindParent;
|
||||
if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
|
||||
UnwindParent = getParentPad(UnwindPad);
|
||||
else
|
||||
UnwindParent = nullptr;
|
||||
bool ExitedOriginalPad = false;
|
||||
for (Instruction *ExitedPad = CurrentPad;
|
||||
ExitedPad && ExitedPad != UnwindParent;
|
||||
ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
|
||||
// Skip over catchpads since they just follow their catchswitches.
|
||||
if (isa<CatchPadInst>(ExitedPad))
|
||||
continue;
|
||||
MemoMap[ExitedPad] = UnwindDestToken;
|
||||
ExitedOriginalPad |= (ExitedPad == EHPad);
|
||||
}
|
||||
|
||||
if (ExitedOriginalPad)
|
||||
return UnwindDestToken;
|
||||
|
||||
// Continue the search.
|
||||
}
|
||||
|
||||
// No definitive information is contained within this funclet.
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
|
||||
/// return that pad instruction. If it unwinds to caller, return
|
||||
/// ConstantTokenNone. If it does not have a definitive unwind destination,
|
||||
/// return nullptr.
|
||||
///
|
||||
/// This routine gets invoked for calls in funclets in inlinees when inlining
|
||||
/// an invoke. Since many funclets don't have calls inside them, it's queried
|
||||
/// on-demand rather than building a map of pads to unwind dests up front.
|
||||
/// Determining a funclet's unwind dest may require recursively searching its
|
||||
/// descendants, and also ancestors and cousins if the descendants don't provide
|
||||
/// an answer. Since most funclets will have their unwind dest immediately
|
||||
/// available as the unwind dest of a catchswitch or cleanupret, this routine
|
||||
/// searches top-down from the given pad and then up. To avoid worst-case
|
||||
/// quadratic run-time given that approach, it uses a memo map to avoid
|
||||
/// re-processing funclet trees. The callers that rewrite the IR as they go
|
||||
/// take advantage of this, for correctness, by checking/forcing rewritten
|
||||
/// pads' entries to match the original callee view.
|
||||
static Value *getUnwindDestToken(Instruction *EHPad,
|
||||
UnwindDestMemoTy &MemoMap) {
|
||||
// Catchpads unwind to the same place as their catchswitch;
|
||||
// redirect any queries on catchpads so the code below can
|
||||
// deal with just catchswitches and cleanuppads.
|
||||
if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
|
||||
EHPad = CPI->getCatchSwitch();
|
||||
|
||||
// Check if we've already determined the unwind dest for this pad.
|
||||
auto Memo = MemoMap.find(EHPad);
|
||||
if (Memo != MemoMap.end())
|
||||
return Memo->second;
|
||||
|
||||
// Search EHPad and, if necessary, its descendants.
|
||||
Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
|
||||
assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
|
||||
if (UnwindDestToken)
|
||||
return UnwindDestToken;
|
||||
|
||||
// No information is available for this EHPad from itself or any of its
|
||||
// descendants. An unwind all the way out to a pad in the caller would
|
||||
// need also to agree with the unwind dest of the parent funclet, so
|
||||
// search up the chain to try to find a funclet with information. Put
|
||||
// null entries in the memo map to avoid re-processing as we go up.
|
||||
MemoMap[EHPad] = nullptr;
|
||||
Instruction *LastUselessPad = EHPad;
|
||||
Value *AncestorToken;
|
||||
for (AncestorToken = getParentPad(EHPad);
|
||||
auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
|
||||
AncestorToken = getParentPad(AncestorToken)) {
|
||||
// Skip over catchpads since they just follow their catchswitches.
|
||||
if (isa<CatchPadInst>(AncestorPad))
|
||||
continue;
|
||||
assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
|
||||
auto AncestorMemo = MemoMap.find(AncestorPad);
|
||||
if (AncestorMemo == MemoMap.end()) {
|
||||
UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
|
||||
} else {
|
||||
UnwindDestToken = AncestorMemo->second;
|
||||
}
|
||||
if (UnwindDestToken)
|
||||
break;
|
||||
LastUselessPad = AncestorPad;
|
||||
}
|
||||
|
||||
// Since the whole tree under LastUselessPad has no information, it all must
|
||||
// match UnwindDestToken; record that to avoid repeating the search.
|
||||
SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *UselessPad = Worklist.pop_back_val();
|
||||
assert(!MemoMap.count(UselessPad) || MemoMap[UselessPad] == nullptr);
|
||||
MemoMap[UselessPad] = UnwindDestToken;
|
||||
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
|
||||
for (BasicBlock *HandlerBlock : CatchSwitch->handlers())
|
||||
for (User *U : HandlerBlock->getFirstNonPHI()->users())
|
||||
if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
|
||||
Worklist.push_back(cast<Instruction>(U));
|
||||
} else {
|
||||
assert(isa<CleanupPadInst>(UselessPad));
|
||||
for (User *U : UselessPad->users())
|
||||
if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
|
||||
Worklist.push_back(cast<Instruction>(U));
|
||||
}
|
||||
}
|
||||
|
||||
return UnwindDestToken;
|
||||
}
|
||||
|
||||
/// When we inline a basic block into an invoke,
|
||||
/// we have to turn all of the calls that can throw into invokes.
|
||||
/// This function analyzes BB to see if there are any calls, and if so,
|
||||
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
|
||||
/// nodes in that block with the values specified in InvokeDestPHIValues.
|
||||
static BasicBlock *
|
||||
HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
|
||||
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
|
||||
BasicBlock *BB, BasicBlock *UnwindEdge,
|
||||
UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
|
||||
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
|
||||
Instruction *I = &*BBI++;
|
||||
|
||||
@ -196,6 +427,31 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
|
||||
if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
|
||||
continue;
|
||||
|
||||
if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
|
||||
// This call is nested inside a funclet. If that funclet has an unwind
|
||||
// destination within the inlinee, then unwinding out of this call would
|
||||
// be UB. Rewriting this call to an invoke which targets the inlined
|
||||
// invoke's unwind dest would give the call's parent funclet multiple
|
||||
// unwind destinations, which is something that subsequent EH table
|
||||
// generation can't handle and that the verifier rejects. So when we
|
||||
// see such a call, leave it as a call.
|
||||
auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
|
||||
Value *UnwindDestToken =
|
||||
getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
|
||||
if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
|
||||
continue;
|
||||
#ifndef NDEBUG
|
||||
Instruction *MemoKey;
|
||||
if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
|
||||
MemoKey = CatchPad->getCatchSwitch();
|
||||
else
|
||||
MemoKey = FuncletPad;
|
||||
assert(FuncletUnwindMap->count(MemoKey) &&
|
||||
(*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
|
||||
"must get memoized to avoid confusing later searches");
|
||||
#endif // NDEBUG
|
||||
}
|
||||
|
||||
// Convert this function call into an invoke instruction. First, split the
|
||||
// basic block.
|
||||
BasicBlock *Split =
|
||||
@ -328,13 +584,23 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
|
||||
|
||||
// This connects all the instructions which 'unwind to caller' to the invoke
|
||||
// destination.
|
||||
UnwindDestMemoTy FuncletUnwindMap;
|
||||
for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
|
||||
BB != E; ++BB) {
|
||||
if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
|
||||
if (CRI->unwindsToCaller()) {
|
||||
CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI);
|
||||
auto *CleanupPad = CRI->getCleanupPad();
|
||||
CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
|
||||
CRI->eraseFromParent();
|
||||
UpdatePHINodes(&*BB);
|
||||
// Finding a cleanupret with an unwind destination would confuse
|
||||
// subsequent calls to getUnwindDestToken, so map the cleanuppad
|
||||
// to short-circuit any such calls and recognize this as an "unwind
|
||||
// to caller" cleanup.
|
||||
assert(!FuncletUnwindMap.count(CleanupPad) ||
|
||||
isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
|
||||
FuncletUnwindMap[CleanupPad] =
|
||||
ConstantTokenNone::get(Caller->getContext());
|
||||
}
|
||||
}
|
||||
|
||||
@ -345,12 +611,41 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
|
||||
Instruction *Replacement = nullptr;
|
||||
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
|
||||
if (CatchSwitch->unwindsToCaller()) {
|
||||
Value *UnwindDestToken;
|
||||
if (auto *ParentPad =
|
||||
dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
|
||||
// This catchswitch is nested inside another funclet. If that
|
||||
// funclet has an unwind destination within the inlinee, then
|
||||
// unwinding out of this catchswitch would be UB. Rewriting this
|
||||
// catchswitch to unwind to the inlined invoke's unwind dest would
|
||||
// give the parent funclet multiple unwind destinations, which is
|
||||
// something that subsequent EH table generation can't handle and
|
||||
// that the verifier rejects. So when we see such a call, leave it
|
||||
// as "unwind to caller".
|
||||
UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
|
||||
if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
|
||||
continue;
|
||||
} else {
|
||||
// This catchswitch has no parent to inherit constraints from, and
|
||||
// none of its descendants can have an unwind edge that exits it and
|
||||
// targets another funclet in the inlinee. It may or may not have a
|
||||
// descendant that definitively has an unwind to caller. In either
|
||||
// case, we'll have to assume that any unwinds out of it may need to
|
||||
// be routed to the caller, so treat it as though it has a definitive
|
||||
// unwind to caller.
|
||||
UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
|
||||
}
|
||||
auto *NewCatchSwitch = CatchSwitchInst::Create(
|
||||
CatchSwitch->getParentPad(), UnwindDest,
|
||||
CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
|
||||
CatchSwitch);
|
||||
for (BasicBlock *PadBB : CatchSwitch->handlers())
|
||||
NewCatchSwitch->addHandler(PadBB);
|
||||
// Propagate info for the old catchswitch over to the new one in
|
||||
// the unwind map. This also serves to short-circuit any subsequent
|
||||
// checks for the unwind dest of this catchswitch, which would get
|
||||
// confused if they found the outer handler in the callee.
|
||||
FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
|
||||
Replacement = NewCatchSwitch;
|
||||
}
|
||||
} else if (!isa<FuncletPadInst>(I)) {
|
||||
@ -369,8 +664,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
|
||||
for (Function::iterator BB = FirstNewBlock->getIterator(),
|
||||
E = Caller->end();
|
||||
BB != E; ++BB)
|
||||
if (BasicBlock *NewBB =
|
||||
HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest))
|
||||
if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
|
||||
&*BB, UnwindDest, &FuncletUnwindMap))
|
||||
// Update any PHI nodes in the exceptional block to indicate that there
|
||||
// is now a new entry in them.
|
||||
UpdatePHINodes(NewBB);
|
||||
@ -1415,6 +1710,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
|
||||
}
|
||||
}
|
||||
|
||||
// If we are inlining for an invoke instruction, we must make sure to rewrite
|
||||
// any call instructions into invoke instructions. This is sensitive to which
|
||||
// funclet pads were top-level in the inlinee, so must be done before
|
||||
// rewriting the "parent pad" links.
|
||||
if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
|
||||
BasicBlock *UnwindDest = II->getUnwindDest();
|
||||
Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
|
||||
if (isa<LandingPadInst>(FirstNonPHI)) {
|
||||
HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
|
||||
} else {
|
||||
HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
|
||||
}
|
||||
}
|
||||
|
||||
// Update the lexical scopes of the new funclets and callsites.
|
||||
// Anything that had 'none' as its parent is now nested inside the callsite's
|
||||
// EHPad.
|
||||
@ -1472,18 +1781,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
|
||||
}
|
||||
}
|
||||
|
||||
// If we are inlining for an invoke instruction, we must make sure to rewrite
|
||||
// any call instructions into invoke instructions.
|
||||
if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
|
||||
BasicBlock *UnwindDest = II->getUnwindDest();
|
||||
Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
|
||||
if (isa<LandingPadInst>(FirstNonPHI)) {
|
||||
HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
|
||||
} else {
|
||||
HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle any inlined musttail call sites. In order for a new call site to be
|
||||
// musttail, the source of the clone and the inlined call site must have been
|
||||
// musttail. Therefore it's safe to return without merging control into the
|
||||
|
@ -944,37 +944,44 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
|
||||
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
|
||||
unsigned PrefAlign,
|
||||
const DataLayout &DL) {
|
||||
assert(PrefAlign > Align);
|
||||
|
||||
V = V->stripPointerCasts();
|
||||
|
||||
if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
|
||||
// TODO: ideally, computeKnownBits ought to have used
|
||||
// AllocaInst::getAlignment() in its computation already, making
|
||||
// the below max redundant. But, as it turns out,
|
||||
// stripPointerCasts recurses through infinite layers of bitcasts,
|
||||
// while computeKnownBits is not allowed to traverse more than 6
|
||||
// levels.
|
||||
Align = std::max(AI->getAlignment(), Align);
|
||||
if (PrefAlign <= Align)
|
||||
return Align;
|
||||
|
||||
// If the preferred alignment is greater than the natural stack alignment
|
||||
// then don't round up. This avoids dynamic stack realignment.
|
||||
if (DL.exceedsNaturalStackAlignment(PrefAlign))
|
||||
return Align;
|
||||
// If there is a requested alignment and if this is an alloca, round up.
|
||||
if (AI->getAlignment() >= PrefAlign)
|
||||
return AI->getAlignment();
|
||||
AI->setAlignment(PrefAlign);
|
||||
return PrefAlign;
|
||||
}
|
||||
|
||||
if (auto *GO = dyn_cast<GlobalObject>(V)) {
|
||||
// TODO: as above, this shouldn't be necessary.
|
||||
Align = std::max(GO->getAlignment(), Align);
|
||||
if (PrefAlign <= Align)
|
||||
return Align;
|
||||
|
||||
// If there is a large requested alignment and we can, bump up the alignment
|
||||
// of the global. If the memory we set aside for the global may not be the
|
||||
// memory used by the final program then it is impossible for us to reliably
|
||||
// enforce the preferred alignment.
|
||||
if (!GO->isStrongDefinitionForLinker())
|
||||
if (!GO->canIncreaseAlignment())
|
||||
return Align;
|
||||
|
||||
if (GO->getAlignment() >= PrefAlign)
|
||||
return GO->getAlignment();
|
||||
// We can only increase the alignment of the global if it has no alignment
|
||||
// specified or if it is not assigned a section. If it is assigned a
|
||||
// section, the global could be densely packed with other objects in the
|
||||
// section, increasing the alignment could cause padding issues.
|
||||
if (!GO->hasSection() || GO->getAlignment() == 0)
|
||||
GO->setAlignment(PrefAlign);
|
||||
return GO->getAlignment();
|
||||
GO->setAlignment(PrefAlign);
|
||||
return PrefAlign;
|
||||
}
|
||||
|
||||
return Align;
|
||||
@ -1585,3 +1592,205 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// A potential constituent of a bitreverse or bswap expression. See
|
||||
/// collectBitParts for a fuller explanation.
|
||||
struct BitPart {
|
||||
BitPart(Value *P, unsigned BW) : Provider(P) {
|
||||
Provenance.resize(BW);
|
||||
}
|
||||
|
||||
/// The Value that this is a bitreverse/bswap of.
|
||||
Value *Provider;
|
||||
/// The "provenance" of each bit. Provenance[A] = B means that bit A
|
||||
/// in Provider becomes bit B in the result of this expression.
|
||||
SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
|
||||
|
||||
enum { Unset = -1 };
|
||||
};
|
||||
|
||||
/// Analyze the specified subexpression and see if it is capable of providing
|
||||
/// pieces of a bswap or bitreverse. The subexpression provides a potential
|
||||
/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
|
||||
/// the output of the expression came from a corresponding bit in some other
|
||||
/// value. This function is recursive, and the end result is a mapping of
|
||||
/// bitnumber to bitnumber. It is the caller's responsibility to validate that
|
||||
/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse.
|
||||
///
|
||||
/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know
|
||||
/// that the expression deposits the low byte of %X into the high byte of the
|
||||
/// result and that all other bits are zero. This expression is accepted and a
|
||||
/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
|
||||
/// [0-7].
|
||||
///
|
||||
/// To avoid revisiting values, the BitPart results are memoized into the
|
||||
/// provided map. To avoid unnecessary copying of BitParts, BitParts are
|
||||
/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
|
||||
/// store BitParts objects, not pointers. As we need the concept of a nullptr
|
||||
/// BitParts (Value has been analyzed and the analysis failed), we use an Optional
|
||||
/// type instead to provide the same functionality.
|
||||
///
|
||||
/// Because we pass around references into \c BPS, we must use a container that
|
||||
/// does not invalidate internal references (std::map instead of DenseMap).
|
||||
///
|
||||
static const Optional<BitPart> &
|
||||
collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
|
||||
std::map<Value *, Optional<BitPart>> &BPS) {
|
||||
  auto I = BPS.find(V);
  if (I != BPS.end())
    return I->second;

  auto &Result = BPS[V] = None;
  auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();

  if (Instruction *I = dyn_cast<Instruction>(V)) {
    // If this is an or instruction, it may be an inner node of the bswap.
    if (I->getOpcode() == Instruction::Or) {
      auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
                                MatchBitReversals, BPS);
      auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
                                MatchBitReversals, BPS);
      if (!A || !B)
        return Result;

      // Try and merge the two together.
      if (!A->Provider || A->Provider != B->Provider)
        return Result;

      Result = BitPart(A->Provider, BitWidth);
      for (unsigned i = 0; i < A->Provenance.size(); ++i) {
        if (A->Provenance[i] != BitPart::Unset &&
            B->Provenance[i] != BitPart::Unset &&
            A->Provenance[i] != B->Provenance[i])
          return Result = None;

        if (A->Provenance[i] == BitPart::Unset)
          Result->Provenance[i] = B->Provenance[i];
        else
          Result->Provenance[i] = A->Provenance[i];
      }

      return Result;
    }

    // If this is a logical shift by a constant, recurse then shift the result.
    if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
      unsigned BitShift =
          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
      // Ensure the shift amount is defined.
      if (BitShift > BitWidth)
        return Result;

      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
                                  MatchBitReversals, BPS);
      if (!Res)
        return Result;
      Result = Res;

      // Perform the "shift" on BitProvenance.
      auto &P = Result->Provenance;
      if (I->getOpcode() == Instruction::Shl) {
        P.erase(std::prev(P.end(), BitShift), P.end());
        P.insert(P.begin(), BitShift, BitPart::Unset);
      } else {
        P.erase(P.begin(), std::next(P.begin(), BitShift));
        P.insert(P.end(), BitShift, BitPart::Unset);
      }

      return Result;
    }

    // If this is a logical 'and' with a mask that clears bits, recurse then
    // unset the appropriate bits.
    if (I->getOpcode() == Instruction::And &&
        isa<ConstantInt>(I->getOperand(1))) {
      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
      const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();

      // Check that the mask allows a multiple of 8 bits for a bswap, for an
      // early exit.
      unsigned NumMaskedBits = AndMask.countPopulation();
      if (!MatchBitReversals && NumMaskedBits % 8 != 0)
        return Result;

      auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
                                  MatchBitReversals, BPS);
      if (!Res)
        return Result;
      Result = Res;

      for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1)
        // If the AndMask is zero for this bit, clear the bit.
        if ((AndMask & Bit) == 0)
          Result->Provenance[i] = BitPart::Unset;

      return Result;
    }
  }

  // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
  // the input value to the bswap/bitreverse.
  Result = BitPart(V, BitWidth);
  for (unsigned i = 0; i < BitWidth; ++i)
    Result->Provenance[i] = i;
  return Result;
}

static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
                                          unsigned BitWidth) {
  if (From % 8 != To % 8)
    return false;
  // Convert from bit indices to byte indices and check for a byte reversal.
  From >>= 3;
  To >>= 3;
  BitWidth >>= 3;
  return From == BitWidth - To - 1;
}

static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
                                               unsigned BitWidth) {
  return From == BitWidth - To - 1;
}

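// Worked example (editorial addition, not part of the imported code): for a
// 32-bit bswap, result bit To = 0 must come from provider bit From = 24.
// bitTransformIsCorrectForBSwap(24, 0, 32): 24 % 8 == 0 % 8, and in byte
// indices 24>>3 == 3, 0>>3 == 0, 32>>3 == 4, so 3 == 4 - 0 - 1 holds.
// For a bit reversal, result bit 0 must come from provider bit 31, and
// bitTransformIsCorrectForBitReverse(31, 0, 32) checks 31 == 32 - 0 - 1.
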
/// Given an OR instruction, check to see if this is a bitreverse or bswap
/// idiom. If so, insert the new intrinsic and return true.
bool llvm::recognizeBitReverseOrBSwapIdiom(
    Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
    SmallVectorImpl<Instruction *> &InsertedInsts) {
  if (Operator::getOpcode(I) != Instruction::Or)
    return false;
  if (!MatchBSwaps && !MatchBitReversals)
    return false;
  IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
  if (!ITy || ITy->getBitWidth() > 128)
    return false;   // Can't do vectors or integers > 128 bits.
  unsigned BW = ITy->getBitWidth();

  // Try to find all the pieces corresponding to the bswap.
  std::map<Value *, Optional<BitPart>> BPS;
  auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS);
  if (!Res)
    return false;
  auto &BitProvenance = Res->Provenance;

  // Now, is the bit permutation correct for a bswap or a bitreverse? We can
  // only byteswap values with an even number of bytes.
  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;
  for (unsigned i = 0; i < BW; ++i) {
    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
    OKForBitReverse &=
        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
  }

  Intrinsic::ID Intrin;
  if (OKForBSwap && MatchBSwaps)
    Intrin = Intrinsic::bswap;
  else if (OKForBitReverse && MatchBitReversals)
    Intrin = Intrinsic::bitreverse;
  else
    return false;

  Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy);
  InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
  return true;
}

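// Illustrative usage sketch (an editorial addition, not part of the imported
// patch): a client pass calls the matcher on the root OR of a candidate
// expression tree and rewires uses to the inserted intrinsic call. The helper
// name "replaceOrTreeWithIntrinsic" is hypothetical; a typical in-tree caller
// of this API is CodeGenPrepare.
static bool replaceOrTreeWithIntrinsic(Instruction *I) {
  SmallVector<Instruction *, 4> InsertedInsts;
  if (!llvm::recognizeBitReverseOrBSwapIdiom(I, /*MatchBSwaps=*/true,
                                             /*MatchBitReversals=*/true,
                                             InsertedInsts))
    return false;
  // The matcher inserted the intrinsic call just before I; make the original
  // expression use it and leave the now-dead OR tree for later cleanup.
  I->replaceAllUsesWith(InsertedInsts.back());
  return true;
}
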
@ -970,15 +970,34 @@ static Value *valueHasFloatPrecision(Value *Val) {
  return nullptr;
}

//===----------------------------------------------------------------------===//
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
/// Any floating-point library function that we're trying to simplify will have
/// a signature of the form: fptype foo(fptype param1, fptype param2, ...).
/// CheckDoubleTy indicates that 'fptype' must be 'double'.
static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams,
                                          bool CheckDoubleTy) {
  FunctionType *FT = F->getFunctionType();
  if (FT->getNumParams() != NumParams)
    return false;

Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
                                                bool CheckRetType) {
  // The return type must match what we're looking for.
  Type *RetTy = FT->getReturnType();
  if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy())
    return false;

  // Each parameter must match the return type, and therefore, match every other
  // parameter too.
  for (const Type *ParamTy : FT->params())
    if (ParamTy != RetTy)
      return false;

  return true;
}
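// Editorial note (not part of the imported patch): with CheckDoubleTy == true
// the helper accepts prototypes such as "double floor(double)" or
// "double fmin(double, double)" (given the matching NumParams), and rejects
// "float floorf(float)" or "double ldexp(double, int)", since every parameter
// must have the same type as the return type.
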
/// Shrink double -> float for unary functions like 'floor'.
static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
                                    bool CheckRetType) {
  Function *Callee = CI->getCalledFunction();
  FunctionType *FT = Callee->getFunctionType();
  if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
      !FT->getParamType(0)->isDoubleTy())
  if (!matchesFPLibFunctionSignature(Callee, 1, true))
    return nullptr;

  if (CheckRetType) {
@ -1013,15 +1032,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
  return B.CreateFPExt(V, B.getDoubleTy());
}

// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax'
Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
/// Shrink double -> float for binary functions like 'fmin/fmax'.
static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
  Function *Callee = CI->getCalledFunction();
  FunctionType *FT = Callee->getFunctionType();
  // Just make sure this has 2 arguments of the same FP type, which match the
  // result type.
  if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
      FT->getParamType(0) != FT->getParamType(1) ||
      !FT->getParamType(0)->isFloatingPointTy())
  if (!matchesFPLibFunctionSignature(Callee, 2, true))
    return nullptr;

  // If this is something like 'fmin((double)floatval1, (double)floatval2)',
@ -1394,12 +1408,21 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {

Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
  Function *Callee = CI->getCalledFunction();

  Value *Ret = nullptr;
  if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
                                   Callee->getIntrinsicID() == Intrinsic::sqrt))
    Ret = optimizeUnaryDoubleFP(CI, B, true);

  // FIXME: Refactor - this check is repeated all over this file and even in the
  // preceding call to shrink double -> float.

  // Make sure this has 1 argument of FP type, which matches the result type.
  FunctionType *FT = Callee->getFunctionType();
  if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
      !FT->getParamType(0)->isFloatingPointTy())
    return Ret;

  if (!CI->hasUnsafeAlgebra())
    return Ret;

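// Editorial note (not part of the imported patch): the shrink step above means
// a call such as 'sqrt((double)f)' whose result is only ever truncated back to
// float can be rewritten to use the float variant (sqrtf) when the target
// library provides it, which is what the TLI->has(LibFunc::sqrtf) guard plus
// optimizeUnaryDoubleFP(CI, B, /*CheckRetType=*/true) arrange.
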
@ -8,6 +8,7 @@
|
||||
@sg = internal thread_local global %struct.S zeroinitializer, align 1
|
||||
@__dso_handle = external global i8
|
||||
@__tls_guard = internal thread_local unnamed_addr global i1 false
|
||||
@sum1 = internal thread_local global i32 0, align 4
|
||||
|
||||
declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
|
||||
declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
|
||||
@ -74,3 +75,29 @@ __tls_init.exit:
|
||||
; CHECK-NOT: ldp d27, d26
|
||||
; CHECK-NOT: ldp d29, d28
|
||||
; CHECK-NOT: ldp d31, d30
|
||||
|
||||
; CHECK-LABEL: _ZTW4sum1
|
||||
; CHECK-NOT: stp d31, d30
|
||||
; CHECK-NOT: stp d29, d28
|
||||
; CHECK-NOT: stp d27, d26
|
||||
; CHECK-NOT: stp d25, d24
|
||||
; CHECK-NOT: stp d23, d22
|
||||
; CHECK-NOT: stp d21, d20
|
||||
; CHECK-NOT: stp d19, d18
|
||||
; CHECK-NOT: stp d17, d16
|
||||
; CHECK-NOT: stp d7, d6
|
||||
; CHECK-NOT: stp d5, d4
|
||||
; CHECK-NOT: stp d3, d2
|
||||
; CHECK-NOT: stp d1, d0
|
||||
; CHECK-NOT: stp x20, x19
|
||||
; CHECK-NOT: stp x14, x13
|
||||
; CHECK-NOT: stp x12, x11
|
||||
; CHECK-NOT: stp x10, x9
|
||||
; CHECK-NOT: stp x8, x7
|
||||
; CHECK-NOT: stp x6, x5
|
||||
; CHECK-NOT: stp x4, x3
|
||||
; CHECK-NOT: stp x2, x1
|
||||
; CHECK: blr
|
||||
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
|
||||
ret i32* @sum1
|
||||
}
|
||||
|
test/CodeGen/ARM/cse-flags.ll (new file, 43 lines)
@ -0,0 +1,43 @@
|
||||
; RUN: llc -asm-verbose=false < %s | FileCheck %s
|
||||
; PR26063
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "armv7--linux-gnueabihf"
|
||||
|
||||
; CHECK: .LBB0_1:
|
||||
; CHECK-NEXT: bl f{{$}}
|
||||
; CHECK-NEXT: ldrb r[[T0:[0-9]+]], [r{{[0-9]+}}, #1]!{{$}}
|
||||
; CHECK-NEXT: cmp r{{[0-9]+}}, #1{{$}}
|
||||
; CHECK-NEXT: cmpne r[[T0]], #0{{$}}
|
||||
; CHECK-NEXT: bne .LBB0_1{{$}}
|
||||
define i8* @h(i8* readonly %a, i32 %b, i32 %c) {
|
||||
entry:
|
||||
%0 = load i8, i8* %a, align 1
|
||||
%tobool4 = icmp ne i8 %0, 0
|
||||
%cmp5 = icmp ne i32 %b, 1
|
||||
%1 = and i1 %cmp5, %tobool4
|
||||
br i1 %1, label %while.body.preheader, label %while.end
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
%a.addr.06 = phi i8* [ %incdec.ptr, %while.body ], [ %a, %while.body.preheader ]
|
||||
%call = tail call i32 bitcast (i32 (...)* @f to i32 ()*)()
|
||||
%incdec.ptr = getelementptr inbounds i8, i8* %a.addr.06, i32 1
|
||||
%2 = load i8, i8* %incdec.ptr, align 1
|
||||
%tobool = icmp ne i8 %2, 0
|
||||
%cmp = icmp ne i32 %call, 1
|
||||
%3 = and i1 %cmp, %tobool
|
||||
br i1 %3, label %while.body, label %while.end.loopexit
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
%incdec.ptr.lcssa = phi i8* [ %incdec.ptr, %while.body ]
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
%a.addr.0.lcssa = phi i8* [ %a, %entry ], [ %incdec.ptr.lcssa, %while.end.loopexit ]
|
||||
ret i8* %a.addr.0.lcssa
|
||||
}
|
||||
|
||||
declare i32 @f(...)
|
@ -8,6 +8,7 @@
|
||||
@sg = internal thread_local global %struct.S zeroinitializer, align 1
|
||||
@__dso_handle = external global i8
|
||||
@__tls_guard = internal thread_local unnamed_addr global i1 false
|
||||
@sum1 = internal thread_local global i32 0, align 4
|
||||
|
||||
declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
|
||||
declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
|
||||
@ -44,3 +45,13 @@ __tls_init.exit:
|
||||
; CHECK-NOT: pop {r9, r12}
|
||||
; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}
|
||||
; CHECK: pop {lr}
|
||||
|
||||
; CHECK-LABEL: _ZTW4sum1
|
||||
; CHECK-NOT: push {r1, r2, r3, r4, r7, lr}
|
||||
; CHECK-NOT: push {r9, r12}
|
||||
; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
|
||||
; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
|
||||
; CHECK: blx
|
||||
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
|
||||
ret i32* @sum1
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS
|
||||
; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN
|
||||
; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
|
||||
; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
|
||||
; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI
|
||||
; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
|
||||
; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI
|
||||
; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=arm-none-androideabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=arm-none-gnueabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK
|
||||
; RUN: llc < %s -mtriple=arm-none-gnueabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-GNUEABI --check-prefix=CHECK
|
||||
|
||||
define void @f1(i8* %dest, i8* %src) {
|
||||
entry:
|
||||
@ -402,8 +402,8 @@ entry:
|
||||
; CHECK: arr1:
|
||||
; CHECK-IOS: .align 3
|
||||
; CHECK-DARWIN: .align 2
|
||||
; CHECK-EABI: .align 2
|
||||
; CHECK-GNUEABI: .align 2
|
||||
; CHECK-EABI-NOT: .align
|
||||
; CHECK-GNUEABI-NOT: .align
|
||||
; CHECK: arr2:
|
||||
; CHECK: {{\.section.+foo,bar}}
|
||||
; CHECK-NOT: .align
|
||||
|
@ -1,20 +0,0 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
; CHECK: addl

; The two additions are the same, but have different flags.
; In theory this code should never be generated by the frontend, but this
; tries to test that two identical instructions with two different flags
; actually generate two different nodes.
;
; Normally the combiner would see this condition without the flags
; and optimize the result of the sub into a register clear
; (the final result would be 0). With the different flags, though, the combiner
; needs to keep the add and sub nodes, because the two adds are distinct
; nodes, so it cannot assume that subtracting them yields 0 as the result.
define i32 @foo(i32 %a, i32 %b) {
  %1 = add i32 %a, %b
  %2 = add nsw i32 %a, %b
  %3 = sub i32 %1, %2
  ret i32 %3
}
@ -4,11 +4,13 @@
|
||||
; tricks similar to AArch64 fast TLS calling convention (r255821).
|
||||
; Applying tricks on x86-64 similar to r255821.
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -O0 | FileCheck %s --check-prefix=CHECK-O0
|
||||
%struct.S = type { i8 }
|
||||
|
||||
@sg = internal thread_local global %struct.S zeroinitializer, align 1
|
||||
@__dso_handle = external global i8
|
||||
@__tls_guard = internal thread_local unnamed_addr global i1 false
|
||||
@sum1 = internal thread_local global i32 0, align 4
|
||||
|
||||
declare void @_ZN1SC1Ev(%struct.S*)
|
||||
declare void @_ZN1SD1Ev(%struct.S*)
|
||||
@ -50,3 +52,28 @@ init.i:
|
||||
__tls_init.exit:
|
||||
ret %struct.S* @sg
|
||||
}
|
||||
|
||||
; CHECK-LABEL: _ZTW4sum1
|
||||
; CHECK-NOT: pushq %r11
|
||||
; CHECK-NOT: pushq %r10
|
||||
; CHECK-NOT: pushq %r9
|
||||
; CHECK-NOT: pushq %r8
|
||||
; CHECK-NOT: pushq %rsi
|
||||
; CHECK-NOT: pushq %rdx
|
||||
; CHECK-NOT: pushq %rcx
|
||||
; CHECK-NOT: pushq %rbx
|
||||
; CHECK: callq
|
||||
define cxx_fast_tlscc nonnull i32* @_ZTW4sum1() nounwind {
|
||||
ret i32* @sum1
|
||||
}
|
||||
|
||||
; Make sure at O0 we don't overwrite RBP.
|
||||
; CHECK-O0-LABEL: _ZTW4sum2
|
||||
; CHECK-O0: pushq %rbp
|
||||
; CHECK-O0: movq %rsp, %rbp
|
||||
; CHECK-O0-NOT: movq %r{{.*}}, (%rbp)
|
||||
define cxx_fast_tlscc i32* @_ZTW4sum2() #0 {
|
||||
ret i32* @sum1
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "no-frame-pointer-elim"="true" }
|
||||
|
@ -1,11 +1,5 @@
|
||||
; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK
|
||||
;
|
||||
; This test checks that we do not use shrink-wrapping when
|
||||
; the function does not have any frame pointer and may unwind.
|
||||
; This is a workaround for a limitation in the emission of
|
||||
; the CFI directives, which are not correct in such a case.
|
||||
; PR25614
|
||||
;
|
||||
; Note: This test cannot be merged with the shrink-wrapping tests
|
||||
; because the booleans set on the command line take precedence on
|
||||
; the target logic that disable shrink-wrapping.
|
||||
@ -13,6 +7,12 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "x86_64-apple-macosx"
|
||||
|
||||
|
||||
; This test checks that we do not use shrink-wrapping when
|
||||
; the function does not have any frame pointer and may unwind.
|
||||
; This is a workaround for a limitation in the emission of
|
||||
; the CFI directives, which are not correct in such a case.
|
||||
; PR25614
|
||||
;
|
||||
; No shrink-wrapping should occur here, until the CFI information is fixed.
|
||||
; CHECK-LABEL: framelessUnwind:
|
||||
;
|
||||
@ -151,3 +151,74 @@ false:
|
||||
}
|
||||
|
||||
attributes #2 = { "no-frame-pointer-elim"="false" nounwind }
|
||||
|
||||
|
||||
; Check that we generate correct code for segmented stack.
|
||||
; We used to emit the code at the entry point of the function
|
||||
; instead of just before the prologue.
|
||||
; For now, shrink-wrapping is disabled on segmented stack functions: PR26107.
|
||||
;
|
||||
; CHECK-LABEL: segmentedStack:
|
||||
; CHECK: cmpq
|
||||
; CHECK-NEXT: ja [[ENTRY_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; CHECK: callq ___morestack
|
||||
; CHECK-NEXT: retq
|
||||
;
|
||||
; CHECK: [[ENTRY_LABEL]]:
|
||||
; Prologue
|
||||
; CHECK: push
|
||||
;
|
||||
; In PR26107, we used to drop these two basic blocks, because
|
||||
; the segmentedStack entry block was jumping directly to
|
||||
; the place where the prologue is actually needed, which is
|
||||
; the call to memcmp.
|
||||
; Then, those two basic blocks did not have any predecessors
|
||||
; anymore and were removed.
|
||||
;
|
||||
; Check if vk1 is null
|
||||
; CHECK: testq %rdi, %rdi
|
||||
; CHECK-NEXT: je [[STRINGS_EQUAL:LBB[0-9_]+]]
|
||||
;
|
||||
; Check if vk2 is null
|
||||
; CHECK: testq %rsi, %rsi
|
||||
; CHECK-NEXT: je [[STRINGS_EQUAL]]
|
||||
;
|
||||
; CHECK: [[STRINGS_EQUAL]]
|
||||
; CHECK-NEXT: popq
|
||||
define zeroext i1 @segmentedStack(i8* readonly %vk1, i8* readonly %vk2, i64 %key_size) #5 {
|
||||
entry:
|
||||
%cmp.i = icmp eq i8* %vk1, null
|
||||
%cmp1.i = icmp eq i8* %vk2, null
|
||||
%brmerge.i = or i1 %cmp.i, %cmp1.i
|
||||
%cmp1.mux.i = and i1 %cmp.i, %cmp1.i
|
||||
br i1 %brmerge.i, label %__go_ptr_strings_equal.exit, label %if.end4.i
|
||||
|
||||
if.end4.i: ; preds = %entry
|
||||
%tmp = getelementptr inbounds i8, i8* %vk1, i64 8
|
||||
%tmp1 = bitcast i8* %tmp to i64*
|
||||
%tmp2 = load i64, i64* %tmp1, align 8
|
||||
%tmp3 = getelementptr inbounds i8, i8* %vk2, i64 8
|
||||
%tmp4 = bitcast i8* %tmp3 to i64*
|
||||
%tmp5 = load i64, i64* %tmp4, align 8
|
||||
%cmp.i.i = icmp eq i64 %tmp2, %tmp5
|
||||
br i1 %cmp.i.i, label %land.rhs.i.i, label %__go_ptr_strings_equal.exit
|
||||
|
||||
land.rhs.i.i: ; preds = %if.end4.i
|
||||
%tmp6 = bitcast i8* %vk2 to i8**
|
||||
%tmp7 = load i8*, i8** %tmp6, align 8
|
||||
%tmp8 = bitcast i8* %vk1 to i8**
|
||||
%tmp9 = load i8*, i8** %tmp8, align 8
|
||||
%call.i.i = tail call i32 @memcmp(i8* %tmp9, i8* %tmp7, i64 %tmp2) #5
|
||||
%cmp4.i.i = icmp eq i32 %call.i.i, 0
|
||||
br label %__go_ptr_strings_equal.exit
|
||||
|
||||
__go_ptr_strings_equal.exit: ; preds = %land.rhs.i.i, %if.end4.i, %entry
|
||||
%retval.0.i = phi i1 [ %cmp1.mux.i, %entry ], [ false, %if.end4.i ], [ %cmp4.i.i, %land.rhs.i.i ]
|
||||
ret i1 %retval.0.i
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) #5
|
||||
|
||||
attributes #5 = { nounwind readonly ssp uwtable "split-stack" }
|
||||
|
test/DebugInfo/ARM/PR26163.ll (new file, 107 lines)
@ -0,0 +1,107 @@
|
||||
; RUN: llc -filetype=obj -o - < %s | llvm-dwarfdump - | FileCheck %s
|
||||
;
|
||||
; Checks that we're creating two ranges, one that terminates immediately
|
||||
; and one that spans the rest of the function. This isn't necessarily the
|
||||
; best thing to do here (and also not necessarily correct, since the first
|
||||
; one has a bit_piece), but it is what is currently being emitted; any
|
||||
; change here needs to be intentional, so the test is very specific.
|
||||
;
|
||||
; CHECK: .debug_loc contents:
|
||||
; CHECK: 0x00000000: Beginning address offset: 0x0000000000000004
|
||||
; CHECK: Ending address offset: 0x0000000000000004
|
||||
; CHECK: Location description: 10 00 9f
|
||||
; CHECK: Beginning address offset: 0x0000000000000004
|
||||
; CHECK: Ending address offset: 0x0000000000000014
|
||||
; CHECK: Location description: 10 00 9f
|
||||
|
||||
; Created from the following test case (PR26163) with
|
||||
; clang -cc1 -triple armv4t--freebsd11.0-gnueabi -emit-obj -debug-info-kind=standalone -O2 -x c test.c
|
||||
;
|
||||
; typedef unsigned int size_t;
|
||||
; struct timeval {
|
||||
; long long tv_sec;
|
||||
; int tv_usec;
|
||||
; };
|
||||
;
|
||||
; void *memset(void *, int, size_t);
|
||||
; void foo(void);
|
||||
;
|
||||
; static void
|
||||
; bar(int value)
|
||||
; {
|
||||
; struct timeval lifetime;
|
||||
;
|
||||
; memset(&lifetime, 0, sizeof(struct timeval));
|
||||
; lifetime.tv_sec = value;
|
||||
;
|
||||
; foo();
|
||||
; }
|
||||
;
|
||||
; int
|
||||
; parse_config_file(void)
|
||||
; {
|
||||
; int value;
|
||||
;
|
||||
; bar(value);
|
||||
; return (0);
|
||||
; }
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "armv4t--freebsd11.0-gnueabi"
|
||||
|
||||
%struct.timeval = type { i64, i32 }
|
||||
|
||||
declare void @llvm.dbg.declare(metadata, metadata, metadata)
|
||||
declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
|
||||
|
||||
declare void @foo()
|
||||
|
||||
define i32 @parse_config_file() !dbg !4 {
|
||||
entry:
|
||||
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !26), !dbg !27
|
||||
tail call void @llvm.dbg.declare(metadata %struct.timeval* undef, metadata !16, metadata !26), !dbg !29
|
||||
tail call void @llvm.dbg.value(metadata i64 0, i64 0, metadata !16, metadata !30), !dbg !29
|
||||
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !16, metadata !31), !dbg !29
|
||||
tail call void @foo() #3, !dbg !32
|
||||
ret i32 0, !dbg !33
|
||||
}
|
||||
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!22, !23, !24}
|
||||
!llvm.ident = !{!25}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (https://github.com/llvm-mirror/clang 89dda3855cda574f355e6defa1d77bdae5053994) (llvm/trunk 257891)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
|
||||
!1 = !DIFile(filename: "<stdin>", directory: "/home/ubuntu/bugs")
|
||||
!2 = !{}
|
||||
!3 = !{!4, !11}
|
||||
!4 = distinct !DISubprogram(name: "parse_config_file", scope: !5, file: !5, line: 22, type: !6, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, variables: !9)
|
||||
!5 = !DIFile(filename: "test.c", directory: "/home/ubuntu/bugs")
|
||||
!6 = !DISubroutineType(types: !7)
|
||||
!7 = !{!8}
|
||||
!8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
|
||||
!9 = !{!10}
|
||||
!10 = !DILocalVariable(name: "value", scope: !4, file: !5, line: 24, type: !8)
|
||||
!11 = distinct !DISubprogram(name: "bar", scope: !5, file: !5, line: 11, type: !12, isLocal: true, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, variables: !14)
|
||||
!12 = !DISubroutineType(types: !13)
|
||||
!13 = !{null, !8}
|
||||
!14 = !{!15, !16}
|
||||
!15 = !DILocalVariable(name: "value", arg: 1, scope: !11, file: !5, line: 11, type: !8)
|
||||
!16 = !DILocalVariable(name: "lifetime", scope: !11, file: !5, line: 13, type: !17)
|
||||
!17 = !DICompositeType(tag: DW_TAG_structure_type, name: "timeval", file: !5, line: 2, size: 128, align: 64, elements: !18)
|
||||
!18 = !{!19, !21}
|
||||
!19 = !DIDerivedType(tag: DW_TAG_member, name: "tv_sec", scope: !17, file: !5, line: 3, baseType: !20, size: 64, align: 64)
|
||||
!20 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
|
||||
!21 = !DIDerivedType(tag: DW_TAG_member, name: "tv_usec", scope: !17, file: !5, line: 4, baseType: !8, size: 32, align: 32, offset: 64)
|
||||
!22 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!23 = !{i32 1, !"wchar_size", i32 4}
|
||||
!24 = !{i32 1, !"min_enum_size", i32 4}
|
||||
!25 = !{!"clang version 3.9.0 (https://github.com/llvm-mirror/clang 89dda3855cda574f355e6defa1d77bdae5053994) (llvm/trunk 257891)"}
|
||||
!26 = !DIExpression()
|
||||
!27 = !DILocation(line: 11, scope: !11, inlinedAt: !28)
|
||||
!28 = distinct !DILocation(line: 26, scope: !4)
|
||||
!29 = !DILocation(line: 13, scope: !11, inlinedAt: !28)
|
||||
!30 = !DIExpression(DW_OP_bit_piece, 0, 64)
|
||||
!31 = !DIExpression(DW_OP_bit_piece, 0, 32)
|
||||
!32 = !DILocation(line: 18, scope: !11, inlinedAt: !28)
|
||||
!33 = !DILocation(line: 27, scope: !4)
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
declare i32 @FB()
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
declare i32 @FB()
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
define i32 @bar() nounwind {
|
||||
ret i32 0
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
; This test should fail until remote symbol resolution is supported.
|
||||
|
||||
define i32 @main() nounwind {
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
; The intention of this test is to verify that symbols mapped to COMMON in ELF
|
||||
; work as expected.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
; Check that a variable is always aligned as specified.
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
define double @test(double* %DP, double %Arg) nounwind {
|
||||
%D = load double, double* %DP ; <double> [#uses=1]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
@count = global i32 1, align 4
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \
|
||||
; RUN: -relocation-model=pic -code-model=small %s > /dev/null
|
||||
; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32
|
||||
; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32
|
||||
|
||||
@count = global i32 1, align 4
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
|
||||
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \
|
||||
; RUN: -O0 -relocation-model=pic -code-model=small %s
|
||||
; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32
|
||||
; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32
|
||||
|
||||
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
|
||||
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
declare i32 @FB()
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
declare i32 @FB()
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
define i32 @bar() nounwind {
|
||||
ret i32 0
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
; This test should fail until remote symbol resolution is supported.
|
||||
|
||||
define i32 @main() nounwind {
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
; The intention of this test is to verify that symbols mapped to COMMON in ELF
|
||||
; work as expected.
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
; Check that a variable is always aligned as specified.
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
define double @test(double* %DP, double %Arg) nounwind {
|
||||
%D = load double, double* %DP ; <double> [#uses=1]
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
@count = global i32 1, align 4
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \
|
||||
; RUN: -relocation-model=pic -code-model=small %s > /dev/null
|
||||
; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32
|
||||
; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32
|
||||
|
||||
@count = global i32 1, align 4
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
|
||||
; XFAIL: win32
|
||||
; XFAIL: mingw32,win32
|
||||
|
||||
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
|
||||
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
|
||||
|
@ -1,6 +1,6 @@
|
||||
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \
|
||||
; RUN: -O0 -relocation-model=pic -code-model=small %s
|
||||
; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32
|
||||
; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, mingw32, win32
|
||||
|
||||
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
|
||||
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
|
||||
|
@ -1,7 +1,14 @@
|
||||
// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=asm -o - \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-ASM
|
||||
// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o - \
|
||||
// RUN: | llvm-readobj -s -sd | FileCheck %s --check-prefix=CHECK-OBJ
|
||||
// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o %t
|
||||
// RUN: llvm-readobj -s -sd %t | FileCheck %s --check-prefix=CHECK-OBJ
|
||||
// RUN: llvm-objdump -t %t | FileCheck %s --check-prefix=CHECK-SYMS
|
||||
|
||||
// RUN: llvm-mc %s -triple=aarch64_be-none-linux-gnu -filetype=asm -o - \
|
||||
// RUN: | FileCheck %s --check-prefix=CHECK-ASM
|
||||
// RUN: llvm-mc %s -triple=aarch64_be-none-linux-gnu -filetype=obj -o %t
|
||||
// RUN: llvm-readobj -s -sd %t | FileCheck %s --check-prefix=CHECK-OBJ
|
||||
// RUN: llvm-objdump -t %t | FileCheck %s --check-prefix=CHECK-SYMS
|
||||
|
||||
.section .inst.aarch64_inst
|
||||
|
||||
@ -22,3 +29,7 @@ aarch64_inst:
|
||||
// CHECK-OBJ: SectionData (
|
||||
// CHECK-OBJ-NEXT: 0000: 2040105E
|
||||
// CHECK-OBJ-NEXT: )
|
||||
|
||||
// CHECK-SYMS-NOT: 0000000000000000 .inst.aarch64_inst 00000000 $d
|
||||
// CHECK-SYMS: 0000000000000000 .inst.aarch64_inst 00000000 $x
|
||||
// CHECK-SYMS-NOT: 0000000000000000 .inst.aarch64_inst 00000000 $d
|
||||
|
test/Transforms/CodeGenPrepare/ARM/bitreverse-recognize.ll (new file, 37 lines)
@ -0,0 +1,37 @@
|
||||
; RUN: opt -S -loop-unroll -codegenprepare < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
|
||||
target triple = "armv7--linux-gnueabihf"
|
||||
|
||||
; CHECK-LABEL: @f
|
||||
define i32 @f(i32 %a) #0 {
|
||||
; CHECK: call i32 @llvm.bitreverse.i32
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body
|
||||
ret i32 %or
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ]
|
||||
%shr = lshr i32 %a, %i.08
|
||||
%and = and i32 %shr, 1
|
||||
%sub = sub nuw nsw i32 31, %i.08
|
||||
%shl = shl i32 %and, %sub
|
||||
%or = or i32 %shl, %b.07
|
||||
%inc = add nuw nsw i32 %i.08, 1
|
||||
%exitcond = icmp eq i32 %inc, 32
|
||||
br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.module.flags = !{!0, !1}
|
||||
!llvm.ident = !{!2}
|
||||
|
||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||
!1 = !{i32 1, !"min_enum_size", i32 4}
|
||||
!2 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git b7441a0f42c43a8eea9e3e706be187252db747fa)"}
|
||||
!3 = distinct !{!3, !4}
|
||||
!4 = !{!"llvm.loop.unroll.full"}
|
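; Editorial note (not part of the imported test): the rolled loop above is the
; classic bit-reversal idiom, roughly "for (i = 0, b = 0; i < 32; ++i)
; b |= ((a >> i) & 1) << (31 - i);" in C. After -loop-unroll fully unrolls it,
; -codegenprepare is expected to collapse the resulting 32-term OR tree into a
; single call to @llvm.bitreverse.i32, which is what the CHECK line verifies.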
test/Transforms/CodeGenPrepare/ARM/lit.local.cfg (new file, 3 lines)
@ -0,0 +1,3 @@
if not 'ARM' in config.root.targets:
    config.unsupported = True

test/Transforms/CodeGenPrepare/bitreverse-hang.ll (new file, 53 lines)
@ -0,0 +1,53 @@
|
||||
; RUN: opt < %s -loop-unroll -codegenprepare -S | FileCheck %s
|
||||
|
||||
; This test is a worst-case scenario for bitreversal/byteswap detection.
|
||||
; After loop unrolling (the unrolled loop is unreadably large so it has been kept
|
||||
; rolled here), we have a binary tree of OR operands (as bitreversal detection
|
||||
; looks straight through shifts):
|
||||
;
|
||||
; OR
|
||||
; | \
|
||||
; | LSHR
|
||||
; | /
|
||||
; OR
|
||||
; | \
|
||||
; | LSHR
|
||||
; | /
|
||||
; OR
|
||||
;
|
||||
; This results in exponential runtime. The loop here has 32 iterations, which will
|
||||
; totally hang if we don't deal with this case cleverly.
|
||||
|
||||
@b = common global i32 0, align 4
|
||||
|
||||
; CHECK: define i32 @fn1
|
||||
define i32 @fn1() #0 {
|
||||
entry:
|
||||
%b.promoted = load i32, i32* @b, align 4, !tbaa !2
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
|
||||
%i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
%shr = lshr i32 %or4, 1
|
||||
%or = or i32 %shr, %or4
|
||||
%inc = add nuw nsw i32 %i.03, 1
|
||||
%exitcond = icmp eq i32 %inc, 32
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
store i32 %or, i32* @b, align 4, !tbaa !2
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!llvm.ident = !{!1}
|
||||
|
||||
!0 = !{i32 1, !"PIC Level", i32 2}
|
||||
!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
|
||||
!2 = !{!3, !3, i64 0}
|
||||
!3 = !{!"int", !4, i64 0}
|
||||
!4 = !{!"omnipotent char", !5, i64 0}
|
||||
!5 = !{!"Simple C/C++ TBAA"}
|
test/Transforms/Inline/inline-funclets.ll (new file, 455 lines)
@ -0,0 +1,455 @@
|
||||
; RUN: opt -inline -S %s | FileCheck %s
|
||||
|
||||
declare void @g()
|
||||
|
||||
|
||||
;;; Test with a call in a funclet that needs to remain a call
|
||||
;;; when inlined because the funclet doesn't unwind to caller.
|
||||
;;; CHECK-LABEL: define void @test1(
|
||||
define void @test1() personality void ()* @g {
|
||||
entry:
|
||||
; CHECK-NEXT: entry:
|
||||
invoke void @test1_inlinee()
|
||||
to label %exit unwind label %cleanup
|
||||
cleanup:
|
||||
%pad = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %pad) ]
|
||||
cleanupret from %pad unwind to caller
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test1_inlinee() alwaysinline personality void ()* @g {
|
||||
entry:
|
||||
invoke void @g()
|
||||
to label %exit unwind label %cleanup.inner
|
||||
; CHECK-NEXT: invoke void @g()
|
||||
; CHECK-NEXT: unwind label %[[cleanup_inner:.+]]
|
||||
|
||||
cleanup.inner:
|
||||
%pad.inner = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %pad.inner) ]
|
||||
cleanupret from %pad.inner unwind label %cleanup.outer
|
||||
; CHECK: [[cleanup_inner]]:
|
||||
; The call here needs to remain a call because pad.inner has a cleanupret
|
||||
; that stays within the inlinee.
|
||||
; CHECK-NEXT: %[[pad_inner:[^ ]+]] = cleanuppad within none
|
||||
; CHECK-NEXT: call void @g() [ "funclet"(token %[[pad_inner]]) ]
|
||||
; CHECK-NEXT: cleanupret from %[[pad_inner]] unwind label %[[cleanup_outer:.+]]
|
||||
|
||||
cleanup.outer:
|
||||
%pad.outer = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %pad.outer) ]
|
||||
cleanupret from %pad.outer unwind to caller
|
||||
; CHECK: [[cleanup_outer]]:
|
||||
; The call and cleanupret here need to be redirected to caller cleanup
|
||||
; CHECK-NEXT: %[[pad_outer:[^ ]+]] = cleanuppad within none
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[pad_outer]]) ]
|
||||
; CHECK-NEXT: unwind label %cleanup
|
||||
; CHECK: cleanupret from %[[pad_outer]] unwind label %cleanup{{$}}
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
;;; Test with an "unwind to caller" catchswitch in a parent funclet
|
||||
;;; that needs to remain "unwind to caller" because the parent
|
||||
;;; doesn't unwind to caller.
|
||||
;;; CHECK-LABEL: define void @test2(
|
||||
define void @test2() personality void ()* @g {
|
||||
entry:
|
||||
; CHECK-NEXT: entry:
|
||||
invoke void @test2_inlinee()
|
||||
to label %exit unwind label %cleanup
|
||||
cleanup:
|
||||
%pad = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %pad) ]
|
||||
cleanupret from %pad unwind to caller
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test2_inlinee() alwaysinline personality void ()* @g {
|
||||
entry:
|
||||
invoke void @g()
|
||||
to label %exit unwind label %cleanup1
|
||||
; CHECK-NEXT: invoke void @g()
|
||||
; CHECK-NEXT: unwind label %[[cleanup1:.+]]
|
||||
|
||||
cleanup1:
|
||||
%outer = cleanuppad within none []
|
||||
invoke void @g() [ "funclet"(token %outer) ]
|
||||
to label %ret1 unwind label %catchswitch
|
||||
; CHECK: [[cleanup1]]:
|
||||
; CHECK-NEXT: %[[outer:[^ ]+]] = cleanuppad within none
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[outer]]) ]
|
||||
; CHECK-NEXT: unwind label %[[catchswitch:.+]]
|
||||
|
||||
catchswitch:
|
||||
%cs = catchswitch within %outer [label %catch] unwind to caller
|
||||
; CHECK: [[catchswitch]]:
|
||||
; The catchswitch here needs to remain "unwind to caller" since %outer
|
||||
; has a cleanupret that remains within the inlinee.
|
||||
; CHECK-NEXT: %[[cs:[^ ]+]] = catchswitch within %[[outer]] [label %[[catch:.+]]] unwind to caller
|
||||
|
||||
catch:
|
||||
%inner = catchpad within %cs []
|
||||
call void @g() [ "funclet"(token %inner) ]
|
||||
catchret from %inner to label %ret1
|
||||
; CHECK: [[catch]]:
|
||||
; The call here needs to remain a call since it too is within %outer
|
||||
; CHECK: %[[inner:[^ ]+]] = catchpad within %[[cs]]
|
||||
; CHECK-NEXT: call void @g() [ "funclet"(token %[[inner]]) ]
|
||||
|
||||
ret1:
|
||||
cleanupret from %outer unwind label %cleanup2
|
||||
; CHECK: cleanupret from %[[outer]] unwind label %[[cleanup2:.+]]
|
||||
|
||||
cleanup2:
|
||||
%later = cleanuppad within none []
|
||||
cleanupret from %later unwind to caller
|
||||
; CHECK: [[cleanup2]]:
|
||||
; The cleanupret here needs to get redirected to the caller cleanup
|
||||
; CHECK-NEXT: %[[later:[^ ]+]] = cleanuppad within none
|
||||
; CHECK-NEXT: cleanupret from %[[later]] unwind label %cleanup{{$}}
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;;; Test with a call in a cleanup that has no definitive unwind
|
||||
;;; destination, that must be rewritten to an invoke.
|
||||
;;; CHECK-LABEL: define void @test3(
|
||||
define void @test3() personality void ()* @g {
|
||||
entry:
|
||||
; CHECK-NEXT: entry:
|
||||
invoke void @test3_inlinee()
|
||||
to label %exit unwind label %cleanup
|
||||
cleanup:
|
||||
%pad = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %pad) ]
|
||||
cleanupret from %pad unwind to caller
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test3_inlinee() alwaysinline personality void ()* @g {
|
||||
entry:
|
||||
invoke void @g()
|
||||
to label %exit unwind label %cleanup
|
||||
; CHECK-NEXT: invoke void @g()
|
||||
; CHECK-NEXT: unwind label %[[cleanup:.+]]
|
||||
|
||||
cleanup:
|
||||
%pad = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %pad) ]
|
||||
unreachable
|
||||
; CHECK: [[cleanup]]:
|
||||
; The call must be rewritten to an invoke targeting the caller cleanup
|
||||
; because it may well unwind to there.
|
||||
; CHECK-NEXT: %[[pad:[^ ]+]] = cleanuppad within none
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[pad]]) ]
|
||||
; CHECK-NEXT: unwind label %cleanup{{$}}
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;;; Test with a catchswitch in a cleanup that has no definitive
|
||||
;;; unwind destination, that must be rewritten to unwind to the
|
||||
;;; inlined invoke's unwind dest
|
||||
;;; CHECK-LABEL: define void @test4(
|
||||
define void @test4() personality void ()* @g {
|
||||
entry:
|
||||
; CHECK-NEXT: entry:
|
||||
invoke void @test4_inlinee()
|
||||
to label %exit unwind label %cleanup
|
||||
cleanup:
|
||||
%pad = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %pad) ]
|
||||
cleanupret from %pad unwind to caller
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test4_inlinee() alwaysinline personality void ()* @g {
|
||||
entry:
|
||||
invoke void @g()
|
||||
to label %exit unwind label %cleanup
|
||||
; CHECK-NEXT: invoke void @g()
|
||||
; CHECK-NEXT: unwind label %[[cleanup:.+]]
|
||||
|
||||
cleanup:
|
||||
%clean = cleanuppad within none []
|
||||
invoke void @g() [ "funclet"(token %clean) ]
|
||||
to label %unreachable unwind label %dispatch
|
||||
; CHECK: [[cleanup]]:
|
||||
; CHECK-NEXT: %[[clean:[^ ]+]] = cleanuppad within none
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[clean]]) ]
|
||||
; CHECK-NEXT: unwind label %[[dispatch:.+]]
|
||||
|
||||
dispatch:
|
||||
%cs = catchswitch within %clean [label %catch] unwind to caller
|
||||
; CHECK: [[dispatch]]:
|
||||
; The catchswitch must be rewritten to unwind to %cleanup in the caller
|
||||
; because it may well unwind to there.
|
||||
; CHECK-NEXT: %[[cs:[^ ]+]] = catchswitch within %[[clean]] [label %[[catch:.+]]] unwind label %cleanup{{$}}
|
||||
|
||||
catch:
|
||||
catchpad within %cs []
|
||||
br label %unreachable
|
||||
unreachable:
|
||||
unreachable
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;;; Test with multiple levels of nesting, and unwind dests
|
||||
;;; that need to be inferred from ancestors, descendants,
|
||||
;;; and cousins.
|
||||
;;; CHECK-LABEL: define void @test5(
|
||||
define void @test5() personality void ()* @g {
|
||||
entry:
|
||||
; CHECK-NEXT: entry:
|
||||
invoke void @test5_inlinee()
|
||||
to label %exit unwind label %cleanup
|
||||
cleanup:
|
||||
%pad = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %pad) ]
|
||||
cleanupret from %pad unwind to caller
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @test5_inlinee() alwaysinline personality void ()* @g {
|
||||
entry:
|
||||
invoke void @g()
|
||||
to label %cont unwind label %noinfo.root
|
||||
; CHECK-NEXT: invoke void @g()
|
||||
; CHECK-NEXT: to label %[[cont:[^ ]+]] unwind label %[[noinfo_root:.+]]
|
||||
|
||||
noinfo.root:
|
||||
%noinfo.root.pad = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %noinfo.root.pad) ]
|
||||
invoke void @g() [ "funclet"(token %noinfo.root.pad) ]
|
||||
to label %noinfo.root.cont unwind label %noinfo.left
|
||||
; CHECK: [[noinfo_root]]:
|
||||
; Nothing under "noinfo.root" has a definitive unwind destination, so
|
||||
; we must assume all of it may actually unwind, and redirect unwinds
|
||||
; to the cleanup in the caller.
|
||||
; CHECK-NEXT: %[[noinfo_root_pad:[^ ]+]] = cleanuppad within none []
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ]
|
||||
; CHECK-NEXT: to label %[[next:[^ ]+]] unwind label %cleanup{{$}}
|
||||
; CHECK: [[next]]:
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ]
|
||||
; CHECK-NEXT: to label %[[noinfo_root_cont:[^ ]+]] unwind label %[[noinfo_left:.+]]
|
||||
|
||||
noinfo.left:
|
||||
%noinfo.left.pad = cleanuppad within %noinfo.root.pad []
|
||||
invoke void @g() [ "funclet"(token %noinfo.left.pad) ]
|
||||
to label %unreachable unwind label %noinfo.left.child
|
||||
; CHECK: [[noinfo_left]]:
|
||||
; CHECK-NEXT: %[[noinfo_left_pad:[^ ]+]] = cleanuppad within %[[noinfo_root_pad]]
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_left_pad]]) ]
|
||||
; CHECK-NEXT: unwind label %[[noinfo_left_child:.+]]
|
||||
|
||||
noinfo.left.child:
|
||||
%noinfo.left.child.cs = catchswitch within %noinfo.left.pad [label %noinfo.left.child.catch] unwind to caller
|
||||
; CHECK: [[noinfo_left_child]]:
|
||||
; CHECK-NEXT: %[[noinfo_left_child_cs:[^ ]+]] = catchswitch within %[[noinfo_left_pad]] [label %[[noinfo_left_child_catch:[^ ]+]]] unwind label %cleanup{{$}}
|
||||
|
||||
noinfo.left.child.catch:
|
||||
%noinfo.left.child.pad = catchpad within %noinfo.left.child.cs []
|
||||
call void @g() [ "funclet"(token %noinfo.left.child.pad) ]
|
||||
br label %unreachable
|
||||
; CHECK: [[noinfo_left_child_catch]]:
|
||||
; CHECK-NEXT: %[[noinfo_left_child_pad:[^ ]+]] = catchpad within %[[noinfo_left_child_cs]] []
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_left_child_pad]]) ]
|
||||
; CHECK-NEXT: unwind label %cleanup{{$}}
|
||||
|
||||
noinfo.root.cont:
|
||||
invoke void @g() [ "funclet"(token %noinfo.root.pad) ]
|
||||
to label %unreachable unwind label %noinfo.right
|
||||
; CHECK: [[noinfo_root_cont]]:
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ]
|
||||
; CHECK-NEXT: unwind label %[[noinfo_right:.+]]
|
||||
|
||||
noinfo.right:
|
||||
%noinfo.right.cs = catchswitch within %noinfo.root.pad [label %noinfo.right.catch] unwind to caller
|
||||
; CHECK: [[noinfo_right]]:
|
||||
; CHECK-NEXT: %[[noinfo_right_cs:[^ ]+]] = catchswitch within %[[noinfo_root_pad]] [label %[[noinfo_right_catch:[^ ]+]]] unwind label %cleanup{{$}}
|
||||
|
||||
noinfo.right.catch:
|
||||
%noinfo.right.pad = catchpad within %noinfo.right.cs []
|
||||
invoke void @g() [ "funclet"(token %noinfo.right.pad) ]
|
||||
to label %unreachable unwind label %noinfo.right.child
|
||||
; CHECK: [[noinfo_right_catch]]:
|
||||
; CHECK-NEXT: %[[noinfo_right_pad:[^ ]+]] = catchpad within %[[noinfo_right_cs]]
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_right_pad]]) ]
|
||||
; CHECK-NEXT: unwind label %[[noinfo_right_child:.+]]
|
||||
|
||||
noinfo.right.child:
|
||||
%noinfo.right.child.pad = cleanuppad within %noinfo.right.pad []
|
||||
call void @g() [ "funclet"(token %noinfo.right.child.pad) ]
|
||||
br label %unreachable
|
||||
; CHECK: [[noinfo_right_child]]:
|
||||
; CHECK-NEXT: %[[noinfo_right_child_pad:[^ ]+]] = cleanuppad within %[[noinfo_right_pad]]
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_right_child_pad]]) ]
|
||||
; CHECK-NEXT: unwind label %cleanup{{$}}
|
||||
|
||||
cont:
|
||||
invoke void @g()
|
||||
to label %exit unwind label %implicit.root
|
||||
; CHECK: [[cont]]:
|
||||
; CHECK-NEXT: invoke void @g()
|
||||
; CHECK-NEXT: unwind label %[[implicit_root:.+]]
|
||||
|
||||
implicit.root:
|
||||
%implicit.root.pad = cleanuppad within none []
|
||||
call void @g() [ "funclet"(token %implicit.root.pad) ]
|
||||
invoke void @g() [ "funclet"(token %implicit.root.pad) ]
|
||||
to label %implicit.root.cont unwind label %implicit.left
|
||||
; CHECK: [[implicit_root]]:
|
||||
; There's an unwind edge to %internal in implicit.right, and we need to propagate that
|
||||
; fact down to implicit.right.grandchild, up to implicit.root, and down to
|
||||
; implicit.left.child.catch, leaving all calls and "unwind to caller" catchswitches
|
||||
; alone so they don't conflict with the unwind edge in implicit.right
|
||||
; CHECK-NEXT: %[[implicit_root_pad:[^ ]+]] = cleanuppad within none
|
||||
; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
|
||||
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
|
||||
; CHECK-NEXT: to label %[[implicit_root_cont:[^ ]+]] unwind label %[[implicit_left:.+]]
|
||||
|
||||
implicit.left:
|
||||
%implicit.left.pad = cleanuppad within %implicit.root.pad []
|
||||
invoke void @g() [ "funclet"(token %implicit.left.pad) ]
|
||||
to label %unreachable unwind label %implicit.left.child
; CHECK: [[implicit_left]]:
; CHECK-NEXT: %[[implicit_left_pad:[^ ]+]] = cleanuppad within %[[implicit_root_pad:[^ ]+]]
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_left_pad]]) ]
; CHECK-NEXT: unwind label %[[implicit_left_child:.+]]

implicit.left.child:
%implicit.left.child.cs = catchswitch within %implicit.left.pad [label %implicit.left.child.catch] unwind to caller
; CHECK: [[implicit_left_child]]:
; CHECK-NEXT: %[[implicit_left_child_cs:[^ ]+]] = catchswitch within %[[implicit_left_pad]] [label %[[implicit_left_child_catch:[^ ]+]]] unwind to caller

implicit.left.child.catch:
%implicit.left.child.pad = catchpad within %implicit.left.child.cs []
call void @g() [ "funclet"(token %implicit.left.child.pad) ]
br label %unreachable
; CHECK: [[implicit_left_child_catch]]:
; CHECK-NEXT: %[[implicit_left_child_pad:[^ ]+]] = catchpad within %[[implicit_left_child_cs]]
; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_left_child_pad]]) ]

implicit.root.cont:
invoke void @g() [ "funclet"(token %implicit.root.pad) ]
to label %unreachable unwind label %implicit.right
; CHECK: [[implicit_root_cont]]:
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_root_pad]]) ]
; CHECK-NEXT: unwind label %[[implicit_right:.+]]

implicit.right:
%implicit.right.cs = catchswitch within %implicit.root.pad [label %implicit.right.catch] unwind label %internal
; CHECK: [[implicit_right]]:
; This is the unwind edge (to %internal) whose existence needs to get propagated around the "implicit" tree
; CHECK-NEXT: %[[implicit_right_cs:[^ ]+]] = catchswitch within %[[implicit_root_pad]] [label %[[implicit_right_catch:[^ ]+]]] unwind label %[[internal:.+]]

implicit.right.catch:
%implicit.right.pad = catchpad within %implicit.right.cs []
invoke void @g() [ "funclet"(token %implicit.right.pad) ]
to label %unreachable unwind label %implicit.right.child
; CHECK: [[implicit_right_catch]]:
; CHECK-NEXT: %[[implicit_right_pad:[^ ]+]] = catchpad within %[[implicit_right_cs]]
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_right_pad]]) ]
; CHECK-NEXT: unwind label %[[implicit_right_child:.+]]

implicit.right.child:
%implicit.right.child.pad = cleanuppad within %implicit.right.pad []
invoke void @g() [ "funclet"(token %implicit.right.child.pad) ]
to label %unreachable unwind label %implicit.right.grandchild
; CHECK: [[implicit_right_child]]:
; CHECK-NEXT: %[[implicit_right_child_pad:[^ ]+]] = cleanuppad within %[[implicit_right_pad]]
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_right_child_pad]]) ]
; CHECK-NEXT: unwind label %[[implicit_right_grandchild:.+]]

implicit.right.grandchild:
%implicit.right.grandchild.cs = catchswitch within %implicit.right.child.pad [label %implicit.right.grandchild.catch] unwind to caller
; CHECK: [[implicit_right_grandchild]]:
; CHECK-NEXT: %[[implicit_right_grandchild_cs:[^ ]+]] = catchswitch within %[[implicit_right_child_pad]] [label %[[implicit_right_grandchild_catch:[^ ]+]]] unwind to caller

implicit.right.grandchild.catch:
%implicit.right.grandhcild.pad = catchpad within %implicit.right.grandchild.cs []
call void @g() [ "funclet"(token %implicit.right.grandhcild.pad) ]
br label %unreachable
; CHECK: [[implicit_right_grandchild_catch]]:
; CHECK-NEXT: %[[implicit_right_grandhcild_pad:[^ ]+]] = catchpad within %[[implicit_right_grandchild_cs]]
; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_right_grandhcild_pad]]) ]

internal:
%internal.pad = cleanuppad within none []
call void @g() [ "funclet"(token %internal.pad) ]
cleanupret from %internal.pad unwind to caller
; CHECK: [[internal]]:
; internal is a cleanup with a "return to caller" cleanuppad; that needs to get redirected
; to %cleanup in the caller, and the call needs to get similarly rewritten to an invoke.
; CHECK-NEXT: %[[internal_pad:[^ ]+]] = cleanuppad within none
; CHECK-NEXT: invoke void @g() [ "funclet"(token %internal.pad.i) ]
; CHECK-NEXT: to label %[[next:[^ ]+]] unwind label %cleanup{{$}}
; CHECK: [[next]]:
; CHECK-NEXT: cleanupret from %[[internal_pad]] unwind label %cleanup{{$}}

unreachable:
unreachable
exit:
ret void
}

declare void @ProcessCLRException()

; Make sure the logic doesn't get tripped up when the inlined invoke is
; itself within a funclet in the caller.
; CHECK-LABEL: define void @test6(
define void @test6() personality void ()* @ProcessCLRException {
entry:
invoke void @g()
to label %exit unwind label %callsite_parent
callsite_parent:
%callsite_parent.pad = cleanuppad within none []
; CHECK: %callsite_parent.pad = cleanuppad within none
invoke void @test6_inlinee() [ "funclet"(token %callsite_parent.pad) ]
to label %ret unwind label %cleanup
ret:
cleanupret from %callsite_parent.pad unwind label %cleanup
cleanup:
%pad = cleanuppad within none []
call void @g() [ "funclet"(token %pad) ]
cleanupret from %pad unwind to caller
exit:
ret void
}

define void @test6_inlinee() alwaysinline personality void ()* @ProcessCLRException {
entry:
invoke void @g()
to label %exit unwind label %inlinee_cleanup
; CHECK-NEXT: invoke void @g() [ "funclet"(token %callsite_parent.pad) ]
; CHECK-NEXT: unwind label %[[inlinee_cleanup:.+]]

inlinee_cleanup:
%inlinee.pad = cleanuppad within none []
call void @g() [ "funclet"(token %inlinee.pad) ]
unreachable
; CHECK: [[inlinee_cleanup]]:
; CHECK-NEXT: %[[inlinee_pad:[^ ]+]] = cleanuppad within %callsite_parent.pad
; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[inlinee_pad]]) ]
; CHECK-NEXT: unwind label %cleanup{{$}}

exit:
ret void
}
test/Transforms/InstCombine/bitreverse-hang.ll (new file, 53 lines)
@ -0,0 +1,53 @@
; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s

; This test is a worst-case scenario for bitreversal/byteswap detection.
; After loop unrolling (the unrolled loop is unreadably large so it has been kept
; rolled here), we have a binary tree of OR operands (as bitreversal detection
; looks straight through shifts):
;
; OR
; | \
; | LSHR
; | /
; OR
; | \
; | LSHR
; | /
; OR
;
; This results in exponential runtime. The loop here is 32 iterations which will
; totally hang if we don't deal with this case cleverly.

@b = common global i32 0, align 4

; CHECK: define i32 @fn1
define i32 @fn1() #0 {
entry:
%b.promoted = load i32, i32* @b, align 4, !tbaa !2
br label %for.body

for.body: ; preds = %for.body, %entry
%or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
%i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%shr = lshr i32 %or4, 1
%or = or i32 %shr, %or4
%inc = add nuw nsw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, 32
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
store i32 %or, i32* @b, align 4, !tbaa !2
ret i32 undef
}

attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}
!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
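For reference, the rolled loop in @fn1 above corresponds roughly to C source like the following. This is a hedged reconstruction for illustration only; apart from the global name b, taken from the IR, everything here is an assumption and not part of the commit:

// b |= b >> 1, repeated 32 times; after full unrolling this becomes the
// deep OR/LSHR tree sketched in the comment above.
unsigned b;
int fn1(void) {
  for (int i = 0; i < 32; i++)
    b |= b >> 1;
  return 0; // the IR returns undef; 0 is a stand-in
}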
@ -1,114 +0,0 @@
; RUN: opt < %s -instcombine -S | FileCheck %s

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

define zeroext i8 @f_u8(i8 zeroext %a) {
; CHECK-LABEL: @f_u8
; CHECK-NEXT: %[[A:.*]] = call i8 @llvm.bitreverse.i8(i8 %a)
; CHECK-NEXT: ret i8 %[[A]]
%1 = shl i8 %a, 7
%2 = shl i8 %a, 5
%3 = and i8 %2, 64
%4 = shl i8 %a, 3
%5 = and i8 %4, 32
%6 = shl i8 %a, 1
%7 = and i8 %6, 16
%8 = lshr i8 %a, 1
%9 = and i8 %8, 8
%10 = lshr i8 %a, 3
%11 = and i8 %10, 4
%12 = lshr i8 %a, 5
%13 = and i8 %12, 2
%14 = lshr i8 %a, 7
%15 = or i8 %14, %1
%16 = or i8 %15, %3
%17 = or i8 %16, %5
%18 = or i8 %17, %7
%19 = or i8 %18, %9
%20 = or i8 %19, %11
%21 = or i8 %20, %13
ret i8 %21
}

; The ANDs with 32 and 64 have been swapped here, so the sequence does not
; completely match a bitreverse.
define zeroext i8 @f_u8_fail(i8 zeroext %a) {
; CHECK-LABEL: @f_u8_fail
; CHECK-NOT: call
; CHECK: ret i8
%1 = shl i8 %a, 7
%2 = shl i8 %a, 5
%3 = and i8 %2, 32
%4 = shl i8 %a, 3
%5 = and i8 %4, 64
%6 = shl i8 %a, 1
%7 = and i8 %6, 16
%8 = lshr i8 %a, 1
%9 = and i8 %8, 8
%10 = lshr i8 %a, 3
%11 = and i8 %10, 4
%12 = lshr i8 %a, 5
%13 = and i8 %12, 2
%14 = lshr i8 %a, 7
%15 = or i8 %14, %1
%16 = or i8 %15, %3
%17 = or i8 %16, %5
%18 = or i8 %17, %7
%19 = or i8 %18, %9
%20 = or i8 %19, %11
%21 = or i8 %20, %13
ret i8 %21
}

define zeroext i16 @f_u16(i16 zeroext %a) {
; CHECK-LABEL: @f_u16
; CHECK-NEXT: %[[A:.*]] = call i16 @llvm.bitreverse.i16(i16 %a)
; CHECK-NEXT: ret i16 %[[A]]
%1 = shl i16 %a, 15
%2 = shl i16 %a, 13
%3 = and i16 %2, 16384
%4 = shl i16 %a, 11
%5 = and i16 %4, 8192
%6 = shl i16 %a, 9
%7 = and i16 %6, 4096
%8 = shl i16 %a, 7
%9 = and i16 %8, 2048
%10 = shl i16 %a, 5
%11 = and i16 %10, 1024
%12 = shl i16 %a, 3
%13 = and i16 %12, 512
%14 = shl i16 %a, 1
%15 = and i16 %14, 256
%16 = lshr i16 %a, 1
%17 = and i16 %16, 128
%18 = lshr i16 %a, 3
%19 = and i16 %18, 64
%20 = lshr i16 %a, 5
%21 = and i16 %20, 32
%22 = lshr i16 %a, 7
%23 = and i16 %22, 16
%24 = lshr i16 %a, 9
%25 = and i16 %24, 8
%26 = lshr i16 %a, 11
%27 = and i16 %26, 4
%28 = lshr i16 %a, 13
%29 = and i16 %28, 2
%30 = lshr i16 %a, 15
%31 = or i16 %30, %1
%32 = or i16 %31, %3
%33 = or i16 %32, %5
%34 = or i16 %33, %7
%35 = or i16 %34, %9
%36 = or i16 %35, %11
%37 = or i16 %36, %13
%38 = or i16 %37, %15
%39 = or i16 %38, %17
%40 = or i16 %39, %19
%41 = or i16 %40, %21
%42 = or i16 %41, %23
%43 = or i16 %42, %25
%44 = or i16 %43, %27
%45 = or i16 %44, %29
ret i16 %45
}
@ -1,12 +1,11 @@
; Test that the cos library call simplifier works correctly.
;
; RUN: opt < %s -instcombine -S | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare float @cos(double)
declare signext i8 @sqrt(...)

; Check that cos functions with the wrong prototype aren't simplified.
; Check that functions with the wrong prototype aren't simplified.

define float @test_no_simplify1(double %d) {
; CHECK-LABEL: @test_no_simplify1(
@ -15,3 +14,14 @@ define float @test_no_simplify1(double %d) {
; CHECK: call float @cos(double %neg)
ret float %cos
}


define i8 @bogus_sqrt() {
%fake_sqrt = call signext i8 (...) @sqrt()
ret i8 %fake_sqrt

; CHECK-LABEL: bogus_sqrt(
; CHECK-NEXT: %fake_sqrt = call signext i8 (...) @sqrt()
; CHECK-NEXT: ret i8 %fake_sqrt
}
@ -364,6 +364,26 @@ define float @max1(float %a, float %b) {
; CHECK-NEXT: ret
}

; A function can have a name that matches a common libcall,
; but with the wrong type(s). Let it be.

define float @fake_fmin(float %a, float %b) {
%c = fpext float %a to fp128
%d = fpext float %b to fp128
%e = call fp128 @fmin(fp128 %c, fp128 %d)
%f = fptrunc fp128 %e to float
ret float %f

; CHECK-LABEL: fake_fmin(
; CHECK-NEXT: %c = fpext float %a to fp128
; CHECK-NEXT: %d = fpext float %b to fp128
; CHECK-NEXT: %e = call fp128 @fmin(fp128 %c, fp128 %d)
; CHECK-NEXT: %f = fptrunc fp128 %e to float
; CHECK-NEXT: ret float %f
}

declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.

declare double @fmax(double, double)

declare double @tanh(double) #1
@ -16,6 +16,7 @@
#include "OrcLazyJIT.h"
#include "RemoteJITUtils.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
@ -741,11 +742,11 @@ std::unique_ptr<FDRPCChannel> launchRemote() {
ChildPath.reset(new char[ChildExecPath.size() + 1]);
std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]);
ChildPath[ChildExecPath.size()] = '\0';
std::string ChildInStr = std::to_string(PipeFD[0][0]);
std::string ChildInStr = utostr(PipeFD[0][0]);
ChildIn.reset(new char[ChildInStr.size() + 1]);
std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]);
ChildIn[ChildInStr.size()] = '\0';
std::string ChildOutStr = std::to_string(PipeFD[1][1]);
std::string ChildOutStr = utostr(PipeFD[1][1]);
ChildOut.reset(new char[ChildOutStr.size() + 1]);
std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]);
ChildOut[ChildOutStr.size()] = '\0';
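The hunk above swaps std::to_string for LLVM's utostr from llvm/ADT/StringExtras.h (the include added in the earlier hunk), presumably because std::to_string is not available on every host toolchain. A minimal usage sketch, assuming only that header; the helper name and the sample value are illustrative, not from the commit:

#include "llvm/ADT/StringExtras.h"
#include <string>

// Format a pipe file descriptor for a child's command line; utostr builds the
// decimal string without depending on std::to_string.
std::string formatFD(int FD) { return llvm::utostr(static_cast<unsigned>(FD)); }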
@ -65,11 +65,6 @@ function usage() {
echo " -no-openmp Disable check-out & build libomp"
}

if [ `uname -s` = "Darwin" ]; then
# compiler-rt doesn't yet build with CMake on Darwin.
use_autoconf="yes"
fi

while [ $# -gt 0 ]; do
case $1 in
-release | --release )
@ -288,10 +283,20 @@ function export_sources() {
if [ ! -h clang ]; then
ln -s ../../cfe.src clang
fi
cd $BuildDir/llvm.src/tools/clang/tools
if [ ! -h extra ]; then
ln -s ../../../../clang-tools-extra.src extra

# The autoconf and CMake builds want different symlinks here:
if [ "$use_autoconf" = "yes" ]; then
cd $BuildDir/llvm.src/tools/clang/tools
if [ ! -h extra ]; then
ln -s ../../../../clang-tools-extra.src extra
fi
else
cd $BuildDir/cfe.src/tools
if [ ! -h extra ]; then
ln -s ../../clang-tools-extra.src extra
fi
fi

cd $BuildDir/llvm.src/projects
if [ -d $BuildDir/test-suite.src ] && [ ! -h test-suite ]; then
ln -s ../../test-suite.src test-suite