Update llvm and clang to release_38 branch r258549.
This commit is contained in:
parent
7cf07624a0
commit
6e0d73d099
@ -295,7 +295,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Should we be emitting segmented stack stuff for the function
|
/// Should we be emitting segmented stack stuff for the function
|
||||||
bool shouldSplitStack();
|
bool shouldSplitStack() const;
|
||||||
|
|
||||||
/// getNumBlockIDs - Return the number of MBB IDs allocated.
|
/// getNumBlockIDs - Return the number of MBB IDs allocated.
|
||||||
///
|
///
|
||||||
|
@ -369,6 +369,18 @@ public:
|
|||||||
(UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
|
(UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
|
||||||
(NoSignedZeros << 6) | (AllowReciprocal << 7);
|
(NoSignedZeros << 6) | (AllowReciprocal << 7);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Clear any flags in this flag set that aren't also set in Flags.
|
||||||
|
void intersectWith(const SDNodeFlags *Flags) {
|
||||||
|
NoUnsignedWrap &= Flags->NoUnsignedWrap;
|
||||||
|
NoSignedWrap &= Flags->NoSignedWrap;
|
||||||
|
Exact &= Flags->Exact;
|
||||||
|
UnsafeAlgebra &= Flags->UnsafeAlgebra;
|
||||||
|
NoNaNs &= Flags->NoNaNs;
|
||||||
|
NoInfs &= Flags->NoInfs;
|
||||||
|
NoSignedZeros &= Flags->NoSignedZeros;
|
||||||
|
AllowReciprocal &= Flags->AllowReciprocal;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Represents one node in the SelectionDAG.
|
/// Represents one node in the SelectionDAG.
|
||||||
@ -682,6 +694,9 @@ public:
|
|||||||
/// and directly, but it is not to avoid creating a vtable for this class.
|
/// and directly, but it is not to avoid creating a vtable for this class.
|
||||||
const SDNodeFlags *getFlags() const;
|
const SDNodeFlags *getFlags() const;
|
||||||
|
|
||||||
|
/// Clear any flags in this node that aren't also set in Flags.
|
||||||
|
void intersectFlagsWith(const SDNodeFlags *Flags);
|
||||||
|
|
||||||
/// Return the number of values defined/returned by this operator.
|
/// Return the number of values defined/returned by this operator.
|
||||||
unsigned getNumValues() const { return NumValues; }
|
unsigned getNumValues() const { return NumValues; }
|
||||||
|
|
||||||
|
@ -331,6 +331,25 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
|
|||||||
/// during lowering by the GC infrastructure.
|
/// during lowering by the GC infrastructure.
|
||||||
bool callsGCLeafFunction(ImmutableCallSite CS);
|
bool callsGCLeafFunction(ImmutableCallSite CS);
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Intrinsic pattern matching
|
||||||
|
//
|
||||||
|
|
||||||
|
/// Try and match a bitreverse or bswap idiom.
|
||||||
|
///
|
||||||
|
/// If an idiom is matched, an intrinsic call is inserted before \c I. Any added
|
||||||
|
/// instructions are returned in \c InsertedInsts. They will all have been added
|
||||||
|
/// to a basic block.
|
||||||
|
///
|
||||||
|
/// A bitreverse idiom normally requires around 2*BW nodes to be searched (where
|
||||||
|
/// BW is the bitwidth of the integer type). A bswap idiom requires anywhere up
|
||||||
|
/// to BW / 4 nodes to be searched, so is significantly faster.
|
||||||
|
///
|
||||||
|
/// This function returns true on a successful match or false otherwise.
|
||||||
|
bool recognizeBitReverseOrBSwapIdiom(
|
||||||
|
Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
|
||||||
|
SmallVectorImpl<Instruction *> &InsertedInsts);
|
||||||
|
|
||||||
} // End llvm namespace
|
} // End llvm namespace
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -125,8 +125,6 @@ private:
|
|||||||
Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B);
|
Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B);
|
||||||
|
|
||||||
// Math Library Optimizations
|
// Math Library Optimizations
|
||||||
Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, bool CheckRetType);
|
|
||||||
Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B);
|
|
||||||
Value *optimizeCos(CallInst *CI, IRBuilder<> &B);
|
Value *optimizeCos(CallInst *CI, IRBuilder<> &B);
|
||||||
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
|
Value *optimizePow(CallInst *CI, IRBuilder<> &B);
|
||||||
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
|
Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
|
||||||
|
@ -5211,6 +5211,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given an OR instruction, check to see if this is a bitreverse
|
||||||
|
/// idiom. If so, insert the new intrinsic and return true.
|
||||||
|
static bool makeBitReverse(Instruction &I, const DataLayout &DL,
|
||||||
|
const TargetLowering &TLI) {
|
||||||
|
if (!I.getType()->isIntegerTy() ||
|
||||||
|
!TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
|
||||||
|
TLI.getValueType(DL, I.getType(), true)))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
SmallVector<Instruction*, 4> Insts;
|
||||||
|
if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts))
|
||||||
|
return false;
|
||||||
|
Instruction *LastInst = Insts.back();
|
||||||
|
I.replaceAllUsesWith(LastInst);
|
||||||
|
RecursivelyDeleteTriviallyDeadInstructions(&I);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// In this pass we look for GEP and cast instructions that are used
|
// In this pass we look for GEP and cast instructions that are used
|
||||||
// across basic blocks and rewrite them to improve basic-block-at-a-time
|
// across basic blocks and rewrite them to improve basic-block-at-a-time
|
||||||
// selection.
|
// selection.
|
||||||
@ -5224,6 +5242,17 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
|
|||||||
if (ModifiedDT)
|
if (ModifiedDT)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MadeBitReverse = true;
|
||||||
|
while (TLI && MadeBitReverse) {
|
||||||
|
MadeBitReverse = false;
|
||||||
|
for (auto &I : reverse(BB)) {
|
||||||
|
if (makeBitReverse(I, *DL, *TLI)) {
|
||||||
|
MadeBitReverse = MadeChange = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
MadeChange |= dupRetToEnableTailCallOpts(&BB);
|
MadeChange |= dupRetToEnableTailCallOpts(&BB);
|
||||||
|
|
||||||
return MadeChange;
|
return MadeChange;
|
||||||
|
@ -163,7 +163,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Should we be emitting segmented stack stuff for the function
|
/// Should we be emitting segmented stack stuff for the function
|
||||||
bool MachineFunction::shouldSplitStack() {
|
bool MachineFunction::shouldSplitStack() const {
|
||||||
return getFunction()->hasFnAttribute("split-stack");
|
return getFunction()->hasFnAttribute("split-stack");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,22 +377,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add logical or fast math flag values to FoldingSetNodeID value.
|
|
||||||
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode,
|
|
||||||
const SDNodeFlags *Flags) {
|
|
||||||
if (!isBinOpWithFlags(Opcode))
|
|
||||||
return;
|
|
||||||
|
|
||||||
unsigned RawFlags = 0;
|
|
||||||
if (Flags)
|
|
||||||
RawFlags = Flags->getRawFlags();
|
|
||||||
ID.AddInteger(RawFlags);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) {
|
|
||||||
AddNodeIDFlags(ID, N->getOpcode(), N->getFlags());
|
|
||||||
}
|
|
||||||
|
|
||||||
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
|
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
|
||||||
SDVTList VTList, ArrayRef<SDValue> OpList) {
|
SDVTList VTList, ArrayRef<SDValue> OpList) {
|
||||||
AddNodeIDOpcode(ID, OpC);
|
AddNodeIDOpcode(ID, OpC);
|
||||||
@ -528,8 +512,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
|
|||||||
}
|
}
|
||||||
} // end switch (N->getOpcode())
|
} // end switch (N->getOpcode())
|
||||||
|
|
||||||
AddNodeIDFlags(ID, N);
|
|
||||||
|
|
||||||
// Target specific memory nodes could also have address spaces to check.
|
// Target specific memory nodes could also have address spaces to check.
|
||||||
if (N->isTargetMemoryOpcode())
|
if (N->isTargetMemoryOpcode())
|
||||||
ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
|
ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace());
|
||||||
@ -851,6 +833,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
|
|||||||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||||
AddNodeIDCustom(ID, N);
|
AddNodeIDCustom(ID, N);
|
||||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||||
|
if (Node)
|
||||||
|
if (const SDNodeFlags *Flags = N->getFlags())
|
||||||
|
Node->intersectFlagsWith(Flags);
|
||||||
return Node;
|
return Node;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -869,6 +854,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
|
|||||||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||||
AddNodeIDCustom(ID, N);
|
AddNodeIDCustom(ID, N);
|
||||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||||
|
if (Node)
|
||||||
|
if (const SDNodeFlags *Flags = N->getFlags())
|
||||||
|
Node->intersectFlagsWith(Flags);
|
||||||
return Node;
|
return Node;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -886,6 +874,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
|
|||||||
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
|
||||||
AddNodeIDCustom(ID, N);
|
AddNodeIDCustom(ID, N);
|
||||||
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos);
|
||||||
|
if (Node)
|
||||||
|
if (const SDNodeFlags *Flags = N->getFlags())
|
||||||
|
Node->intersectFlagsWith(Flags);
|
||||||
return Node;
|
return Node;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3892,10 +3883,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
|
|||||||
SDValue Ops[] = {N1, N2};
|
SDValue Ops[] = {N1, N2};
|
||||||
FoldingSetNodeID ID;
|
FoldingSetNodeID ID;
|
||||||
AddNodeIDNode(ID, Opcode, VTs, Ops);
|
AddNodeIDNode(ID, Opcode, VTs, Ops);
|
||||||
AddNodeIDFlags(ID, Opcode, Flags);
|
|
||||||
void *IP = nullptr;
|
void *IP = nullptr;
|
||||||
if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
|
if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) {
|
||||||
|
if (Flags)
|
||||||
|
E->intersectFlagsWith(Flags);
|
||||||
return SDValue(E, 0);
|
return SDValue(E, 0);
|
||||||
|
}
|
||||||
|
|
||||||
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
|
N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags);
|
||||||
|
|
||||||
@ -6249,10 +6242,12 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
|
|||||||
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
|
if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
|
||||||
FoldingSetNodeID ID;
|
FoldingSetNodeID ID;
|
||||||
AddNodeIDNode(ID, Opcode, VTList, Ops);
|
AddNodeIDNode(ID, Opcode, VTList, Ops);
|
||||||
AddNodeIDFlags(ID, Opcode, Flags);
|
|
||||||
void *IP = nullptr;
|
void *IP = nullptr;
|
||||||
if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP))
|
if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) {
|
||||||
|
if (Flags)
|
||||||
|
E->intersectFlagsWith(Flags);
|
||||||
return E;
|
return E;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@ -6948,6 +6943,11 @@ const SDNodeFlags *SDNode::getFlags() const {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) {
|
||||||
|
if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
|
||||||
|
FlagsNode->Flags.intersectWith(Flags);
|
||||||
|
}
|
||||||
|
|
||||||
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
|
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
|
||||||
assert(N->getNumValues() == 1 &&
|
assert(N->getNumValues() == 1 &&
|
||||||
"Can't unroll a vector with multiple results!");
|
"Can't unroll a vector with multiple results!");
|
||||||
|
@ -10133,6 +10133,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
|
|||||||
|
|
||||||
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
||||||
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
||||||
|
MachineBasicBlock::iterator MBBI = Entry->begin();
|
||||||
for (const MCPhysReg *I = IStart; *I; ++I) {
|
for (const MCPhysReg *I = IStart; *I; ++I) {
|
||||||
const TargetRegisterClass *RC = nullptr;
|
const TargetRegisterClass *RC = nullptr;
|
||||||
if (AArch64::GPR64RegClass.contains(*I))
|
if (AArch64::GPR64RegClass.contains(*I))
|
||||||
@ -10152,13 +10153,13 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
|
|||||||
Attribute::NoUnwind) &&
|
Attribute::NoUnwind) &&
|
||||||
"Function should be nounwind in insertCopiesSplitCSR!");
|
"Function should be nounwind in insertCopiesSplitCSR!");
|
||||||
Entry->addLiveIn(*I);
|
Entry->addLiveIn(*I);
|
||||||
BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
|
||||||
NewVR)
|
|
||||||
.addReg(*I);
|
.addReg(*I);
|
||||||
|
|
||||||
|
// Insert the copy-back instructions right before the terminator.
|
||||||
for (auto *Exit : Exits)
|
for (auto *Exit : Exits)
|
||||||
BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
|
||||||
*I)
|
TII->get(TargetOpcode::COPY), *I)
|
||||||
.addReg(NewVR);
|
.addReg(NewVR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -112,9 +112,21 @@ public:
|
|||||||
MCELFStreamer::EmitInstruction(Inst, STI);
|
MCELFStreamer::EmitInstruction(Inst, STI);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emit a 32-bit value as an instruction. This is only used for the .inst
|
||||||
|
/// directive, EmitInstruction should be used in other cases.
|
||||||
void emitInst(uint32_t Inst) {
|
void emitInst(uint32_t Inst) {
|
||||||
|
char Buffer[4];
|
||||||
|
|
||||||
|
// We can't just use EmitIntValue here, as that will emit a data mapping
|
||||||
|
// symbol, and swap the endianness on big-endian systems (instructions are
|
||||||
|
// always little-endian).
|
||||||
|
for (unsigned I = 0; I < 4; ++I) {
|
||||||
|
Buffer[I] = uint8_t(Inst);
|
||||||
|
Inst >>= 8;
|
||||||
|
}
|
||||||
|
|
||||||
EmitA64MappingSymbol();
|
EmitA64MappingSymbol();
|
||||||
MCELFStreamer::EmitIntValue(Inst, 4);
|
MCELFStreamer::EmitBytes(StringRef(Buffer, 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This is one of the functions used to emit data into an ELF section, so the
|
/// This is one of the functions used to emit data into an ELF section, so the
|
||||||
|
@ -12423,6 +12423,7 @@ void ARMTargetLowering::insertCopiesSplitCSR(
|
|||||||
|
|
||||||
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
||||||
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
||||||
|
MachineBasicBlock::iterator MBBI = Entry->begin();
|
||||||
for (const MCPhysReg *I = IStart; *I; ++I) {
|
for (const MCPhysReg *I = IStart; *I; ++I) {
|
||||||
const TargetRegisterClass *RC = nullptr;
|
const TargetRegisterClass *RC = nullptr;
|
||||||
if (ARM::GPRRegClass.contains(*I))
|
if (ARM::GPRRegClass.contains(*I))
|
||||||
@ -12442,13 +12443,13 @@ void ARMTargetLowering::insertCopiesSplitCSR(
|
|||||||
Attribute::NoUnwind) &&
|
Attribute::NoUnwind) &&
|
||||||
"Function should be nounwind in insertCopiesSplitCSR!");
|
"Function should be nounwind in insertCopiesSplitCSR!");
|
||||||
Entry->addLiveIn(*I);
|
Entry->addLiveIn(*I);
|
||||||
BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
|
||||||
NewVR)
|
|
||||||
.addReg(*I);
|
.addReg(*I);
|
||||||
|
|
||||||
|
// Insert the copy-back instructions right before the terminator.
|
||||||
for (auto *Exit : Exits)
|
for (auto *Exit : Exits)
|
||||||
BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
|
||||||
*I)
|
TII->get(TargetOpcode::COPY), *I)
|
||||||
.addReg(NewVR);
|
.addReg(NewVR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -832,10 +832,10 @@ def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
|
|||||||
R8, R9, R10, R11)>;
|
R8, R9, R10, R11)>;
|
||||||
|
|
||||||
// CSRs that are handled by prologue, epilogue.
|
// CSRs that are handled by prologue, epilogue.
|
||||||
def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;
|
def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add RBP)>;
|
||||||
|
|
||||||
// CSRs that are handled explicitly via copies.
|
// CSRs that are handled explicitly via copies.
|
||||||
def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>;
|
def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>;
|
||||||
|
|
||||||
// All GPRs - except r11
|
// All GPRs - except r11
|
||||||
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
|
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
|
||||||
|
@ -2031,6 +2031,10 @@ void X86FrameLowering::adjustForSegmentedStacks(
|
|||||||
unsigned TlsReg, TlsOffset;
|
unsigned TlsReg, TlsOffset;
|
||||||
DebugLoc DL;
|
DebugLoc DL;
|
||||||
|
|
||||||
|
// To support shrink-wrapping we would need to insert the new blocks
|
||||||
|
// at the right place and update the branches to PrologueMBB.
|
||||||
|
assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
|
||||||
|
|
||||||
unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
|
unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
|
||||||
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
|
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
|
||||||
"Scratch register is live-in");
|
"Scratch register is live-in");
|
||||||
@ -2271,6 +2275,11 @@ void X86FrameLowering::adjustForHiPEPrologue(
|
|||||||
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
|
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
|
||||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
DebugLoc DL;
|
DebugLoc DL;
|
||||||
|
|
||||||
|
// To support shrink-wrapping we would need to insert the new blocks
|
||||||
|
// at the right place and update the branches to PrologueMBB.
|
||||||
|
assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");
|
||||||
|
|
||||||
// HiPE-specific values
|
// HiPE-specific values
|
||||||
const unsigned HipeLeafWords = 24;
|
const unsigned HipeLeafWords = 24;
|
||||||
const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
|
const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
|
||||||
@ -2584,7 +2593,14 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
|
|||||||
bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
|
bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
|
||||||
// If we may need to emit frameless compact unwind information, give
|
// If we may need to emit frameless compact unwind information, give
|
||||||
// up as this is currently broken: PR25614.
|
// up as this is currently broken: PR25614.
|
||||||
return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF);
|
return (MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) &&
|
||||||
|
// The lowering of segmented stack and HiPE only support entry blocks
|
||||||
|
// as prologue blocks: PR26107.
|
||||||
|
// This limitation may be lifted if we fix:
|
||||||
|
// - adjustForSegmentedStacks
|
||||||
|
// - adjustForHiPEPrologue
|
||||||
|
MF.getFunction()->getCallingConv() != CallingConv::HiPE &&
|
||||||
|
!MF.shouldSplitStack();
|
||||||
}
|
}
|
||||||
|
|
||||||
MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
|
MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
|
||||||
|
@ -28908,6 +28908,7 @@ void X86TargetLowering::insertCopiesSplitCSR(
|
|||||||
|
|
||||||
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
|
||||||
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
|
||||||
|
MachineBasicBlock::iterator MBBI = Entry->begin();
|
||||||
for (const MCPhysReg *I = IStart; *I; ++I) {
|
for (const MCPhysReg *I = IStart; *I; ++I) {
|
||||||
const TargetRegisterClass *RC = nullptr;
|
const TargetRegisterClass *RC = nullptr;
|
||||||
if (X86::GR64RegClass.contains(*I))
|
if (X86::GR64RegClass.contains(*I))
|
||||||
@ -28925,13 +28926,13 @@ void X86TargetLowering::insertCopiesSplitCSR(
|
|||||||
Attribute::NoUnwind) &&
|
Attribute::NoUnwind) &&
|
||||||
"Function should be nounwind in insertCopiesSplitCSR!");
|
"Function should be nounwind in insertCopiesSplitCSR!");
|
||||||
Entry->addLiveIn(*I);
|
Entry->addLiveIn(*I);
|
||||||
BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
|
||||||
NewVR)
|
|
||||||
.addReg(*I);
|
.addReg(*I);
|
||||||
|
|
||||||
|
// Insert the copy-back instructions right before the terminator.
|
||||||
for (auto *Exit : Exits)
|
for (auto *Exit : Exits)
|
||||||
BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
|
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
|
||||||
*I)
|
TII->get(TargetOpcode::COPY), *I)
|
||||||
.addReg(NewVR);
|
.addReg(NewVR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include "llvm/IR/Intrinsics.h"
|
#include "llvm/IR/Intrinsics.h"
|
||||||
#include "llvm/IR/PatternMatch.h"
|
#include "llvm/IR/PatternMatch.h"
|
||||||
#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
|
#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
|
||||||
|
#include "llvm/Transforms/Utils/Local.h"
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
using namespace PatternMatch;
|
using namespace PatternMatch;
|
||||||
|
|
||||||
@ -1565,190 +1566,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
|
|||||||
return Changed ? &I : nullptr;
|
return Changed ? &I : nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Analyze the specified subexpression and see if it is capable of providing
|
|
||||||
/// pieces of a bswap or bitreverse. The subexpression provides a potential
|
|
||||||
/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
|
|
||||||
/// the output of the expression came from a corresponding bit in some other
|
|
||||||
/// value. This function is recursive, and the end result is a mapping of
|
|
||||||
/// (value, bitnumber) to bitnumber. It is the caller's responsibility to
|
|
||||||
/// validate that all `value`s are identical and that the bitnumber to bitnumber
|
|
||||||
/// mapping is correct for a bswap or bitreverse.
|
|
||||||
///
|
|
||||||
/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know
|
|
||||||
/// that the expression deposits the low byte of %X into the high byte of the
|
|
||||||
/// result and that all other bits are zero. This expression is accepted,
|
|
||||||
/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7].
|
|
||||||
///
|
|
||||||
/// This function returns true if the match failed and false if it succeeded.
|
|
||||||
/// On entry to the function the "OverallLeftShift" is a signed integer value
|
|
||||||
/// indicating the number of bits that the subexpression is later shifted. For
|
|
||||||
/// example, if the expression is later right shifted by 16 bits, the
|
|
||||||
/// OverallLeftShift value would be -16 on entry. This is used to specify which
|
|
||||||
/// bits of BitValues are actually being set.
|
|
||||||
///
|
|
||||||
/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding
|
|
||||||
/// bit is masked to zero by a user. For example, in (X & 255), X will be
|
|
||||||
/// processed with a bytemask of 255. BitMask is always in the local
|
|
||||||
/// (OverallLeftShift) coordinate space.
|
|
||||||
///
|
|
||||||
static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask,
|
|
||||||
SmallVectorImpl<Value *> &BitValues,
|
|
||||||
SmallVectorImpl<int> &BitProvenance) {
|
|
||||||
if (Instruction *I = dyn_cast<Instruction>(V)) {
|
|
||||||
// If this is an or instruction, it may be an inner node of the bswap.
|
|
||||||
if (I->getOpcode() == Instruction::Or)
|
|
||||||
return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
|
|
||||||
BitValues, BitProvenance) ||
|
|
||||||
CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask,
|
|
||||||
BitValues, BitProvenance);
|
|
||||||
|
|
||||||
// If this is a logical shift by a constant, recurse with OverallLeftShift
|
|
||||||
// and BitMask adjusted.
|
|
||||||
if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
|
|
||||||
unsigned ShAmt =
|
|
||||||
cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
|
|
||||||
// Ensure the shift amount is defined.
|
|
||||||
if (ShAmt > BitValues.size())
|
|
||||||
return true;
|
|
||||||
|
|
||||||
unsigned BitShift = ShAmt;
|
|
||||||
if (I->getOpcode() == Instruction::Shl) {
|
|
||||||
// X << C -> collect(X, +C)
|
|
||||||
OverallLeftShift += BitShift;
|
|
||||||
BitMask = BitMask.lshr(BitShift);
|
|
||||||
} else {
|
|
||||||
// X >>u C -> collect(X, -C)
|
|
||||||
OverallLeftShift -= BitShift;
|
|
||||||
BitMask = BitMask.shl(BitShift);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (OverallLeftShift >= (int)BitValues.size())
|
|
||||||
return true;
|
|
||||||
if (OverallLeftShift <= -(int)BitValues.size())
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
|
|
||||||
BitValues, BitProvenance);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If this is a logical 'and' with a mask that clears bits, clear the
|
|
||||||
// corresponding bits in BitMask.
|
|
||||||
if (I->getOpcode() == Instruction::And &&
|
|
||||||
isa<ConstantInt>(I->getOperand(1))) {
|
|
||||||
unsigned NumBits = BitValues.size();
|
|
||||||
APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
|
|
||||||
const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
|
|
||||||
|
|
||||||
for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) {
|
|
||||||
// If this bit is masked out by a later operation, we don't care what
|
|
||||||
// the and mask is.
|
|
||||||
if (BitMask[i] == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// If the AndMask is zero for this bit, clear the bit.
|
|
||||||
APInt MaskB = AndMask & Bit;
|
|
||||||
if (MaskB == 0) {
|
|
||||||
BitMask.clearBit(i);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise, this bit is kept.
|
|
||||||
}
|
|
||||||
|
|
||||||
return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
|
|
||||||
BitValues, BitProvenance);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
|
|
||||||
// the input value to the bswap/bitreverse. To be part of a bswap or
|
|
||||||
// bitreverse we must be demanding a contiguous range of bits from it.
|
|
||||||
unsigned InputBitLen = BitMask.countPopulation();
|
|
||||||
unsigned InputBitNo = BitMask.countTrailingZeros();
|
|
||||||
if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo !=
|
|
||||||
InputBitLen)
|
|
||||||
// Not a contiguous set range of bits!
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// We know we're moving a contiguous range of bits from the input to the
|
|
||||||
// output. Record which bits in the output came from which bits in the input.
|
|
||||||
unsigned DestBitNo = InputBitNo + OverallLeftShift;
|
|
||||||
for (unsigned I = 0; I < InputBitLen; ++I)
|
|
||||||
BitProvenance[DestBitNo + I] = InputBitNo + I;
|
|
||||||
|
|
||||||
// If the destination bit value is already defined, the values are or'd
|
|
||||||
// together, which isn't a bswap/bitreverse (unless it's an or of the same
|
|
||||||
// bits).
|
|
||||||
if (BitValues[DestBitNo] && BitValues[DestBitNo] != V)
|
|
||||||
return true;
|
|
||||||
for (unsigned I = 0; I < InputBitLen; ++I)
|
|
||||||
BitValues[DestBitNo + I] = V;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
|
|
||||||
unsigned BitWidth) {
|
|
||||||
if (From % 8 != To % 8)
|
|
||||||
return false;
|
|
||||||
// Convert from bit indices to byte indices and check for a byte reversal.
|
|
||||||
From >>= 3;
|
|
||||||
To >>= 3;
|
|
||||||
BitWidth >>= 3;
|
|
||||||
return From == BitWidth - To - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
|
|
||||||
unsigned BitWidth) {
|
|
||||||
return From == BitWidth - To - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Given an OR instruction, check to see if this is a bswap or bitreverse
|
/// Given an OR instruction, check to see if this is a bswap or bitreverse
|
||||||
/// idiom. If so, insert the new intrinsic and return it.
|
/// idiom. If so, insert the new intrinsic and return it.
|
||||||
Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) {
|
Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) {
|
||||||
IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
|
SmallVector<Instruction*, 4> Insts;
|
||||||
if (!ITy)
|
if (!recognizeBitReverseOrBSwapIdiom(&I, true, false, Insts))
|
||||||
return nullptr; // Can't do vectors.
|
|
||||||
unsigned BW = ITy->getBitWidth();
|
|
||||||
|
|
||||||
/// We keep track of which bit (BitProvenance) inside which value (BitValues)
|
|
||||||
/// defines each bit in the result.
|
|
||||||
SmallVector<Value *, 8> BitValues(BW, nullptr);
|
|
||||||
SmallVector<int, 8> BitProvenance(BW, -1);
|
|
||||||
|
|
||||||
// Try to find all the pieces corresponding to the bswap.
|
|
||||||
APInt BitMask = APInt::getAllOnesValue(BitValues.size());
|
|
||||||
if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance))
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
Instruction *LastInst = Insts.pop_back_val();
|
||||||
|
LastInst->removeFromParent();
|
||||||
|
|
||||||
// Check to see if all of the bits come from the same value.
|
for (auto *Inst : Insts)
|
||||||
Value *V = BitValues[0];
|
Worklist.Add(Inst);
|
||||||
if (!V) return nullptr; // Didn't find a bit? Must be zero.
|
return LastInst;
|
||||||
|
|
||||||
if (!std::all_of(BitValues.begin(), BitValues.end(),
|
|
||||||
[&](const Value *X) { return X == V; }))
|
|
||||||
return nullptr;
|
|
||||||
|
|
||||||
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
|
|
||||||
// only byteswap values with an even number of bytes.
|
|
||||||
bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;;
|
|
||||||
for (unsigned i = 0, e = BitValues.size(); i != e; ++i) {
|
|
||||||
OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
|
|
||||||
OKForBitReverse &=
|
|
||||||
bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
|
|
||||||
}
|
|
||||||
|
|
||||||
Intrinsic::ID Intrin;
|
|
||||||
if (OKForBSwap)
|
|
||||||
Intrin = Intrinsic::bswap;
|
|
||||||
else if (OKForBitReverse)
|
|
||||||
Intrin = Intrinsic::bitreverse;
|
|
||||||
else
|
|
||||||
return nullptr;
|
|
||||||
|
|
||||||
Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy);
|
|
||||||
return CallInst::Create(F, V);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0)
|
/// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0)
|
||||||
|
@ -179,13 +179,244 @@ void LandingPadInliningInfo::forwardResume(
|
|||||||
RI->eraseFromParent();
|
RI->eraseFromParent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
|
||||||
|
static Value *getParentPad(Value *EHPad) {
|
||||||
|
if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
|
||||||
|
return FPI->getParentPad();
|
||||||
|
return cast<CatchSwitchInst>(EHPad)->getParentPad();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Memo table used by the unwind-destination search: maps a pad instruction
/// to the token it was found to unwind to (a null entry records that the pad
/// was examined but gave no definitive answer).
using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;
|
||||||
|
|
||||||
|
/// Helper for getUnwindDestToken that does the descendant-ward part of
|
||||||
|
/// the search.
|
||||||
|
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
|
||||||
|
UnwindDestMemoTy &MemoMap) {
|
||||||
|
SmallVector<Instruction *, 8> Worklist(1, EHPad);
|
||||||
|
|
||||||
|
while (!Worklist.empty()) {
|
||||||
|
Instruction *CurrentPad = Worklist.pop_back_val();
|
||||||
|
// We only put pads on the worklist that aren't in the MemoMap. When
|
||||||
|
// we find an unwind dest for a pad we may update its ancestors, but
|
||||||
|
// the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
|
||||||
|
// so they should never get updated while queued on the worklist.
|
||||||
|
assert(!MemoMap.count(CurrentPad));
|
||||||
|
Value *UnwindDestToken = nullptr;
|
||||||
|
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
|
||||||
|
if (CatchSwitch->hasUnwindDest()) {
|
||||||
|
UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
|
||||||
|
} else {
|
||||||
|
// Catchswitch doesn't have a 'nounwind' variant, and one might be
|
||||||
|
// annotated as "unwinds to caller" when really it's nounwind (see
|
||||||
|
// e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
|
||||||
|
// parent's unwind dest from this. We can check its catchpads'
|
||||||
|
// descendants, since they might include a cleanuppad with an
|
||||||
|
// "unwinds to caller" cleanupret, which can be trusted.
|
||||||
|
for (auto HI = CatchSwitch->handler_begin(),
|
||||||
|
HE = CatchSwitch->handler_end();
|
||||||
|
HI != HE && !UnwindDestToken; ++HI) {
|
||||||
|
BasicBlock *HandlerBlock = *HI;
|
||||||
|
auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
|
||||||
|
for (User *Child : CatchPad->users()) {
|
||||||
|
// Intentionally ignore invokes here -- since the catchswitch is
|
||||||
|
// marked "unwind to caller", it would be a verifier error if it
|
||||||
|
// contained an invoke which unwinds out of it, so any invoke we'd
|
||||||
|
// encounter must unwind to some child of the catch.
|
||||||
|
if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
Instruction *ChildPad = cast<Instruction>(Child);
|
||||||
|
auto Memo = MemoMap.find(ChildPad);
|
||||||
|
if (Memo == MemoMap.end()) {
|
||||||
|
// Haven't figure out this child pad yet; queue it.
|
||||||
|
Worklist.push_back(ChildPad);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// We've already checked this child, but might have found that
|
||||||
|
// it offers no proof either way.
|
||||||
|
Value *ChildUnwindDestToken = Memo->second;
|
||||||
|
if (!ChildUnwindDestToken)
|
||||||
|
continue;
|
||||||
|
// We already know the child's unwind dest, which can either
|
||||||
|
// be ConstantTokenNone to indicate unwind to caller, or can
|
||||||
|
// be another child of the catchpad. Only the former indicates
|
||||||
|
// the unwind dest of the catchswitch.
|
||||||
|
if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
|
||||||
|
UnwindDestToken = ChildUnwindDestToken;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assert(getParentPad(ChildUnwindDestToken) == CatchPad);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
|
||||||
|
for (User *U : CleanupPad->users()) {
|
||||||
|
if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
|
||||||
|
if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
|
||||||
|
UnwindDestToken = RetUnwindDest->getFirstNonPHI();
|
||||||
|
else
|
||||||
|
UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Value *ChildUnwindDestToken;
|
||||||
|
if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
|
||||||
|
ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
|
||||||
|
} else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
|
||||||
|
Instruction *ChildPad = cast<Instruction>(U);
|
||||||
|
auto Memo = MemoMap.find(ChildPad);
|
||||||
|
if (Memo == MemoMap.end()) {
|
||||||
|
// Haven't resolved this child yet; queue it and keep searching.
|
||||||
|
Worklist.push_back(ChildPad);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// We've checked this child, but still need to ignore it if it
|
||||||
|
// had no proof either way.
|
||||||
|
ChildUnwindDestToken = Memo->second;
|
||||||
|
if (!ChildUnwindDestToken)
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
// Not a relevant user of the cleanuppad
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// In a well-formed program, the child/invoke must either unwind to
|
||||||
|
// an(other) child of the cleanup, or exit the cleanup. In the
|
||||||
|
// first case, continue searching.
|
||||||
|
if (isa<Instruction>(ChildUnwindDestToken) &&
|
||||||
|
getParentPad(ChildUnwindDestToken) == CleanupPad)
|
||||||
|
continue;
|
||||||
|
UnwindDestToken = ChildUnwindDestToken;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If we haven't found an unwind dest for CurrentPad, we may have queued its
|
||||||
|
// children, so move on to the next in the worklist.
|
||||||
|
if (!UnwindDestToken)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
|
||||||
|
// any ancestors of CurrentPad up to but not including UnwindDestToken's
|
||||||
|
// parent pad. Record this in the memo map, and check to see if the
|
||||||
|
// original EHPad being queried is one of the ones exited.
|
||||||
|
Value *UnwindParent;
|
||||||
|
if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
|
||||||
|
UnwindParent = getParentPad(UnwindPad);
|
||||||
|
else
|
||||||
|
UnwindParent = nullptr;
|
||||||
|
bool ExitedOriginalPad = false;
|
||||||
|
for (Instruction *ExitedPad = CurrentPad;
|
||||||
|
ExitedPad && ExitedPad != UnwindParent;
|
||||||
|
ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
|
||||||
|
// Skip over catchpads since they just follow their catchswitches.
|
||||||
|
if (isa<CatchPadInst>(ExitedPad))
|
||||||
|
continue;
|
||||||
|
MemoMap[ExitedPad] = UnwindDestToken;
|
||||||
|
ExitedOriginalPad |= (ExitedPad == EHPad);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ExitedOriginalPad)
|
||||||
|
return UnwindDestToken;
|
||||||
|
|
||||||
|
// Continue the search.
|
||||||
|
}
|
||||||
|
|
||||||
|
// No definitive information is contained within this funclet.
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
|
||||||
|
/// return that pad instruction. If it unwinds to caller, return
|
||||||
|
/// ConstantTokenNone. If it does not have a definitive unwind destination,
|
||||||
|
/// return nullptr.
|
||||||
|
///
|
||||||
|
/// This routine gets invoked for calls in funclets in inlinees when inlining
|
||||||
|
/// an invoke. Since many funclets don't have calls inside them, it's queried
|
||||||
|
/// on-demand rather than building a map of pads to unwind dests up front.
|
||||||
|
/// Determining a funclet's unwind dest may require recursively searching its
|
||||||
|
/// descendants, and also ancestors and cousins if the descendants don't provide
|
||||||
|
/// an answer. Since most funclets will have their unwind dest immediately
|
||||||
|
/// available as the unwind dest of a catchswitch or cleanupret, this routine
|
||||||
|
/// searches top-down from the given pad and then up. To avoid worst-case
|
||||||
|
/// quadratic run-time given that approach, it uses a memo map to avoid
|
||||||
|
/// re-processing funclet trees. The callers that rewrite the IR as they go
|
||||||
|
/// take advantage of this, for correctness, by checking/forcing rewritten
|
||||||
|
/// pads' entries to match the original callee view.
|
||||||
|
static Value *getUnwindDestToken(Instruction *EHPad,
|
||||||
|
UnwindDestMemoTy &MemoMap) {
|
||||||
|
// Catchpads unwind to the same place as their catchswitch;
|
||||||
|
// redirct any queries on catchpads so the code below can
|
||||||
|
// deal with just catchswitches and cleanuppads.
|
||||||
|
if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
|
||||||
|
EHPad = CPI->getCatchSwitch();
|
||||||
|
|
||||||
|
// Check if we've already determined the unwind dest for this pad.
|
||||||
|
auto Memo = MemoMap.find(EHPad);
|
||||||
|
if (Memo != MemoMap.end())
|
||||||
|
return Memo->second;
|
||||||
|
|
||||||
|
// Search EHPad and, if necessary, its descendants.
|
||||||
|
Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
|
||||||
|
assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
|
||||||
|
if (UnwindDestToken)
|
||||||
|
return UnwindDestToken;
|
||||||
|
|
||||||
|
// No information is available for this EHPad from itself or any of its
|
||||||
|
// descendants. An unwind all the way out to a pad in the caller would
|
||||||
|
// need also to agree with the unwind dest of the parent funclet, so
|
||||||
|
// search up the chain to try to find a funclet with information. Put
|
||||||
|
// null entries in the memo map to avoid re-processing as we go up.
|
||||||
|
MemoMap[EHPad] = nullptr;
|
||||||
|
Instruction *LastUselessPad = EHPad;
|
||||||
|
Value *AncestorToken;
|
||||||
|
for (AncestorToken = getParentPad(EHPad);
|
||||||
|
auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
|
||||||
|
AncestorToken = getParentPad(AncestorToken)) {
|
||||||
|
// Skip over catchpads since they just follow their catchswitches.
|
||||||
|
if (isa<CatchPadInst>(AncestorPad))
|
||||||
|
continue;
|
||||||
|
assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
|
||||||
|
auto AncestorMemo = MemoMap.find(AncestorPad);
|
||||||
|
if (AncestorMemo == MemoMap.end()) {
|
||||||
|
UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
|
||||||
|
} else {
|
||||||
|
UnwindDestToken = AncestorMemo->second;
|
||||||
|
}
|
||||||
|
if (UnwindDestToken)
|
||||||
|
break;
|
||||||
|
LastUselessPad = AncestorPad;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Since the whole tree under LastUselessPad has no information, it all must
|
||||||
|
// match UnwindDestToken; record that to avoid repeating the search.
|
||||||
|
SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
|
||||||
|
while (!Worklist.empty()) {
|
||||||
|
Instruction *UselessPad = Worklist.pop_back_val();
|
||||||
|
assert(!MemoMap.count(UselessPad) || MemoMap[UselessPad] == nullptr);
|
||||||
|
MemoMap[UselessPad] = UnwindDestToken;
|
||||||
|
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
|
||||||
|
for (BasicBlock *HandlerBlock : CatchSwitch->handlers())
|
||||||
|
for (User *U : HandlerBlock->getFirstNonPHI()->users())
|
||||||
|
if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
|
||||||
|
Worklist.push_back(cast<Instruction>(U));
|
||||||
|
} else {
|
||||||
|
assert(isa<CleanupPadInst>(UselessPad));
|
||||||
|
for (User *U : UselessPad->users())
|
||||||
|
if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
|
||||||
|
Worklist.push_back(cast<Instruction>(U));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return UnwindDestToken;
|
||||||
|
}
|
||||||
|
|
||||||
/// When we inline a basic block into an invoke,
|
/// When we inline a basic block into an invoke,
|
||||||
/// we have to turn all of the calls that can throw into invokes.
|
/// we have to turn all of the calls that can throw into invokes.
|
||||||
/// This function analyzes BB to see if there are any calls, and if so,
|
/// This function analyzes BB to see if there are any calls, and if so,
|
||||||
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
|
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
|
||||||
/// nodes in that block with the values specified in InvokeDestPHIValues.
|
/// nodes in that block with the values specified in InvokeDestPHIValues.
|
||||||
static BasicBlock *
|
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
|
||||||
HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
|
BasicBlock *BB, BasicBlock *UnwindEdge,
|
||||||
|
UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
|
||||||
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
|
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
|
||||||
Instruction *I = &*BBI++;
|
Instruction *I = &*BBI++;
|
||||||
|
|
||||||
@ -196,6 +427,31 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) {
|
|||||||
if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
|
if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
|
||||||
|
// This call is nested inside a funclet. If that funclet has an unwind
|
||||||
|
// destination within the inlinee, then unwinding out of this call would
|
||||||
|
// be UB. Rewriting this call to an invoke which targets the inlined
|
||||||
|
// invoke's unwind dest would give the call's parent funclet multiple
|
||||||
|
// unwind destinations, which is something that subsequent EH table
|
||||||
|
// generation can't handle and that the verifier rejects. So when we
|
||||||
|
// see such a call, leave it as a call.
|
||||||
|
auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
|
||||||
|
Value *UnwindDestToken =
|
||||||
|
getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
|
||||||
|
if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
|
||||||
|
continue;
|
||||||
|
#ifndef NDEBUG
|
||||||
|
Instruction *MemoKey;
|
||||||
|
if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
|
||||||
|
MemoKey = CatchPad->getCatchSwitch();
|
||||||
|
else
|
||||||
|
MemoKey = FuncletPad;
|
||||||
|
assert(FuncletUnwindMap->count(MemoKey) &&
|
||||||
|
(*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
|
||||||
|
"must get memoized to avoid confusing later searches");
|
||||||
|
#endif // NDEBUG
|
||||||
|
}
|
||||||
|
|
||||||
// Convert this function call into an invoke instruction. First, split the
|
// Convert this function call into an invoke instruction. First, split the
|
||||||
// basic block.
|
// basic block.
|
||||||
BasicBlock *Split =
|
BasicBlock *Split =
|
||||||
@ -328,13 +584,23 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
|
|||||||
|
|
||||||
// This connects all the instructions which 'unwind to caller' to the invoke
|
// This connects all the instructions which 'unwind to caller' to the invoke
|
||||||
// destination.
|
// destination.
|
||||||
|
UnwindDestMemoTy FuncletUnwindMap;
|
||||||
for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
|
for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
|
||||||
BB != E; ++BB) {
|
BB != E; ++BB) {
|
||||||
if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
|
if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
|
||||||
if (CRI->unwindsToCaller()) {
|
if (CRI->unwindsToCaller()) {
|
||||||
CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI);
|
auto *CleanupPad = CRI->getCleanupPad();
|
||||||
|
CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI);
|
||||||
CRI->eraseFromParent();
|
CRI->eraseFromParent();
|
||||||
UpdatePHINodes(&*BB);
|
UpdatePHINodes(&*BB);
|
||||||
|
// Finding a cleanupret with an unwind destination would confuse
|
||||||
|
// subsequent calls to getUnwindDestToken, so map the cleanuppad
|
||||||
|
// to short-circuit any such calls and recognize this as an "unwind
|
||||||
|
// to caller" cleanup.
|
||||||
|
assert(!FuncletUnwindMap.count(CleanupPad) ||
|
||||||
|
isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
|
||||||
|
FuncletUnwindMap[CleanupPad] =
|
||||||
|
ConstantTokenNone::get(Caller->getContext());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -345,12 +611,41 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
|
|||||||
Instruction *Replacement = nullptr;
|
Instruction *Replacement = nullptr;
|
||||||
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
|
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
|
||||||
if (CatchSwitch->unwindsToCaller()) {
|
if (CatchSwitch->unwindsToCaller()) {
|
||||||
|
Value *UnwindDestToken;
|
||||||
|
if (auto *ParentPad =
|
||||||
|
dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
|
||||||
|
// This catchswitch is nested inside another funclet. If that
|
||||||
|
// funclet has an unwind destination within the inlinee, then
|
||||||
|
// unwinding out of this catchswitch would be UB. Rewriting this
|
||||||
|
// catchswitch to unwind to the inlined invoke's unwind dest would
|
||||||
|
// give the parent funclet multiple unwind destinations, which is
|
||||||
|
// something that subsequent EH table generation can't handle and
|
||||||
|
// that the verifier rejects. So when we see such a catchswitch, leave it
|
||||||
|
// as "unwind to caller".
|
||||||
|
UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
|
||||||
|
if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
// This catchswitch has no parent to inherit constraints from, and
|
||||||
|
// none of its descendants can have an unwind edge that exits it and
|
||||||
|
// targets another funclet in the inlinee. It may or may not have a
|
||||||
|
// descendant that definitively has an unwind to caller. In either
|
||||||
|
// case, we'll have to assume that any unwinds out of it may need to
|
||||||
|
// be routed to the caller, so treat it as though it has a definitive
|
||||||
|
// unwind to caller.
|
||||||
|
UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
|
||||||
|
}
|
||||||
auto *NewCatchSwitch = CatchSwitchInst::Create(
|
auto *NewCatchSwitch = CatchSwitchInst::Create(
|
||||||
CatchSwitch->getParentPad(), UnwindDest,
|
CatchSwitch->getParentPad(), UnwindDest,
|
||||||
CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
|
CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
|
||||||
CatchSwitch);
|
CatchSwitch);
|
||||||
for (BasicBlock *PadBB : CatchSwitch->handlers())
|
for (BasicBlock *PadBB : CatchSwitch->handlers())
|
||||||
NewCatchSwitch->addHandler(PadBB);
|
NewCatchSwitch->addHandler(PadBB);
|
||||||
|
// Propagate info for the old catchswitch over to the new one in
|
||||||
|
// the unwind map. This also serves to short-circuit any subsequent
|
||||||
|
// checks for the unwind dest of this catchswitch, which would get
|
||||||
|
// confused if they found the outer handler in the callee.
|
||||||
|
FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
|
||||||
Replacement = NewCatchSwitch;
|
Replacement = NewCatchSwitch;
|
||||||
}
|
}
|
||||||
} else if (!isa<FuncletPadInst>(I)) {
|
} else if (!isa<FuncletPadInst>(I)) {
|
||||||
@ -369,8 +664,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
|
|||||||
for (Function::iterator BB = FirstNewBlock->getIterator(),
|
for (Function::iterator BB = FirstNewBlock->getIterator(),
|
||||||
E = Caller->end();
|
E = Caller->end();
|
||||||
BB != E; ++BB)
|
BB != E; ++BB)
|
||||||
if (BasicBlock *NewBB =
|
if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
|
||||||
HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest))
|
&*BB, UnwindDest, &FuncletUnwindMap))
|
||||||
// Update any PHI nodes in the exceptional block to indicate that there
|
// Update any PHI nodes in the exceptional block to indicate that there
|
||||||
// is now a new entry in them.
|
// is now a new entry in them.
|
||||||
UpdatePHINodes(NewBB);
|
UpdatePHINodes(NewBB);
|
||||||
@ -1415,6 +1710,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we are inlining for an invoke instruction, we must make sure to rewrite
|
||||||
|
// any call instructions into invoke instructions. This is sensitive to which
|
||||||
|
// funclet pads were top-level in the inlinee, so must be done before
|
||||||
|
// rewriting the "parent pad" links.
|
||||||
|
if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
|
||||||
|
BasicBlock *UnwindDest = II->getUnwindDest();
|
||||||
|
Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
|
||||||
|
if (isa<LandingPadInst>(FirstNonPHI)) {
|
||||||
|
HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
|
||||||
|
} else {
|
||||||
|
HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Update the lexical scopes of the new funclets and callsites.
|
// Update the lexical scopes of the new funclets and callsites.
|
||||||
// Anything that had 'none' as its parent is now nested inside the callsite's
|
// Anything that had 'none' as its parent is now nested inside the callsite's
|
||||||
// EHPad.
|
// EHPad.
|
||||||
@ -1472,18 +1781,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are inlining for an invoke instruction, we must make sure to rewrite
|
|
||||||
// any call instructions into invoke instructions.
|
|
||||||
if (auto *II = dyn_cast<InvokeInst>(TheCall)) {
|
|
||||||
BasicBlock *UnwindDest = II->getUnwindDest();
|
|
||||||
Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
|
|
||||||
if (isa<LandingPadInst>(FirstNonPHI)) {
|
|
||||||
HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);
|
|
||||||
} else {
|
|
||||||
HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle any inlined musttail call sites. In order for a new call site to be
|
// Handle any inlined musttail call sites. In order for a new call site to be
|
||||||
// musttail, the source of the clone and the inlined call site must have been
|
// musttail, the source of the clone and the inlined call site must have been
|
||||||
// musttail. Therefore it's safe to return without merging control into the
|
// musttail. Therefore it's safe to return without merging control into the
|
||||||
|
@ -1592,3 +1592,205 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A potential constituent of a bitreverse or bswap expression. See
|
||||||
|
/// collectBitParts for a fuller explanation.
|
||||||
|
struct BitPart {
|
||||||
|
BitPart(Value *P, unsigned BW) : Provider(P) {
|
||||||
|
Provenance.resize(BW);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The Value that this is a bitreverse/bswap of.
|
||||||
|
Value *Provider;
|
||||||
|
/// The "provenance" of each bit. Provenance[A] = B means that bit A
|
||||||
|
/// in Provider becomes bit B in the result of this expression.
|
||||||
|
SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
|
||||||
|
|
||||||
|
enum { Unset = -1 };
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Analyze the specified subexpression and see if it is capable of providing
|
||||||
|
/// pieces of a bswap or bitreverse. The subexpression provides a potential
|
||||||
|
/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
|
||||||
|
/// the output of the expression came from a corresponding bit in some other
|
||||||
|
/// value. This function is recursive, and the end result is a mapping of
|
||||||
|
/// bitnumber to bitnumber. It is the caller's responsibility to validate that
|
||||||
|
/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse.
|
||||||
|
///
|
||||||
|
/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know
|
||||||
|
/// that the expression deposits the low byte of %X into the high byte of the
|
||||||
|
/// result and that all other bits are zero. This expression is accepted and a
|
||||||
|
/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
|
||||||
|
/// [0-7].
|
||||||
|
///
|
||||||
|
/// To avoid revisiting values, the BitPart results are memoized into the
|
||||||
|
/// provided map. To avoid unnecessary copying of BitParts, BitParts are
|
||||||
|
/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
|
||||||
|
/// store BitParts objects, not pointers. As we need the concept of a nullptr
|
||||||
|
/// BitParts (Value has been analyzed and the analysis failed), we an Optional
|
||||||
|
/// type instead to provide the same functionality.
|
||||||
|
///
|
||||||
|
/// Because we pass around references into \c BPS, we must use a container that
|
||||||
|
/// does not invalidate internal references (std::map instead of DenseMap).
|
||||||
|
///
|
||||||
|
static const Optional<BitPart> &
|
||||||
|
collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
|
||||||
|
std::map<Value *, Optional<BitPart>> &BPS) {
|
||||||
|
auto I = BPS.find(V);
|
||||||
|
if (I != BPS.end())
|
||||||
|
return I->second;
|
||||||
|
|
||||||
|
auto &Result = BPS[V] = None;
|
||||||
|
auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
|
||||||
|
|
||||||
|
if (Instruction *I = dyn_cast<Instruction>(V)) {
|
||||||
|
// If this is an or instruction, it may be an inner node of the bswap.
|
||||||
|
if (I->getOpcode() == Instruction::Or) {
|
||||||
|
auto &A = collectBitParts(I->getOperand(0), MatchBSwaps,
|
||||||
|
MatchBitReversals, BPS);
|
||||||
|
auto &B = collectBitParts(I->getOperand(1), MatchBSwaps,
|
||||||
|
MatchBitReversals, BPS);
|
||||||
|
if (!A || !B)
|
||||||
|
return Result;
|
||||||
|
|
||||||
|
// Try and merge the two together.
|
||||||
|
if (!A->Provider || A->Provider != B->Provider)
|
||||||
|
return Result;
|
||||||
|
|
||||||
|
Result = BitPart(A->Provider, BitWidth);
|
||||||
|
for (unsigned i = 0; i < A->Provenance.size(); ++i) {
|
||||||
|
if (A->Provenance[i] != BitPart::Unset &&
|
||||||
|
B->Provenance[i] != BitPart::Unset &&
|
||||||
|
A->Provenance[i] != B->Provenance[i])
|
||||||
|
return Result = None;
|
||||||
|
|
||||||
|
if (A->Provenance[i] == BitPart::Unset)
|
||||||
|
Result->Provenance[i] = B->Provenance[i];
|
||||||
|
else
|
||||||
|
Result->Provenance[i] = A->Provenance[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is a logical shift by a constant, recurse then shift the result.
|
||||||
|
if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
|
||||||
|
unsigned BitShift =
|
||||||
|
cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
|
||||||
|
// Ensure the shift amount is defined.
|
||||||
|
if (BitShift > BitWidth)
|
||||||
|
return Result;
|
||||||
|
|
||||||
|
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
|
||||||
|
MatchBitReversals, BPS);
|
||||||
|
if (!Res)
|
||||||
|
return Result;
|
||||||
|
Result = Res;
|
||||||
|
|
||||||
|
// Perform the "shift" on BitProvenance.
|
||||||
|
auto &P = Result->Provenance;
|
||||||
|
if (I->getOpcode() == Instruction::Shl) {
|
||||||
|
P.erase(std::prev(P.end(), BitShift), P.end());
|
||||||
|
P.insert(P.begin(), BitShift, BitPart::Unset);
|
||||||
|
} else {
|
||||||
|
P.erase(P.begin(), std::next(P.begin(), BitShift));
|
||||||
|
P.insert(P.end(), BitShift, BitPart::Unset);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If this is a logical 'and' with a mask that clears bits, recurse then
|
||||||
|
// unset the appropriate bits.
|
||||||
|
if (I->getOpcode() == Instruction::And &&
|
||||||
|
isa<ConstantInt>(I->getOperand(1))) {
|
||||||
|
APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
|
||||||
|
const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
|
||||||
|
|
||||||
|
// Check that the mask allows a multiple of 8 bits for a bswap, for an
|
||||||
|
// early exit.
|
||||||
|
unsigned NumMaskedBits = AndMask.countPopulation();
|
||||||
|
if (!MatchBitReversals && NumMaskedBits % 8 != 0)
|
||||||
|
return Result;
|
||||||
|
|
||||||
|
auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
|
||||||
|
MatchBitReversals, BPS);
|
||||||
|
if (!Res)
|
||||||
|
return Result;
|
||||||
|
Result = Res;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1)
|
||||||
|
// If the AndMask is zero for this bit, clear the bit.
|
||||||
|
if ((AndMask & Bit) == 0)
|
||||||
|
Result->Provenance[i] = BitPart::Unset;
|
||||||
|
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
|
||||||
|
// the input value to the bswap/bitreverse.
|
||||||
|
Result = BitPart(V, BitWidth);
|
||||||
|
for (unsigned i = 0; i < BitWidth; ++i)
|
||||||
|
Result->Provenance[i] = i;
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return true if moving bit \p From to bit \p To is what a byte swap of a
/// \p BitWidth-bit value does: the bit keeps its position inside its byte and
/// the byte index is mirrored.
static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To,
                                          unsigned BitWidth) {
  // A byte swap never moves a bit within its byte.
  const bool SameBitInByte = (From % 8) == (To % 8);
  if (!SameBitInByte)
    return false;
  // Work in byte indices and check for a byte reversal.
  const unsigned FromByte = From / 8;
  const unsigned ToByte = To / 8;
  const unsigned NumBytes = BitWidth / 8;
  return FromByte == NumBytes - ToByte - 1;
}
|
||||||
|
|
||||||
|
/// Return true if moving bit \p From to bit \p To is what a bit reversal of a
/// \p BitWidth-bit value does, i.e. the bit index is mirrored.
static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To,
                                               unsigned BitWidth) {
  const unsigned MirroredIndex = BitWidth - To - 1;
  return From == MirroredIndex;
}
|
||||||
|
|
||||||
|
/// Given an OR instruction, check to see if this is a bitreverse
|
||||||
|
/// idiom. If so, insert the new intrinsic and return true.
|
||||||
|
bool llvm::recognizeBitReverseOrBSwapIdiom(
|
||||||
|
Instruction *I, bool MatchBSwaps, bool MatchBitReversals,
|
||||||
|
SmallVectorImpl<Instruction *> &InsertedInsts) {
|
||||||
|
if (Operator::getOpcode(I) != Instruction::Or)
|
||||||
|
return false;
|
||||||
|
if (!MatchBSwaps && !MatchBitReversals)
|
||||||
|
return false;
|
||||||
|
IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
|
||||||
|
if (!ITy || ITy->getBitWidth() > 128)
|
||||||
|
return false; // Can't do vectors or integers > 128 bits.
|
||||||
|
unsigned BW = ITy->getBitWidth();
|
||||||
|
|
||||||
|
// Try to find all the pieces corresponding to the bswap.
|
||||||
|
std::map<Value *, Optional<BitPart>> BPS;
|
||||||
|
auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS);
|
||||||
|
if (!Res)
|
||||||
|
return false;
|
||||||
|
auto &BitProvenance = Res->Provenance;
|
||||||
|
|
||||||
|
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
|
||||||
|
// only byteswap values with an even number of bytes.
|
||||||
|
bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;
|
||||||
|
for (unsigned i = 0; i < BW; ++i) {
|
||||||
|
OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
|
||||||
|
OKForBitReverse &=
|
||||||
|
bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
|
||||||
|
}
|
||||||
|
|
||||||
|
Intrinsic::ID Intrin;
|
||||||
|
if (OKForBSwap && MatchBSwaps)
|
||||||
|
Intrin = Intrinsic::bswap;
|
||||||
|
else if (OKForBitReverse && MatchBitReversals)
|
||||||
|
Intrin = Intrinsic::bitreverse;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy);
|
||||||
|
InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
@ -970,15 +970,34 @@ static Value *valueHasFloatPrecision(Value *Val) {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
/// Any floating-point library function that we're trying to simplify will have
|
||||||
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
|
/// a signature of the form: fptype foo(fptype param1, fptype param2, ...).
|
||||||
|
/// CheckDoubleTy indicates that 'fptype' must be 'double'.
|
||||||
|
static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams,
|
||||||
|
bool CheckDoubleTy) {
|
||||||
|
FunctionType *FT = F->getFunctionType();
|
||||||
|
if (FT->getNumParams() != NumParams)
|
||||||
|
return false;
|
||||||
|
|
||||||
Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
|
// The return type must match what we're looking for.
|
||||||
bool CheckRetType) {
|
Type *RetTy = FT->getReturnType();
|
||||||
|
if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Each parameter must match the return type, and therefore, match every other
|
||||||
|
// parameter too.
|
||||||
|
for (const Type *ParamTy : FT->params())
|
||||||
|
if (ParamTy != RetTy)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shrink double -> float for unary functions like 'floor'.
|
||||||
|
static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
|
||||||
|
bool CheckRetType) {
|
||||||
Function *Callee = CI->getCalledFunction();
|
Function *Callee = CI->getCalledFunction();
|
||||||
FunctionType *FT = Callee->getFunctionType();
|
if (!matchesFPLibFunctionSignature(Callee, 1, true))
|
||||||
if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
|
|
||||||
!FT->getParamType(0)->isDoubleTy())
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
if (CheckRetType) {
|
if (CheckRetType) {
|
||||||
@ -1013,15 +1032,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
|
|||||||
return B.CreateFPExt(V, B.getDoubleTy());
|
return B.CreateFPExt(V, B.getDoubleTy());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax'
|
/// Shrink double -> float for binary functions like 'fmin/fmax'.
|
||||||
Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
|
static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
|
||||||
Function *Callee = CI->getCalledFunction();
|
Function *Callee = CI->getCalledFunction();
|
||||||
FunctionType *FT = Callee->getFunctionType();
|
if (!matchesFPLibFunctionSignature(Callee, 2, true))
|
||||||
// Just make sure this has 2 arguments of the same FP type, which match the
|
|
||||||
// result type.
|
|
||||||
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
|
|
||||||
FT->getParamType(0) != FT->getParamType(1) ||
|
|
||||||
!FT->getParamType(0)->isFloatingPointTy())
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
// If this is something like 'fmin((double)floatval1, (double)floatval2)',
|
// If this is something like 'fmin((double)floatval1, (double)floatval2)',
|
||||||
@ -1400,6 +1414,15 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
|
|||||||
Callee->getIntrinsicID() == Intrinsic::sqrt))
|
Callee->getIntrinsicID() == Intrinsic::sqrt))
|
||||||
Ret = optimizeUnaryDoubleFP(CI, B, true);
|
Ret = optimizeUnaryDoubleFP(CI, B, true);
|
||||||
|
|
||||||
|
// FIXME: Refactor - this check is repeated all over this file and even in the
|
||||||
|
// preceding call to shrink double -> float.
|
||||||
|
|
||||||
|
// Make sure this has 1 argument of FP type, which matches the result type.
|
||||||
|
FunctionType *FT = Callee->getFunctionType();
|
||||||
|
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
|
||||||
|
!FT->getParamType(0)->isFloatingPointTy())
|
||||||
|
return Ret;
|
||||||
|
|
||||||
if (!CI->hasUnsafeAlgebra())
|
if (!CI->hasUnsafeAlgebra())
|
||||||
return Ret;
|
return Ret;
|
||||||
|
|
||||||
|
@ -3548,14 +3548,16 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
|
|||||||
E = CGF.EmitAnyExpr(EExpr);
|
E = CGF.EmitAnyExpr(EExpr);
|
||||||
CGF.EmitOMPAtomicSimpleUpdateExpr(
|
CGF.EmitOMPAtomicSimpleUpdateExpr(
|
||||||
X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
|
X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
|
||||||
[&CGF, UpExpr, VD, IPriv](RValue XRValue) {
|
[&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
|
||||||
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
||||||
PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address {
|
PrivateScope.addPrivate(
|
||||||
Address LHSTemp = CGF.CreateMemTemp(VD->getType());
|
VD, [&CGF, VD, XRValue, Loc]() -> Address {
|
||||||
CGF.EmitStoreThroughLValue(
|
Address LHSTemp = CGF.CreateMemTemp(VD->getType());
|
||||||
XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType()));
|
CGF.emitOMPSimpleStore(
|
||||||
return LHSTemp;
|
CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
|
||||||
});
|
VD->getType().getNonReferenceType(), Loc);
|
||||||
|
return LHSTemp;
|
||||||
|
});
|
||||||
(void)PrivateScope.Privatize();
|
(void)PrivateScope.Privatize();
|
||||||
return CGF.EmitAnyExpr(UpExpr);
|
return CGF.EmitAnyExpr(UpExpr);
|
||||||
});
|
});
|
||||||
|
@ -2163,17 +2163,17 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal,
|
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
|
||||||
QualType RValTy, SourceLocation Loc) {
|
QualType RValTy, SourceLocation Loc) {
|
||||||
switch (CGF.getEvaluationKind(LVal.getType())) {
|
switch (getEvaluationKind(LVal.getType())) {
|
||||||
case TEK_Scalar:
|
case TEK_Scalar:
|
||||||
CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue(
|
EmitStoreThroughLValue(RValue::get(convertToScalarValue(
|
||||||
CGF, RVal, RValTy, LVal.getType(), Loc)),
|
*this, RVal, RValTy, LVal.getType(), Loc)),
|
||||||
LVal);
|
LVal);
|
||||||
break;
|
break;
|
||||||
case TEK_Complex:
|
case TEK_Complex:
|
||||||
CGF.EmitStoreOfComplex(
|
EmitStoreOfComplex(
|
||||||
convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal,
|
convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
|
||||||
/*isInit=*/false);
|
/*isInit=*/false);
|
||||||
break;
|
break;
|
||||||
case TEK_Aggregate:
|
case TEK_Aggregate:
|
||||||
@ -2201,7 +2201,7 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
|
|||||||
// list.
|
// list.
|
||||||
if (IsSeqCst)
|
if (IsSeqCst)
|
||||||
CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
|
CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
|
||||||
emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc);
|
CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
|
static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
|
||||||
@ -2459,7 +2459,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Emit post-update store to 'v' of old/new 'x' value.
|
// Emit post-update store to 'v' of old/new 'x' value.
|
||||||
emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc);
|
CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
|
||||||
// OpenMP, 2.12.6, atomic Construct
|
// OpenMP, 2.12.6, atomic Construct
|
||||||
// Any atomic construct with a seq_cst clause forces the atomically
|
// Any atomic construct with a seq_cst clause forces the atomically
|
||||||
// performed operation to include an implicit flush operation without a
|
// performed operation to include an implicit flush operation without a
|
||||||
|
@ -2211,6 +2211,8 @@ public:
|
|||||||
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
|
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
|
||||||
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
|
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
|
||||||
SmallVectorImpl<llvm::Value *> &CapturedVars);
|
SmallVectorImpl<llvm::Value *> &CapturedVars);
|
||||||
|
void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
|
||||||
|
SourceLocation Loc);
|
||||||
/// \brief Perform element by element copying of arrays with type \a
|
/// \brief Perform element by element copying of arrays with type \a
|
||||||
/// OriginalType from \a SrcAddr to \a DestAddr using copying procedure
|
/// OriginalType from \a SrcAddr to \a DestAddr using copying procedure
|
||||||
/// generated by \a CopyGen.
|
/// generated by \a CopyGen.
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include "OrcLazyJIT.h"
|
#include "OrcLazyJIT.h"
|
||||||
#include "RemoteJITUtils.h"
|
#include "RemoteJITUtils.h"
|
||||||
#include "llvm/IR/LLVMContext.h"
|
#include "llvm/IR/LLVMContext.h"
|
||||||
|
#include "llvm/ADT/StringExtras.h"
|
||||||
#include "llvm/ADT/Triple.h"
|
#include "llvm/ADT/Triple.h"
|
||||||
#include "llvm/Bitcode/ReaderWriter.h"
|
#include "llvm/Bitcode/ReaderWriter.h"
|
||||||
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
|
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
|
||||||
@ -741,11 +742,11 @@ std::unique_ptr<FDRPCChannel> launchRemote() {
|
|||||||
ChildPath.reset(new char[ChildExecPath.size() + 1]);
|
ChildPath.reset(new char[ChildExecPath.size() + 1]);
|
||||||
std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]);
|
std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]);
|
||||||
ChildPath[ChildExecPath.size()] = '\0';
|
ChildPath[ChildExecPath.size()] = '\0';
|
||||||
std::string ChildInStr = std::to_string(PipeFD[0][0]);
|
std::string ChildInStr = utostr(PipeFD[0][0]);
|
||||||
ChildIn.reset(new char[ChildInStr.size() + 1]);
|
ChildIn.reset(new char[ChildInStr.size() + 1]);
|
||||||
std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]);
|
std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]);
|
||||||
ChildIn[ChildInStr.size()] = '\0';
|
ChildIn[ChildInStr.size()] = '\0';
|
||||||
std::string ChildOutStr = std::to_string(PipeFD[1][1]);
|
std::string ChildOutStr = utostr(PipeFD[1][1]);
|
||||||
ChildOut.reset(new char[ChildOutStr.size() + 1]);
|
ChildOut.reset(new char[ChildOutStr.size() + 1]);
|
||||||
std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]);
|
std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]);
|
||||||
ChildOut[ChildOutStr.size()] = '\0';
|
ChildOut[ChildOutStr.size()] = '\0';
|
||||||
|
@ -7,4 +7,4 @@
|
|||||||
|
|
||||||
#define CLANG_VENDOR "FreeBSD "
|
#define CLANG_VENDOR "FreeBSD "
|
||||||
|
|
||||||
#define SVN_REVISION "257836"
|
#define SVN_REVISION "258549"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user