Vendor import of llvm release_38 branch r261684:
https://llvm.org/svn/llvm-project/llvm/branches/release_38@261684
This commit is contained in:
parent
3f4bde29a3
commit
d9c9bd8485
@ -1002,11 +1002,13 @@ void RAFast::AllocateBasicBlock() {
|
||||
|
||||
unsigned DefOpEnd = MI->getNumOperands();
|
||||
if (MI->isCall()) {
|
||||
// Spill all virtregs before a call. This serves two purposes: 1. If an
|
||||
// Spill all virtregs before a call. This serves one purpose: If an
|
||||
// exception is thrown, the landing pad is going to expect to find
|
||||
// registers in their spill slots, and 2. we don't have to wade through
|
||||
// all the <imp-def> operands on the call instruction.
|
||||
DefOpEnd = VirtOpEnd;
|
||||
// registers in their spill slots.
|
||||
// Note: although it is appealing to just consider all definitions
|
||||
// as call-clobbered, this is not correct because some of those
|
||||
// definitions may be used later on and we do not want to reuse
|
||||
// those for virtual registers in between.
|
||||
DEBUG(dbgs() << " Spilling remaining registers before call.\n");
|
||||
spillAll(MI);
|
||||
|
||||
|
@ -275,6 +275,18 @@ static bool isCSSave(MachineInstr *MBBI) {
|
||||
MBBI->getOpcode() == AArch64::STPDpre;
|
||||
}
|
||||
|
||||
bool AArch64FrameLowering::canUseAsPrologue(
|
||||
const MachineBasicBlock &MBB) const {
|
||||
const MachineFunction *MF = MBB.getParent();
|
||||
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
|
||||
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
|
||||
// Don't need a scratch register if we're not going to re-align the stack.
|
||||
// Otherwise, we may need a scratch register to be available and we do not
|
||||
// support that for now.
|
||||
return !RegInfo->needsStackRealignment(*MF);
|
||||
}
|
||||
|
||||
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin();
|
||||
|
@ -37,6 +37,8 @@ public:
|
||||
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
|
||||
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
|
||||
|
||||
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
|
||||
|
||||
int getFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
unsigned &FrameReg) const override;
|
||||
int resolveFrameIndexReference(const MachineFunction &MF, int FI,
|
||||
|
@ -556,16 +556,42 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
|
||||
}
|
||||
}
|
||||
|
||||
bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
|
||||
bool UseAtEnd,
|
||||
unsigned *ScratchRegister) const {
|
||||
/* This function will do the following:
|
||||
- If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
|
||||
respectively (defaults recommended by the ABI) and return true
|
||||
- If MBB is neither an entry nor an exit block, initialize the register scavenger and look
|
||||
for available registers.
|
||||
- If the defaults (R0/R12) are available, return true
|
||||
- If TwoUniqueRegsRequired is set to true, it looks for two unique
|
||||
registers. Otherwise, look for a single available register.
|
||||
- If the required registers are found, set SR1 and SR2 and return true.
|
||||
- If the required registers are not found, set SR2 or both SR1 and SR2 to
|
||||
PPC::NoRegister and return false.
|
||||
|
||||
Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
|
||||
is not set, this function will attempt to find two different registers, but
|
||||
still return true if only one register is available (and set SR1 == SR2).
|
||||
*/
|
||||
bool
|
||||
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
|
||||
bool UseAtEnd,
|
||||
bool TwoUniqueRegsRequired,
|
||||
unsigned *SR1,
|
||||
unsigned *SR2) const {
|
||||
RegScavenger RS;
|
||||
unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
|
||||
unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
|
||||
unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
|
||||
|
||||
if (ScratchRegister)
|
||||
*ScratchRegister = R0;
|
||||
// Set the defaults for the two scratch registers.
|
||||
if (SR1)
|
||||
*SR1 = R0;
|
||||
|
||||
// If MBB is an entry or exit block, use R0 as the scratch register
|
||||
if (SR2) {
|
||||
assert (SR1 && "Asking for the second scratch register but not the first?");
|
||||
*SR2 = R12;
|
||||
}
|
||||
|
||||
// If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
|
||||
if ((UseAtEnd && MBB->isReturnBlock()) ||
|
||||
(!UseAtEnd && (&MBB->getParent()->front() == MBB)))
|
||||
return true;
|
||||
@ -573,8 +599,8 @@ bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
|
||||
RS.enterBasicBlock(MBB);
|
||||
|
||||
if (UseAtEnd && !MBB->empty()) {
|
||||
// The scratch register will be used at the end of the block, so must consider
|
||||
// all registers used within the block
|
||||
// The scratch register will be used at the end of the block, so must
|
||||
// consider all registers used within the block
|
||||
|
||||
MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
|
||||
// If no terminator, back iterator up to previous instruction.
|
||||
@ -584,35 +610,86 @@ bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
|
||||
if (MBBI != MBB->begin())
|
||||
RS.forward(MBBI);
|
||||
}
|
||||
|
||||
if (!RS.isRegUsed(R0))
|
||||
|
||||
// If the two registers are available, we're all good.
|
||||
// Note that we only return here if both R0 and R12 are available because
|
||||
// although the function may not require two unique registers, it may benefit
|
||||
// from having two so we should try to provide them.
|
||||
if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
|
||||
return true;
|
||||
|
||||
unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass
|
||||
: &PPC::GPRCRegClass);
|
||||
|
||||
// Make sure the register scavenger was able to find an available register
|
||||
// If not, use R0 but return false to indicate no register was available and
|
||||
// R0 must be used (as recommended by the ABI)
|
||||
if (Reg == 0)
|
||||
// Get the list of callee-saved registers for the target.
|
||||
const PPCRegisterInfo *RegInfo =
|
||||
static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
|
||||
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
|
||||
|
||||
// Get all the available registers in the block.
|
||||
BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
|
||||
&PPC::GPRCRegClass);
|
||||
|
||||
// We shouldn't use callee-saved registers as scratch registers as they may be
|
||||
// available when looking for a candidate block for shrink wrapping but not
|
||||
// available when the actual prologue/epilogue is being emitted because they
|
||||
// were added as live-in to the prologue block by PrologueEpilogueInserter.
|
||||
for (int i = 0; CSRegs[i]; ++i)
|
||||
BV.reset(CSRegs[i]);
|
||||
|
||||
// Set the first scratch register to the first available one.
|
||||
if (SR1) {
|
||||
int FirstScratchReg = BV.find_first();
|
||||
*SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
|
||||
}
|
||||
|
||||
// If there is another one available, set the second scratch register to that.
|
||||
// Otherwise, set it to either PPC::NoRegister if this function requires two
|
||||
// or to whatever SR1 is set to if this function doesn't require two.
|
||||
if (SR2) {
|
||||
int SecondScratchReg = BV.find_next(*SR1);
|
||||
if (SecondScratchReg != -1)
|
||||
*SR2 = SecondScratchReg;
|
||||
else
|
||||
*SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
|
||||
}
|
||||
|
||||
// Now that we've done our best to provide both registers, double check
|
||||
// whether we were unable to provide enough.
|
||||
if (BV.count() < (TwoUniqueRegsRequired ? 2 : 1))
|
||||
return false;
|
||||
|
||||
if (ScratchRegister)
|
||||
*ScratchRegister = Reg;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// We need a scratch register for spilling LR and for spilling CR. By default,
|
||||
// we use two scratch registers to hide latency. However, if only one scratch
|
||||
// register is available, we can adjust for that by not overlapping the spill
|
||||
// code. However, if we need to realign the stack (i.e. have a base pointer)
|
||||
// and the stack frame is large, we need two scratch registers.
|
||||
bool
|
||||
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
|
||||
const PPCRegisterInfo *RegInfo =
|
||||
static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
|
||||
MachineFunction &MF = *(MBB->getParent());
|
||||
bool HasBP = RegInfo->hasBasePointer(MF);
|
||||
unsigned FrameSize = determineFrameLayout(MF, false);
|
||||
int NegFrameSize = -FrameSize;
|
||||
bool IsLargeFrame = !isInt<16>(NegFrameSize);
|
||||
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
unsigned MaxAlign = MFI->getMaxAlignment();
|
||||
|
||||
return IsLargeFrame && HasBP && MaxAlign > 1;
|
||||
}
|
||||
|
||||
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
|
||||
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
|
||||
|
||||
return findScratchRegister(TmpMBB, false, nullptr);
|
||||
return findScratchRegister(TmpMBB, false,
|
||||
twoUniqueScratchRegsRequired(TmpMBB));
|
||||
}
|
||||
|
||||
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
|
||||
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
|
||||
|
||||
return findScratchRegister(TmpMBB, true, nullptr);
|
||||
return findScratchRegister(TmpMBB, true);
|
||||
}
|
||||
|
||||
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
@ -664,6 +741,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
bool MustSaveLR = FI->mustSaveLR();
|
||||
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
|
||||
bool MustSaveCR = !MustSaveCRs.empty();
|
||||
// Do we have a frame pointer and/or base pointer for this function?
|
||||
bool HasFP = hasFP(MF);
|
||||
bool HasBP = RegInfo->hasBasePointer(MF);
|
||||
@ -701,9 +779,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
|
||||
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
|
||||
|
||||
findScratchRegister(&MBB, false, &ScratchReg);
|
||||
assert(ScratchReg && "No scratch register!");
|
||||
|
||||
// Using the same bool variable as below to suppress compiler warnings.
|
||||
bool SingleScratchReg =
|
||||
findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
|
||||
&ScratchReg, &TempReg);
|
||||
assert(SingleScratchReg &&
|
||||
"Required number of registers not available in this block");
|
||||
|
||||
SingleScratchReg = ScratchReg == TempReg;
|
||||
|
||||
int LROffset = getReturnSaveOffset();
|
||||
|
||||
int FPOffset = 0;
|
||||
@ -748,13 +832,30 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
// indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
|
||||
bool isLargeFrame = !isInt<16>(NegFrameSize);
|
||||
|
||||
assert((isPPC64 || !MustSaveCR) &&
|
||||
"Prologue CR saving supported only in 64-bit mode");
|
||||
|
||||
// If we need to spill the CR and the LR but we don't have two separate
|
||||
// registers available, we must spill them one at a time
|
||||
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
|
||||
// FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
|
||||
// If only one or two CR fields are clobbered, it could be more
|
||||
// efficient to use mfocrf to selectively save just those fields.
|
||||
MachineInstrBuilder MIB =
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
|
||||
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
|
||||
MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
|
||||
.addReg(TempReg, getKillRegState(true))
|
||||
.addImm(8)
|
||||
.addReg(SPReg);
|
||||
}
|
||||
|
||||
if (MustSaveLR)
|
||||
BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
|
||||
|
||||
assert((isPPC64 || MustSaveCRs.empty()) &&
|
||||
"Prologue CR saving supported only in 64-bit mode");
|
||||
|
||||
if (!MustSaveCRs.empty()) { // will only occur for PPC64
|
||||
if (MustSaveCR &&
|
||||
!(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
|
||||
// FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
|
||||
// If only one or two CR fields are clobbered, it could be more
|
||||
// efficient to use mfocrf to selectively save just those fields.
|
||||
@ -792,7 +893,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
.addImm(LROffset)
|
||||
.addReg(SPReg);
|
||||
|
||||
if (!MustSaveCRs.empty()) // will only occur for PPC64
|
||||
if (MustSaveCR &&
|
||||
!(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
|
||||
.addReg(TempReg, getKillRegState(true))
|
||||
.addImm(8)
|
||||
@ -811,6 +913,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
.addReg(SPReg);
|
||||
}
|
||||
|
||||
// This condition must be kept in sync with canUseAsPrologue.
|
||||
if (HasBP && MaxAlign > 1) {
|
||||
if (isPPC64)
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
|
||||
@ -828,6 +931,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
.addReg(ScratchReg, RegState::Kill)
|
||||
.addImm(NegFrameSize);
|
||||
} else {
|
||||
assert(!SingleScratchReg && "Only a single scratch reg available");
|
||||
BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
|
||||
.addImm(NegFrameSize >> 16);
|
||||
BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
|
||||
@ -951,7 +1055,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
|
||||
// For SVR4, don't emit a move for the CR spill slot if we haven't
|
||||
// spilled CRs.
|
||||
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
|
||||
&& MustSaveCRs.empty())
|
||||
&& !MustSaveCR)
|
||||
continue;
|
||||
|
||||
// For 64-bit SVR4 when we have spilled CRs, the spill location
|
||||
@ -1005,6 +1109,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
|
||||
bool MustSaveLR = FI->mustSaveLR();
|
||||
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
|
||||
bool MustSaveCR = !MustSaveCRs.empty();
|
||||
// Do we have a frame pointer and/or base pointer for this function?
|
||||
bool HasFP = hasFP(MF);
|
||||
bool HasBP = RegInfo->hasBasePointer(MF);
|
||||
@ -1026,14 +1131,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
: PPC::ADDI );
|
||||
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
|
||||
: PPC::ADD4 );
|
||||
|
||||
|
||||
int LROffset = getReturnSaveOffset();
|
||||
|
||||
int FPOffset = 0;
|
||||
|
||||
findScratchRegister(&MBB, true, &ScratchReg);
|
||||
assert(ScratchReg && "No scratch register!");
|
||||
|
||||
// Using the same bool variable as below to suppress compiler warnings.
|
||||
bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
|
||||
&TempReg);
|
||||
assert(SingleScratchReg &&
|
||||
"Could not find an available scratch register");
|
||||
|
||||
SingleScratchReg = ScratchReg == TempReg;
|
||||
|
||||
if (HasFP) {
|
||||
if (isSVR4ABI) {
|
||||
MachineFrameInfo *FFI = MF.getFrameInfo();
|
||||
@ -1130,15 +1240,27 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
}
|
||||
}
|
||||
|
||||
assert((isPPC64 || !MustSaveCR) &&
|
||||
"Epilogue CR restoring supported only in 64-bit mode");
|
||||
|
||||
// If we need to save both the LR and the CR and we only have one available
|
||||
// scratch register, we must do them one at a time.
|
||||
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
|
||||
.addImm(8)
|
||||
.addReg(SPReg);
|
||||
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
|
||||
.addReg(TempReg, getKillRegState(i == e-1));
|
||||
}
|
||||
|
||||
if (MustSaveLR)
|
||||
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
|
||||
.addImm(LROffset)
|
||||
.addReg(SPReg);
|
||||
|
||||
assert((isPPC64 || MustSaveCRs.empty()) &&
|
||||
"Epilogue CR restoring supported only in 64-bit mode");
|
||||
|
||||
if (!MustSaveCRs.empty()) // will only occur for PPC64
|
||||
if (MustSaveCR &&
|
||||
!(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
|
||||
.addImm(8)
|
||||
.addReg(SPReg);
|
||||
@ -1160,7 +1282,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
.addImm(BPOffset)
|
||||
.addReg(SPReg);
|
||||
|
||||
if (!MustSaveCRs.empty()) // will only occur for PPC64
|
||||
if (MustSaveCR &&
|
||||
!(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
|
||||
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
|
||||
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
|
||||
.addReg(TempReg, getKillRegState(i == e-1));
|
||||
|
@ -30,28 +30,41 @@ class PPCFrameLowering: public TargetFrameLowering {
|
||||
const unsigned BasePointerSaveOffset;
|
||||
|
||||
/**
|
||||
* \brief Find a register that can be used in function prologue and epilogue
|
||||
* \brief Find register[s] that can be used in function prologue and epilogue
|
||||
*
|
||||
* Find a register that can be use as the scratch register in function
|
||||
* Find register[s] that can be used as scratch register[s] in function
|
||||
* prologue and epilogue to save various registers (Link Register, Base
|
||||
* Pointer, etc.). Prefer R0, if it is available. If it is not available,
|
||||
* then choose a different register.
|
||||
* Pointer, etc.). Prefer R0/R12, if available. Otherwise choose whatever
|
||||
* register[s] are available.
|
||||
*
|
||||
* This method will return true if an available register was found (including
|
||||
* R0). If no available registers are found, the method returns false and sets
|
||||
* ScratchRegister to R0, as per the recommendation in the ABI.
|
||||
* This method will return true if it is able to find enough unique scratch
|
||||
* registers (1 or 2 depending on the requirement). If it is unable to find
|
||||
* enough available registers in the block, it will return false and set
|
||||
* any passed output parameter that corresponds to a required unique register
|
||||
* to PPC::NoRegister.
|
||||
*
|
||||
* \param[in] MBB The machine basic block to find an available register for
|
||||
* \param[in] UseAtEnd Specify whether the scratch register will be used at
|
||||
* the end of the basic block (i.e., will the scratch
|
||||
* register kill a register defined in the basic block)
|
||||
* \param[out] ScratchRegister The scratch register to use
|
||||
* \return true if a scratch register was found. false of a scratch register
|
||||
* was not found and R0 is being used as the default.
|
||||
* \param[in] TwoUniqueRegsRequired Specify whether this basic block will
|
||||
* require two unique scratch registers.
|
||||
* \param[out] SR1 The scratch register to use
|
||||
* \param[out] SR2 The second scratch register. If this pointer is not null
|
||||
* the function will attempt to set it to an available
|
||||
* register regardless of whether there is a hard requirement
|
||||
* for two unique scratch registers.
|
||||
* \return true if the required number of registers was found.
|
||||
* false if the required number of scratch registers weren't available.
|
||||
* If either output parameter refers to a required scratch register
|
||||
* that isn't available, it will be set to an invalid value.
|
||||
*/
|
||||
bool findScratchRegister(MachineBasicBlock *MBB,
|
||||
bool UseAtEnd,
|
||||
unsigned *ScratchRegister) const;
|
||||
bool TwoUniqueRegsRequired = false,
|
||||
unsigned *SR1 = nullptr,
|
||||
unsigned *SR2 = nullptr) const;
|
||||
bool twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const;
|
||||
|
||||
public:
|
||||
PPCFrameLowering(const PPCSubtarget &STI);
|
||||
|
@ -22227,6 +22227,35 @@ X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI,
|
||||
return BB;
|
||||
}
|
||||
|
||||
MachineBasicBlock *
|
||||
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const {
|
||||
// So, here we replace TLSADDR with the sequence:
|
||||
// adjust_stackdown -> TLSADDR -> adjust_stackup.
|
||||
// We need this because TLSADDR is lowered into calls
|
||||
// inside MC, therefore without the two markers shrink-wrapping
|
||||
// may push the prologue/epilogue past them.
|
||||
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
MachineFunction &MF = *BB->getParent();
|
||||
|
||||
// Emit CALLSEQ_START right before the instruction.
|
||||
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
|
||||
MachineInstrBuilder CallseqStart =
|
||||
BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0);
|
||||
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
|
||||
|
||||
// Emit CALLSEQ_END right after the instruction.
|
||||
// We don't call erase from parent because we want to keep the
|
||||
// original instruction around.
|
||||
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
|
||||
MachineInstrBuilder CallseqEnd =
|
||||
BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
|
||||
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
|
||||
|
||||
return BB;
|
||||
}
|
||||
|
||||
MachineBasicBlock *
|
||||
X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const {
|
||||
@ -22607,6 +22636,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||
case X86::TCRETURNri64:
|
||||
case X86::TCRETURNmi64:
|
||||
return BB;
|
||||
case X86::TLS_addr32:
|
||||
case X86::TLS_addr64:
|
||||
case X86::TLS_base_addr32:
|
||||
case X86::TLS_base_addr64:
|
||||
return EmitLoweredTLSAddr(MI, BB);
|
||||
case X86::WIN_ALLOCA:
|
||||
return EmitLoweredWinAlloca(MI, BB);
|
||||
case X86::CATCHRET:
|
||||
|
@ -1129,6 +1129,9 @@ namespace llvm {
|
||||
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const;
|
||||
|
||||
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const;
|
||||
|
||||
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
|
||||
MachineBasicBlock *BB) const;
|
||||
|
||||
|
@ -436,7 +436,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
|
||||
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
|
||||
Uses = [ESP] in {
|
||||
usesCustomInserter = 1, Uses = [ESP] in {
|
||||
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
|
||||
"# TLS_addr32",
|
||||
[(X86tlsaddr tls32addr:$sym)]>,
|
||||
@ -456,7 +456,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
|
||||
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
|
||||
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
||||
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
|
||||
Uses = [RSP] in {
|
||||
usesCustomInserter = 1, Uses = [RSP] in {
|
||||
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
|
||||
"# TLS_addr64",
|
||||
[(X86tlsaddr tls64addr:$sym)]>,
|
||||
|
@ -522,10 +522,10 @@ bb1:
|
||||
|
||||
; CHECK-LABEL: realign_conditional2
|
||||
; Extra realignment in the prologue (performance issue).
|
||||
; CHECK: tbz {{.*}} .[[LABEL:.*]]
|
||||
; CHECK: sub x9, sp, #32 // =32
|
||||
; CHECK: and sp, x9, #0xffffffffffffffe0
|
||||
; CHECK: mov x19, sp
|
||||
; CHECK: tbz {{.*}} .[[LABEL:.*]]
|
||||
; Stack is realigned in a non-entry BB.
|
||||
; CHECK: sub [[REG:x[01-9]+]], sp, #64
|
||||
; CHECK: and sp, [[REG]], #0xffffffffffffffe0
|
||||
|
@ -630,3 +630,88 @@ loop2b: ; preds = %loop1
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Don't do shrink-wrapping when we need to re-align the stack pointer.
|
||||
; See bug 26642.
|
||||
; CHECK-LABEL: stack_realign:
|
||||
; CHECK-NOT: lsl w[[LSL1:[0-9]+]], w0, w1
|
||||
; CHECK-NOT: lsl w[[LSL2:[0-9]+]], w1, w0
|
||||
; CHECK: stp x29, x30, [sp, #-16]!
|
||||
; CHECK: mov x29, sp
|
||||
; CHECK: sub x{{[0-9]+}}, sp, #16
|
||||
; CHECK-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
|
||||
; CHECK-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
|
||||
; CHECK-DAG: str w[[LSL1]],
|
||||
; CHECK-DAG: str w[[LSL2]],
|
||||
|
||||
define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) {
|
||||
%tmp = alloca i32, align 32
|
||||
%shl1 = shl i32 %a, %b
|
||||
%shl2 = shl i32 %b, %a
|
||||
%tmp2 = icmp slt i32 %a, %b
|
||||
br i1 %tmp2, label %true, label %false
|
||||
|
||||
true:
|
||||
store i32 %a, i32* %tmp, align 4
|
||||
%tmp4 = load i32, i32* %tmp
|
||||
br label %false
|
||||
|
||||
false:
|
||||
%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
|
||||
store i32 %shl1, i32* %ptr1
|
||||
store i32 %shl2, i32* %ptr2
|
||||
ret i32 %tmp.0
|
||||
}
|
||||
|
||||
; Re-aligned stack pointer with all caller-save regs live. See bug
|
||||
; 26642. In this case we currently avoid shrink wrapping because
|
||||
; ensuring we have a scratch register to re-align the stack pointer is
|
||||
; too complicated. Output should be the same for both enabled and
|
||||
; disabled shrink wrapping.
|
||||
; CHECK-LABEL: stack_realign2:
|
||||
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]!
|
||||
; CHECK: add x29, sp, #{{[0-9]+}}
|
||||
; CHECK: lsl {{w[0-9]+}}, w0, w1
|
||||
|
||||
define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) {
|
||||
%tmp = alloca i32, align 32
|
||||
%tmp1 = shl i32 %a, %b
|
||||
%tmp2 = shl i32 %b, %a
|
||||
%tmp3 = lshr i32 %a, %b
|
||||
%tmp4 = lshr i32 %b, %a
|
||||
%tmp5 = add i32 %b, %a
|
||||
%tmp6 = sub i32 %b, %a
|
||||
%tmp7 = add i32 %tmp1, %tmp2
|
||||
%tmp8 = sub i32 %tmp2, %tmp3
|
||||
%tmp9 = add i32 %tmp3, %tmp4
|
||||
%tmp10 = add i32 %tmp4, %tmp5
|
||||
%cmp = icmp slt i32 %a, %b
|
||||
br i1 %cmp, label %true, label %false
|
||||
|
||||
true:
|
||||
store i32 %a, i32* %tmp, align 4
|
||||
call void asm sideeffect "nop", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
|
||||
br label %false
|
||||
|
||||
false:
|
||||
store i32 %tmp1, i32* %ptr1, align 4
|
||||
store i32 %tmp2, i32* %ptr2, align 4
|
||||
store i32 %tmp3, i32* %ptr3, align 4
|
||||
store i32 %tmp4, i32* %ptr4, align 4
|
||||
store i32 %tmp5, i32* %ptr5, align 4
|
||||
store i32 %tmp6, i32* %ptr6, align 4
|
||||
%idx1 = getelementptr inbounds i32, i32* %ptr1, i64 1
|
||||
store i32 %a, i32* %idx1, align 4
|
||||
%idx2 = getelementptr inbounds i32, i32* %ptr1, i64 2
|
||||
store i32 %b, i32* %idx2, align 4
|
||||
%idx3 = getelementptr inbounds i32, i32* %ptr1, i64 3
|
||||
store i32 %tmp7, i32* %idx3, align 4
|
||||
%idx4 = getelementptr inbounds i32, i32* %ptr1, i64 4
|
||||
store i32 %tmp8, i32* %idx4, align 4
|
||||
%idx5 = getelementptr inbounds i32, i32* %ptr1, i64 5
|
||||
store i32 %tmp9, i32* %idx5, align 4
|
||||
%idx6 = getelementptr inbounds i32, i32* %ptr1, i64 6
|
||||
store i32 %tmp10, i32* %idx6, align 4
|
||||
|
||||
ret void
|
||||
}
|
||||
|
@ -13,7 +13,9 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK: bl num_entries
|
||||
; CHECK: movs [[R1:r[0-9]+]], #7
|
||||
; Any register is actually valid here, but it turns out we use lr,
|
||||
; because we do not have the kill flag on R0.
|
||||
; CHECK: mov.w [[R1:lr]], #7
|
||||
; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
|
||||
; CHECK: bic [[R0]], [[R0]], #7
|
||||
; CHECK: lsrs r4, [[R0]], #2
|
||||
|
118
test/CodeGen/PowerPC/pr26690.ll
Normal file
118
test/CodeGen/PowerPC/pr26690.ll
Normal file
@ -0,0 +1,118 @@
|
||||
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
||||
|
||||
%struct.anon = type { %struct.anon.0, %struct.anon.1 }
|
||||
%struct.anon.0 = type { i32 }
|
||||
%struct.anon.1 = type { i32 }
|
||||
|
||||
@i = common global i32 0, align 4
|
||||
@b = common global i32* null, align 8
|
||||
@c = common global i32 0, align 4
|
||||
@a = common global i32 0, align 4
|
||||
@h = common global i32 0, align 4
|
||||
@g = common global i32 0, align 4
|
||||
@j = common global i32 0, align 4
|
||||
@f = common global %struct.anon zeroinitializer, align 4
|
||||
@d = common global i32 0, align 4
|
||||
@e = common global i32 0, align 4
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define signext i32 @fn1(i32* nocapture %p1, i32 signext %p2, i32* nocapture %p3) {
|
||||
entry:
|
||||
%0 = load i32, i32* @i, align 4, !tbaa !1
|
||||
%cond = icmp eq i32 %0, 8
|
||||
br i1 %cond, label %if.end16, label %while.cond.preheader
|
||||
|
||||
while.cond.preheader: ; preds = %entry
|
||||
%1 = load i32*, i32** @b, align 8, !tbaa !5
|
||||
%2 = load i32, i32* %1, align 4, !tbaa !1
|
||||
%tobool18 = icmp eq i32 %2, 0
|
||||
br i1 %tobool18, label %while.end, label %while.body.lr.ph
|
||||
|
||||
while.body.lr.ph: ; preds = %while.cond.preheader
|
||||
%.pre = load i32, i32* @c, align 4, !tbaa !1
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.backedge, %while.body.lr.ph
|
||||
switch i32 %.pre, label %while.body.backedge [
|
||||
i32 0, label %sw.bb1
|
||||
i32 8, label %sw.bb1
|
||||
i32 6, label %sw.bb1
|
||||
i32 24, label %while.cond.backedge
|
||||
]
|
||||
|
||||
while.body.backedge: ; preds = %while.body, %while.cond.backedge
|
||||
br label %while.body
|
||||
|
||||
sw.bb1: ; preds = %while.body, %while.body, %while.body
|
||||
store i32 2, i32* @a, align 4, !tbaa !1
|
||||
br label %while.cond.backedge
|
||||
|
||||
while.cond.backedge: ; preds = %while.body, %sw.bb1
|
||||
store i32 4, i32* @a, align 4, !tbaa !1
|
||||
%.pre19 = load i32, i32* %1, align 4, !tbaa !1
|
||||
%tobool = icmp eq i32 %.pre19, 0
|
||||
br i1 %tobool, label %while.end.loopexit, label %while.body.backedge
|
||||
|
||||
while.end.loopexit: ; preds = %while.cond.backedge
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %while.cond.preheader
|
||||
%3 = load i32, i32* @h, align 4, !tbaa !1
|
||||
%mul = mul nsw i32 %0, %3
|
||||
%4 = load i32, i32* @g, align 4, !tbaa !1
|
||||
%mul4 = mul nsw i32 %mul, %4
|
||||
store i32 %mul4, i32* @j, align 4, !tbaa !1
|
||||
%5 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @f, i64 0, i32 0, i32 0), align 4, !tbaa !7
|
||||
%tobool5 = icmp eq i32 %5, 0
|
||||
br i1 %tobool5, label %if.end, label %if.then
|
||||
|
||||
if.then: ; preds = %while.end
|
||||
%div = sdiv i32 %5, %mul
|
||||
store i32 %div, i32* @g, align 4, !tbaa !1
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %while.end, %if.then
|
||||
%6 = phi i32 [ %4, %while.end ], [ %div, %if.then ]
|
||||
%7 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @f, i64 0, i32 1, i32 0), align 4, !tbaa !10
|
||||
%tobool7 = icmp ne i32 %7, 0
|
||||
%tobool8 = icmp ne i32 %mul4, 0
|
||||
%or.cond = and i1 %tobool7, %tobool8
|
||||
%tobool10 = icmp ne i32 %0, 0
|
||||
%or.cond17 = and i1 %or.cond, %tobool10
|
||||
br i1 %or.cond17, label %if.then11, label %if.end13
|
||||
|
||||
if.then11: ; preds = %if.end
|
||||
store i32 %3, i32* @d, align 4, !tbaa !1
|
||||
%8 = load i32, i32* @e, align 4, !tbaa !1
|
||||
store i32 %8, i32* %p3, align 4, !tbaa !1
|
||||
%.pre20 = load i32, i32* @g, align 4, !tbaa !1
|
||||
br label %if.end13
|
||||
|
||||
if.end13: ; preds = %if.then11, %if.end
|
||||
%9 = phi i32 [ %.pre20, %if.then11 ], [ %6, %if.end ]
|
||||
%tobool14 = icmp eq i32 %9, 0
|
||||
br i1 %tobool14, label %if.end16, label %if.then15
|
||||
|
||||
if.then15: ; preds = %if.end13
|
||||
store i32 %p2, i32* %p1, align 4, !tbaa !1
|
||||
br label %if.end16
|
||||
|
||||
if.end16: ; preds = %entry, %if.end13, %if.then15
|
||||
ret i32 2
|
||||
}
|
||||
|
||||
; CHECK: mfcr {{[0-9]+}}
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
||||
!0 = !{!"clang version 3.9.0 (trunk 261520)"}
|
||||
!1 = !{!2, !2, i64 0}
|
||||
!2 = !{!"int", !3, i64 0}
|
||||
!3 = !{!"omnipotent char", !4, i64 0}
|
||||
!4 = !{!"Simple C/C++ TBAA"}
|
||||
!5 = !{!6, !6, i64 0}
|
||||
!6 = !{!"any pointer", !3, i64 0}
|
||||
!7 = !{!8, !2, i64 0}
|
||||
!8 = !{!"", !9, i64 0, !9, i64 4}
|
||||
!9 = !{!"", !2, i64 0}
|
||||
!10 = !{!8, !2, i64 4}
|
26
test/CodeGen/X86/i386-tlscall-fastregalloc.ll
Normal file
26
test/CodeGen/X86/i386-tlscall-fastregalloc.ll
Normal file
@ -0,0 +1,26 @@
|
||||
; RUN: llc %s -o - -O0 -regalloc=fast | FileCheck %s
|
||||
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
|
||||
target triple = "i386-apple-macosx10.10"
|
||||
|
||||
; @c: i8 global declared external, so it has no definition in this file.
@c = external global i8, align 1
|
||||
; @p: thread_local i8* initialised to null; @f below stores @c's address into it.
@p = thread_local global i8* null, align 4
|
||||
|
||||
; Check that regalloc fast correctly preserves EAX that is set by the TLS call
|
||||
; until the actual use.
|
||||
; PR26485.
|
||||
;
|
||||
; CHECK-LABEL: f:
|
||||
; Get p.
|
||||
; CHECK: movl _p@{{[0-9a-zA-Z]+}}, [[P_ADDR:%[a-z]+]]
|
||||
; CHECK-NEXT: calll *([[P_ADDR]])
|
||||
; At this point eax contains the address of p.
|
||||
; Load c address.
|
||||
; Make sure we do not clobber eax.
|
||||
; CHECK-NEXT: movl L_c{{[^,]*}}, [[C_ADDR:%e[b-z]x+]]
|
||||
; Store c address into p.
|
||||
; CHECK-NEXT: movl [[C_ADDR]], (%eax)
|
||||
; Test function: a single store of @c's address through the thread_local
; global @p, followed by a void return. The FileCheck lines above describe the
; code expected for this store when compiled with -O0 and -regalloc=fast.
define void @f() #0 {
|
||||
entry:
|
||||
; Write the address of @c into @p.
store i8* @c, i8** @p, align 4
|
||||
ret void
|
||||
}
|
60
test/CodeGen/X86/tls-shrink-wrapping.ll
Normal file
60
test/CodeGen/X86/tls-shrink-wrapping.ll
Normal file
@ -0,0 +1,60 @@
|
||||
; Testcase generated from the following code:
|
||||
; extern __thread int i;
|
||||
; void f();
|
||||
; int g(void) {
|
||||
; if (i) {
|
||||
; i = 0;
|
||||
; f();
|
||||
; }
|
||||
; return i;
|
||||
; }
|
||||
; We want to make sure that TLS variables are not accessed before
|
||||
; the stack frame is set up.
|
||||
|
||||
; RUN: llc < %s -relocation-model=pic | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-freebsd11.0"
|
||||
|
||||
; Thread-local i32 declared external; corresponds to `extern __thread int i;`
; in the C snippet at the top of this test.
@i = external thread_local global i32, align 4
|
||||
|
||||
; IR for g() from the C snippet at the top of this test: load the thread_local
; @i; if it is non-zero, store 0 to it, call f() and load it again; return the
; resulting value.
define i32 @g() #0 {
|
||||
entry:
|
||||
; Read @i and branch straight to %if.end when it is zero.
%tmp = load i32, i32* @i, align 4
|
||||
%tobool = icmp eq i32 %tmp, 0
|
||||
br i1 %tobool, label %if.end, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
; Non-zero path: clear @i, call f(), then load @i again after the call.
store i32 0, i32* @i, align 4
|
||||
tail call void (...) @f() #2
|
||||
%.pre = load i32, i32* @i, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %entry
|
||||
; The %entry edge is only taken when the loaded value compared equal to 0, so
; the constant 0 is used on that edge; the %if.then edge supplies the reload.
%tmp1 = phi i32 [ 0, %entry ], [ %.pre, %if.then ]
|
||||
ret i32 %tmp1
|
||||
}
|
||||
|
||||
; CHECK: g: # @g
|
||||
; CHECK-NEXT: .cfi_startproc
|
||||
; CHECK-NEXT: # BB#0: # %entry
|
||||
; CHECK-NEXT: pushq %rbp
|
||||
; CHECK-NEXT: .Ltmp0:
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: .Ltmp1:
|
||||
; CHECK-NEXT: .cfi_offset %rbp, -16
|
||||
; CHECK-NEXT: movq %rsp, %rbp
|
||||
; CHECK-NEXT: .Ltmp2:
|
||||
; CHECK-NEXT: .cfi_def_cfa_register %rbp
|
||||
; CHECK-NEXT: pushq %rbx
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .Ltmp3:
|
||||
; CHECK-NEXT: .cfi_offset %rbx, -24
|
||||
; CHECK-NEXT: data16
|
||||
; CHECK-NEXT: leaq i@TLSGD(%rip), %rdi
|
||||
|
||||
; Declaration only (no body in this file) of the callee used by @g; the `(...)`
; signature matches the unprototyped `void f();` in the C snippet at the top.
declare void @f(...) #1
|
||||
|
||||
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind }
|
Loading…
x
Reference in New Issue
Block a user