Vendor import of llvm release_38 branch r261684:

https://llvm.org/svn/llvm-project/llvm/branches/release_38@261684
This commit is contained in:
Dimitry Andric 2016-02-24 21:32:58 +00:00
parent 3f4bde29a3
commit d9c9bd8485
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor/llvm/dist/; revision=296003
svn path=/vendor/llvm/llvm-release_38-r261684/; revision=296004; tag=vendor/llvm/llvm-release_38-r261684
14 changed files with 540 additions and 60 deletions

View File

@ -1002,11 +1002,13 @@ void RAFast::AllocateBasicBlock() {
unsigned DefOpEnd = MI->getNumOperands();
if (MI->isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
// Spill all virtregs before a call. This serves one purpose: If an
// exception is thrown, the landing pad is going to expect to find
// registers in their spill slots, and 2. we don't have to wade through
// all the <imp-def> operands on the call instruction.
DefOpEnd = VirtOpEnd;
// registers in their spill slots.
// Note: although it is appealing to just consider all definitions
// as call-clobbered, this is not correct because some of those
// definitions may be used later on and we do not want to reuse
// those for virtual registers in between.
DEBUG(dbgs() << " Spilling remaining registers before call.\n");
spillAll(MI);

View File

@ -275,6 +275,18 @@ static bool isCSSave(MachineInstr *MBBI) {
MBBI->getOpcode() == AArch64::STPDpre;
}
bool AArch64FrameLowering::canUseAsPrologue(
const MachineBasicBlock &MBB) const {
const MachineFunction *MF = MBB.getParent();
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Don't need a scratch register if we're not going to re-align the stack.
// Otherwise, we may need a scratch register to be available and we do not
// support that for now.
return !RegInfo->needsStackRealignment(*MF);
}
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();

View File

@ -37,6 +37,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
int resolveFrameIndexReference(const MachineFunction &MF, int FI,

View File

@ -556,16 +556,42 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
}
}
bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
bool UseAtEnd,
unsigned *ScratchRegister) const {
/* This function will do the following:
- If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
respectively (defaults recommended by the ABI) and return true
- If MBB is not an entry block, initialize the register scavenger and look
for available registers.
- If the defaults (R0/R12) are available, return true
- If TwoUniqueRegsRequired is set to true, it looks for two unique
registers. Otherwise, look for a single available register.
- If the required registers are found, set SR1 and SR2 and return true.
- If the required registers are not found, set SR2 or both SR1 and SR2 to
PPC::NoRegister and return false.
Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
is not set, this function will attempt to find two different registers, but
still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
bool UseAtEnd,
bool TwoUniqueRegsRequired,
unsigned *SR1,
unsigned *SR2) const {
RegScavenger RS;
unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
if (ScratchRegister)
*ScratchRegister = R0;
// Set the defaults for the two scratch registers.
if (SR1)
*SR1 = R0;
// If MBB is an entry or exit block, use R0 as the scratch register
if (SR2) {
assert (SR1 && "Asking for the second scratch register but not the first?");
*SR2 = R12;
}
// If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
if ((UseAtEnd && MBB->isReturnBlock()) ||
(!UseAtEnd && (&MBB->getParent()->front() == MBB)))
return true;
@ -573,8 +599,8 @@ bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
RS.enterBasicBlock(MBB);
if (UseAtEnd && !MBB->empty()) {
// The scratch register will be used at the end of the block, so must consider
// all registers used within the block
// The scratch register will be used at the end of the block, so must
// consider all registers used within the block
MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
// If no terminator, back iterator up to previous instruction.
@ -584,35 +610,86 @@ bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
if (MBBI != MBB->begin())
RS.forward(MBBI);
}
if (!RS.isRegUsed(R0))
// If the two registers are available, we're all good.
// Note that we only return here if both R0 and R12 are available because
// although the function may not require two unique registers, it may benefit
// from having two so we should try to provide them.
if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
return true;
unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);
// Make sure the register scavenger was able to find an available register
// If not, use R0 but return false to indicate no register was available and
// R0 must be used (as recommended by the ABI)
if (Reg == 0)
// Get the list of callee-saved registers for the target.
const PPCRegisterInfo *RegInfo =
static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
// Get all the available registers in the block.
BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
&PPC::GPRCRegClass);
// We shouldn't use callee-saved registers as scratch registers as they may be
// available when looking for a candidate block for shrink wrapping but not
// available when the actual prologue/epilogue is being emitted because they
// were added as live-in to the prologue block by PrologueEpilogueInserter.
for (int i = 0; CSRegs[i]; ++i)
BV.reset(CSRegs[i]);
// Set the first scratch register to the first available one.
if (SR1) {
int FirstScratchReg = BV.find_first();
*SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
}
// If there is another one available, set the second scratch register to that.
// Otherwise, set it to either PPC::NoRegister if this function requires two
// or to whatever SR1 is set to if this function doesn't require two.
if (SR2) {
int SecondScratchReg = BV.find_next(*SR1);
if (SecondScratchReg != -1)
*SR2 = SecondScratchReg;
else
*SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
}
// Now that we've done our best to provide both registers, double check
// whether we were unable to provide enough.
if (BV.count() < (TwoUniqueRegsRequired ? 2 : 1))
return false;
if (ScratchRegister)
*ScratchRegister = Reg;
return true;
}
// Spilling LR and spilling CR each need a scratch register. Two distinct
// registers are preferred so the two spill sequences can overlap and hide
// latency, but a single register is enough if the sequences are emitted one
// after the other. The exception is a function that both re-aligns its stack
// (i.e. has a base pointer) and has a large frame: that combination needs two
// distinct scratch registers, and this predicate reports it.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  MachineFunction &Fn = *MBB->getParent();
  const PPCRegisterInfo *PPCRI =
      static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());

  const bool UsesBasePtr = PPCRI->hasBasePointer(Fn);

  // A frame is "large" when its negated size does not fit in a signed 16-bit
  // immediate. The layout is only queried here (second argument is false).
  const unsigned StackSize = determineFrameLayout(Fn, false);
  const int NegStackSize = -StackSize;
  const bool FrameIsLarge = !isInt<16>(NegStackSize);

  const bool HasAlignmentAboveOne = Fn.getFrameInfo()->getMaxAlignment() > 1;

  return FrameIsLarge && UsesBasePtr && HasAlignmentAboveOne;
}
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
return findScratchRegister(TmpMBB, false, nullptr);
return findScratchRegister(TmpMBB, false,
twoUniqueScratchRegsRequired(TmpMBB));
}
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
return findScratchRegister(TmpMBB, true, nullptr);
return findScratchRegister(TmpMBB, true);
}
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
@ -664,6 +741,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
@ -701,9 +779,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
findScratchRegister(&MBB, false, &ScratchReg);
assert(ScratchReg && "No scratch register!");
// Using the same bool variable as below to suppress compiler warnings.
bool SingleScratchReg =
findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
&ScratchReg, &TempReg);
assert(SingleScratchReg &&
"Required number of registers not available in this block");
SingleScratchReg = ScratchReg == TempReg;
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
@ -748,13 +832,30 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
bool isLargeFrame = !isInt<16>(NegFrameSize);
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");
// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
// FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
// If only one or two CR fields are clobbered, it could be more
// efficient to use mfocrf to selectively save just those fields.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
.addReg(TempReg, getKillRegState(true))
.addImm(8)
.addReg(SPReg);
}
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
assert((isPPC64 || MustSaveCRs.empty()) &&
"Prologue CR saving supported only in 64-bit mode");
if (!MustSaveCRs.empty()) { // will only occur for PPC64
if (MustSaveCR &&
!(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
// FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
// If only one or two CR fields are clobbered, it could be more
// efficient to use mfocrf to selectively save just those fields.
@ -792,7 +893,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(LROffset)
.addReg(SPReg);
if (!MustSaveCRs.empty()) // will only occur for PPC64
if (MustSaveCR &&
!(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
.addReg(TempReg, getKillRegState(true))
.addImm(8)
@ -811,6 +913,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SPReg);
}
// This condition must be kept in sync with canUseAsPrologue.
if (HasBP && MaxAlign > 1) {
if (isPPC64)
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
@ -828,6 +931,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(ScratchReg, RegState::Kill)
.addImm(NegFrameSize);
} else {
assert(!SingleScratchReg && "Only a single scratch reg available");
BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
.addImm(NegFrameSize >> 16);
BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
@ -951,7 +1055,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
&& MustSaveCRs.empty())
&& !MustSaveCR)
continue;
// For 64-bit SVR4 when we have spilled CRs, the spill location
@ -1005,6 +1109,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
bool MustSaveCR = !MustSaveCRs.empty();
// Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
bool HasBP = RegInfo->hasBasePointer(MF);
@ -1026,14 +1131,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
: PPC::ADDI );
const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
: PPC::ADD4 );
int LROffset = getReturnSaveOffset();
int FPOffset = 0;
findScratchRegister(&MBB, true, &ScratchReg);
assert(ScratchReg && "No scratch register!");
// Using the same bool variable as below to suppress compiler warnings.
bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
&TempReg);
assert(SingleScratchReg &&
"Could not find an available scratch register");
SingleScratchReg = ScratchReg == TempReg;
if (HasFP) {
if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
@ -1130,15 +1240,27 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
assert((isPPC64 || !MustSaveCR) &&
"Epilogue CR restoring supported only in 64-bit mode");
// If we need to save both the LR and the CR and we only have one available
// scratch register, we must do them one at a time.
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
.addImm(8)
.addReg(SPReg);
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
.addReg(TempReg, getKillRegState(i == e-1));
}
if (MustSaveLR)
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
.addImm(LROffset)
.addReg(SPReg);
assert((isPPC64 || MustSaveCRs.empty()) &&
"Epilogue CR restoring supported only in 64-bit mode");
if (!MustSaveCRs.empty()) // will only occur for PPC64
if (MustSaveCR &&
!(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
.addImm(8)
.addReg(SPReg);
@ -1160,7 +1282,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addImm(BPOffset)
.addReg(SPReg);
if (!MustSaveCRs.empty()) // will only occur for PPC64
if (MustSaveCR &&
!(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
.addReg(TempReg, getKillRegState(i == e-1));

View File

@ -30,28 +30,41 @@ class PPCFrameLowering: public TargetFrameLowering {
const unsigned BasePointerSaveOffset;
/**
* \brief Find a register that can be used in function prologue and epilogue
* \brief Find register[s] that can be used in function prologue and epilogue
*
* Find a register that can be use as the scratch register in function
* Find register[s] that can be used as scratch register[s] in function
* prologue and epilogue to save various registers (Link Register, Base
* Pointer, etc.). Prefer R0, if it is available. If it is not available,
* then choose a different register.
* Pointer, etc.). Prefer R0/R12, if available. Otherwise choose whatever
* register[s] are available.
*
* This method will return true if an available register was found (including
* R0). If no available registers are found, the method returns false and sets
* ScratchRegister to R0, as per the recommendation in the ABI.
* This method will return true if it is able to find enough unique scratch
* registers (1 or 2 depending on the requirement). If it is unable to find
* enough available registers in the block, it will return false and set
* any passed output parameter that corresponds to a required unique register
* to PPC::NoRegister.
*
* \param[in] MBB The machine basic block to find an available register for
* \param[in] UseAtEnd Specify whether the scratch register will be used at
* the end of the basic block (i.e., will the scratch
* register kill a register defined in the basic block)
* \param[out] ScratchRegister The scratch register to use
* \return true if a scratch register was found. false of a scratch register
* was not found and R0 is being used as the default.
* \param[in] TwoUniqueRegsRequired Specify whether this basic block will
* require two unique scratch registers.
* \param[out] SR1 The scratch register to use
* \param[out] SR2 The second scratch register. If this pointer is not null
* the function will attempt to set it to an available
* register regardless of whether there is a hard requirement
* for two unique scratch registers.
* \return true if the required number of registers was found.
* false if the required number of scratch registers weren't available.
* If either output parameter refers to a required scratch register
* that isn't available, it will be set to an invalid value.
*/
bool findScratchRegister(MachineBasicBlock *MBB,
bool UseAtEnd,
unsigned *ScratchRegister) const;
bool TwoUniqueRegsRequired = false,
unsigned *SR1 = nullptr,
unsigned *SR2 = nullptr) const;
bool twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const;
public:
PPCFrameLowering(const PPCSubtarget &STI);

View File

@ -22227,6 +22227,35 @@ X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI,
return BB;
}
/// Custom inserter for the TLSADDR pseudo: bracket \p MI with call-frame
/// markers and hand back \p BB unchanged otherwise.
///
/// The resulting sequence is
///   adjust_stackdown -> TLSADDR -> adjust_stackup.
/// TLSADDR is only lowered into a call inside MC, so without the two markers
/// shrink-wrapping may push the prologue/epilogue past it.
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr *MI,
                                      MachineBasicBlock *BB) const {
  MachineFunction &Fn = *BB->getParent();
  const TargetInstrInfo &InstrInfo = *Subtarget->getInstrInfo();
  DebugLoc Loc = MI->getDebugLoc();
  MachineBasicBlock::iterator TLSPos(MI);

  // CALLSEQ_START goes immediately in front of the TLS instruction.
  MachineInstrBuilder FrameSetup =
      BuildMI(Fn, Loc, InstrInfo.get(InstrInfo.getCallFrameSetupOpcode()))
          .addImm(0);
  BB->insert(TLSPos, FrameSetup);

  // CALLSEQ_END goes immediately behind it. The original instruction is
  // deliberately kept in place, hence no eraseFromParent here.
  MachineInstrBuilder FrameDestroy =
      BuildMI(Fn, Loc, InstrInfo.get(InstrInfo.getCallFrameDestroyOpcode()))
          .addImm(0)
          .addImm(0);
  BB->insertAfter(TLSPos, FrameDestroy);

  return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const {
@ -22607,6 +22636,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
return BB;
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return EmitLoweredTLSAddr(MI, BB);
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
case X86::CATCHRET:

View File

@ -1129,6 +1129,9 @@ namespace llvm {
MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr *MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const;

View File

@ -436,7 +436,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in {
usesCustomInserter = 1, Uses = [ESP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_addr32",
[(X86tlsaddr tls32addr:$sym)]>,
@ -456,7 +456,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in {
usesCustomInserter = 1, Uses = [RSP] in {
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,

View File

@ -522,10 +522,10 @@ bb1:
; CHECK-LABEL: realign_conditional2
; Extra realignment in the prologue (performance issue).
; CHECK: tbz {{.*}} .[[LABEL:.*]]
; CHECK: sub x9, sp, #32 // =32
; CHECK: and sp, x9, #0xffffffffffffffe0
; CHECK: mov x19, sp
; CHECK: tbz {{.*}} .[[LABEL:.*]]
; Stack is realigned in a non-entry BB.
; CHECK: sub [[REG:x[01-9]+]], sp, #64
; CHECK: and sp, [[REG]], #0xffffffffffffffe0

View File

@ -630,3 +630,88 @@ loop2b: ; preds = %loop1
end:
ret void
}
; Don't do shrink-wrapping when we need to re-align the stack pointer.
; See bug 26642.
; CHECK-LABEL: stack_realign:
; CHECK-NOT: lsl w[[LSL1:[0-9]+]], w0, w1
; CHECK-NOT: lsl w[[LSL2:[0-9]+]], w1, w0
; CHECK: stp x29, x30, [sp, #-16]!
; CHECK: mov x29, sp
; CHECK: sub x{{[0-9]+}}, sp, #16
; CHECK-DAG: lsl w[[LSL1:[0-9]+]], w0, w1
; CHECK-DAG: lsl w[[LSL2:[0-9]+]], w1, w0
; CHECK-DAG: str w[[LSL1]],
; CHECK-DAG: str w[[LSL2]],
; Test input: a function with an over-aligned (32-byte) stack slot that is
; only touched on the %true path, while both shift results computed in the
; entry block are stored on the common %false path.
define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) {
; The align-32 alloca is the over-aligned object in this function.
%tmp = alloca i32, align 32
%shl1 = shl i32 %a, %b
%shl2 = shl i32 %b, %a
%tmp2 = icmp slt i32 %a, %b
br i1 %tmp2, label %true, label %false
true:
; Only this block reads or writes the stack slot.
store i32 %a, i32* %tmp, align 4
%tmp4 = load i32, i32* %tmp
br label %false
false:
; Returns the reloaded value when coming from %true, otherwise %a.
%tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ]
store i32 %shl1, i32* %ptr1
store i32 %shl2, i32* %ptr2
ret i32 %tmp.0
}
; Re-aligned stack pointer with all caller-save regs live. See bug
; 26642. In this case we currently avoid shrink wrapping because
; ensuring we have a scratch register to re-align the stack pointer is
; too complicated. Output should be the same for both enabled and
; disabled shrink wrapping.
; CHECK-LABEL: stack_realign2:
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]!
; CHECK: add x29, sp, #{{[0-9]+}}
; CHECK: lsl {{w[0-9]+}}, w0, w1
; Test input: like @stack_realign, but with eight arguments and ten values
; computed in the entry block that are all stored on the common %false path,
; so many values are live across the branch.
define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) {
; Over-aligned (32-byte) stack slot, only written in %true.
%tmp = alloca i32, align 32
%tmp1 = shl i32 %a, %b
%tmp2 = shl i32 %b, %a
%tmp3 = lshr i32 %a, %b
%tmp4 = lshr i32 %b, %a
%tmp5 = add i32 %b, %a
%tmp6 = sub i32 %b, %a
%tmp7 = add i32 %tmp1, %tmp2
%tmp8 = sub i32 %tmp2, %tmp3
%tmp9 = add i32 %tmp3, %tmp4
%tmp10 = add i32 %tmp4, %tmp5
%cmp = icmp slt i32 %a, %b
br i1 %cmp, label %true, label %false
true:
store i32 %a, i32* %tmp, align 4
; The inline asm clobbers x19 through x28.
call void asm sideeffect "nop", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind
br label %false
false:
; Every value computed in the entry block is consumed here.
store i32 %tmp1, i32* %ptr1, align 4
store i32 %tmp2, i32* %ptr2, align 4
store i32 %tmp3, i32* %ptr3, align 4
store i32 %tmp4, i32* %ptr4, align 4
store i32 %tmp5, i32* %ptr5, align 4
store i32 %tmp6, i32* %ptr6, align 4
%idx1 = getelementptr inbounds i32, i32* %ptr1, i64 1
store i32 %a, i32* %idx1, align 4
%idx2 = getelementptr inbounds i32, i32* %ptr1, i64 2
store i32 %b, i32* %idx2, align 4
%idx3 = getelementptr inbounds i32, i32* %ptr1, i64 3
store i32 %tmp7, i32* %idx3, align 4
%idx4 = getelementptr inbounds i32, i32* %ptr1, i64 4
store i32 %tmp8, i32* %idx4, align 4
%idx5 = getelementptr inbounds i32, i32* %ptr1, i64 5
store i32 %tmp9, i32* %idx5, align 4
%idx6 = getelementptr inbounds i32, i32* %ptr1, i64 6
store i32 %tmp10, i32* %idx6, align 4
ret void
}

View File

@ -13,7 +13,9 @@ entry:
}
; CHECK: bl num_entries
; CHECK: movs [[R1:r[0-9]+]], #7
; Any register is actually valid here, but turns out we use lr,
; because we do not have the kill flag on R0.
; CHECK: mov.w [[R1:lr]], #7
; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2
; CHECK: bic [[R0]], [[R0]], #7
; CHECK: lsrs r4, [[R0]], #2

View File

@ -0,0 +1,118 @@
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
%struct.anon = type { %struct.anon.0, %struct.anon.1 }
%struct.anon.0 = type { i32 }
%struct.anon.1 = type { i32 }
@i = common global i32 0, align 4
@b = common global i32* null, align 8
@c = common global i32 0, align 4
@a = common global i32 0, align 4
@h = common global i32 0, align 4
@g = common global i32 0, align 4
@j = common global i32 0, align 4
@f = common global %struct.anon zeroinitializer, align 4
@d = common global i32 0, align 4
@e = common global i32 0, align 4
; Function Attrs: norecurse nounwind
; Test input for the PowerPC scratch-register handling. The function returns
; the constant 2 on every path; the entry block branches straight to the
; return block when @i equals 8, and otherwise runs a loop followed by a
; chain of conditional stores. The only expectation in this file is that an
; mfcr instruction appears in the output.
define signext i32 @fn1(i32* nocapture %p1, i32 signext %p2, i32* nocapture %p3) {
entry:
; Early exit: skip all the work below when @i == 8.
%0 = load i32, i32* @i, align 4, !tbaa !1
%cond = icmp eq i32 %0, 8
br i1 %cond, label %if.end16, label %while.cond.preheader
while.cond.preheader: ; preds = %entry
%1 = load i32*, i32** @b, align 8, !tbaa !5
%2 = load i32, i32* %1, align 4, !tbaa !1
%tobool18 = icmp eq i32 %2, 0
br i1 %tobool18, label %while.end, label %while.body.lr.ph
while.body.lr.ph: ; preds = %while.cond.preheader
%.pre = load i32, i32* @c, align 4, !tbaa !1
br label %while.body
while.body: ; preds = %while.body.backedge, %while.body.lr.ph
; Dispatch on the value of @c loaded once before the loop.
switch i32 %.pre, label %while.body.backedge [
i32 0, label %sw.bb1
i32 8, label %sw.bb1
i32 6, label %sw.bb1
i32 24, label %while.cond.backedge
]
while.body.backedge: ; preds = %while.body, %while.cond.backedge
br label %while.body
sw.bb1: ; preds = %while.body, %while.body, %while.body
store i32 2, i32* @a, align 4, !tbaa !1
br label %while.cond.backedge
while.cond.backedge: ; preds = %while.body, %sw.bb1
; The loop exits once the int pointed to by @b reads as zero.
store i32 4, i32* @a, align 4, !tbaa !1
%.pre19 = load i32, i32* %1, align 4, !tbaa !1
%tobool = icmp eq i32 %.pre19, 0
br i1 %tobool, label %while.end.loopexit, label %while.body.backedge
while.end.loopexit: ; preds = %while.cond.backedge
br label %while.end
while.end: ; preds = %while.end.loopexit, %while.cond.preheader
%3 = load i32, i32* @h, align 4, !tbaa !1
%mul = mul nsw i32 %0, %3
%4 = load i32, i32* @g, align 4, !tbaa !1
%mul4 = mul nsw i32 %mul, %4
store i32 %mul4, i32* @j, align 4, !tbaa !1
%5 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @f, i64 0, i32 0, i32 0), align 4, !tbaa !7
%tobool5 = icmp eq i32 %5, 0
br i1 %tobool5, label %if.end, label %if.then
if.then: ; preds = %while.end
%div = sdiv i32 %5, %mul
store i32 %div, i32* @g, align 4, !tbaa !1
br label %if.end
if.end: ; preds = %while.end, %if.then
; Three independent conditions are combined into a single branch.
%6 = phi i32 [ %4, %while.end ], [ %div, %if.then ]
%7 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @f, i64 0, i32 1, i32 0), align 4, !tbaa !10
%tobool7 = icmp ne i32 %7, 0
%tobool8 = icmp ne i32 %mul4, 0
%or.cond = and i1 %tobool7, %tobool8
%tobool10 = icmp ne i32 %0, 0
%or.cond17 = and i1 %or.cond, %tobool10
br i1 %or.cond17, label %if.then11, label %if.end13
if.then11: ; preds = %if.end
store i32 %3, i32* @d, align 4, !tbaa !1
%8 = load i32, i32* @e, align 4, !tbaa !1
store i32 %8, i32* %p3, align 4, !tbaa !1
%.pre20 = load i32, i32* @g, align 4, !tbaa !1
br label %if.end13
if.end13: ; preds = %if.then11, %if.end
%9 = phi i32 [ %.pre20, %if.then11 ], [ %6, %if.end ]
%tobool14 = icmp eq i32 %9, 0
br i1 %tobool14, label %if.end16, label %if.then15
if.then15: ; preds = %if.end13
store i32 %p2, i32* %p1, align 4, !tbaa !1
br label %if.end16
if.end16: ; preds = %entry, %if.end13, %if.then15
; Common return block, reached directly from %entry on the early-exit path.
ret i32 2
}
; CHECK: mfcr {{[0-9]+}}
!llvm.ident = !{!0}
!0 = !{!"clang version 3.9.0 (trunk 261520)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"any pointer", !3, i64 0}
!7 = !{!8, !2, i64 0}
!8 = !{!"", !9, i64 0, !9, i64 4}
!9 = !{!"", !2, i64 0}
!10 = !{!8, !2, i64 4}

View File

@ -0,0 +1,26 @@
; RUN: llc %s -o - -O0 -regalloc=fast | FileCheck %s
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.10"
@c = external global i8, align 1
@p = thread_local global i8* null, align 4
; Check that regalloc fast correctly preserves EAX that is set by the TLS call
; until the actual use.
; PR26485.
;
; CHECK-LABEL: f:
; Get p.
; CHECK: movl _p@{{[0-9a-zA-Z]+}}, [[P_ADDR:%[a-z]+]]
; CHECK-NEXT: calll *([[P_ADDR]])
; At this point eax contains the address of p.
; Load c address.
; Make sure we do not clobber eax.
; CHECK-NEXT: movl L_c{{[^,]*}}, [[C_ADDR:%e[b-z]x+]]
; Store c address into p.
; CHECK-NEXT: movl [[C_ADDR]], (%eax)
; Test input: store the address of the external global @c into the
; thread-local pointer @p. The single store is the whole body.
define void @f() #0 {
entry:
store i8* @c, i8** @p, align 4
ret void
}

View File

@ -0,0 +1,60 @@
; Testcase generated from the following code:
; extern __thread int i;
; void f();
; int g(void) {
; if (i) {
; i = 0;
; f();
; }
; return i;
; }
; We want to make sure that TLS variables are not accessed before
; the stack frame is set up.
; RUN: llc < %s -relocation-model=pic | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-freebsd11.0"
@i = external thread_local global i32, align 4
; Test input matching the C code quoted at the top of this file: read the
; thread-local @i; if it is non-zero, clear it, call @f and re-read it;
; return the value observed last (0 when the branch is not taken).
define i32 @g() #0 {
entry:
; First access to the thread-local variable happens in the entry block.
%tmp = load i32, i32* @i, align 4
%tobool = icmp eq i32 %tmp, 0
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
store i32 0, i32* @i, align 4
tail call void (...) @f() #2
; @i is reloaded after the call to @f.
%.pre = load i32, i32* @i, align 4
br label %if.end
if.end: ; preds = %if.then, %entry
%tmp1 = phi i32 [ 0, %entry ], [ %.pre, %if.then ]
ret i32 %tmp1
}
; CHECK: g: # @g
; CHECK-NEXT: .cfi_startproc
; CHECK-NEXT: # BB#0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .Ltmp2:
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .Ltmp3:
; CHECK-NEXT: .cfi_offset %rbx, -24
; CHECK-NEXT: data16
; CHECK-NEXT: leaq i@TLSGD(%rip), %rdi
declare void @f(...) #1
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }