From d9c9bd8485071afb22adcd2bb08f6a8e5e587ed6 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 24 Feb 2016 21:32:58 +0000 Subject: [PATCH 1/2] Vendor import of llvm release_38 branch r261684: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261684 --- lib/CodeGen/RegAllocFast.cpp | 10 +- lib/Target/AArch64/AArch64FrameLowering.cpp | 12 + lib/Target/AArch64/AArch64FrameLowering.h | 2 + lib/Target/PowerPC/PPCFrameLowering.cpp | 205 ++++++++++++++---- lib/Target/PowerPC/PPCFrameLowering.h | 35 ++- lib/Target/X86/X86ISelLowering.cpp | 34 +++ lib/Target/X86/X86ISelLowering.h | 3 + lib/Target/X86/X86InstrCompiler.td | 4 +- .../AArch64/aarch64-dynamic-stack-layout.ll | 2 +- test/CodeGen/AArch64/arm64-shrink-wrapping.ll | 85 ++++++++ test/CodeGen/ARM/Windows/alloca.ll | 4 +- test/CodeGen/PowerPC/pr26690.ll | 118 ++++++++++ test/CodeGen/X86/i386-tlscall-fastregalloc.ll | 26 +++ test/CodeGen/X86/tls-shrink-wrapping.ll | 60 +++++ 14 files changed, 540 insertions(+), 60 deletions(-) create mode 100644 test/CodeGen/PowerPC/pr26690.ll create mode 100644 test/CodeGen/X86/i386-tlscall-fastregalloc.ll create mode 100644 test/CodeGen/X86/tls-shrink-wrapping.ll diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index f4c076fea0e7..8d7a7213ba07 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -1002,11 +1002,13 @@ void RAFast::AllocateBasicBlock() { unsigned DefOpEnd = MI->getNumOperands(); if (MI->isCall()) { - // Spill all virtregs before a call. This serves two purposes: 1. If an + // Spill all virtregs before a call. This serves one purpose: if an // exception is thrown, the landing pad is going to expect to find - // registers in their spill slots, and 2. we don't have to wade through - // all the operands on the call instruction. - DefOpEnd = VirtOpEnd; + // registers in their spill slots. + // Note: although it is appealing to just consider all definitions + // as call-clobbered, this is not correct because some of those + // definitions may be used later on and we do not want to reuse + // those for virtual registers in between. DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 11ae8005370d..3f63d049c34e 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -275,6 +275,18 @@ static bool isCSSave(MachineInstr *MBBI) { MBBI->getOpcode() == AArch64::STPDpre; } +bool AArch64FrameLowering::canUseAsPrologue( + const MachineBasicBlock &MBB) const { + const MachineFunction *MF = MBB.getParent(); + const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>(); + const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + + // Don't need a scratch register if we're not going to re-align the stack. + // Otherwise, we may need a scratch register to be available and we do not + // support that for now.
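+ // (Realignment computes the aligned SP in a scratch GPR before moving it + // into SP, as in the "sub x9, sp" / "and sp, x9" sequences in the tests + // below, and a shrink-wrapped prologue block is not guaranteed to have + // such a register free.)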
+ return !RegInfo->needsStackRealignment(*MF); +} + void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.begin(); diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 427afdf4acbf..7d8354c38787 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -37,6 +37,8 @@ class AArch64FrameLowering : public TargetFrameLowering { void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + bool canUseAsPrologue(const MachineBasicBlock &MBB) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; int resolveFrameIndexReference(const MachineFunction &MF, int FI, diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index beab844c6025..3fd509ae27f4 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -556,16 +556,42 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { } } -bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, - bool UseAtEnd, - unsigned *ScratchRegister) const { +/* This function will do the following: + - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 + respectively (defaults recommended by the ABI) and return true + - If MBB is not an entry block, initialize the register scavenger and look + for available registers. + - If the defaults (R0/R12) are available, return true + - If TwoUniqueRegsRequired is set to true, it looks for two unique + registers. Otherwise, look for a single available register. + - If the required registers are found, set SR1 and SR2 and return true. + - If the required registers are not found, set SR2 or both SR1 and SR2 to + PPC::NoRegister and return false. + + Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired + is not set, this function will attempt to find two different registers, but + still return true if only one register is available (and set SR1 == SR2). +*/ +bool +PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, + bool UseAtEnd, + bool TwoUniqueRegsRequired, + unsigned *SR1, + unsigned *SR2) const { RegScavenger RS; - unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; + unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; + unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; - if (ScratchRegister) - *ScratchRegister = R0; + // Set the defaults for the two scratch registers. + if (SR1) + *SR1 = R0; - // If MBB is an entry or exit block, use R0 as the scratch register + if (SR2) { + assert (SR1 && "Asking for the second scratch register but not the first?"); + *SR2 = R12; + } + + // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 
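+ // (These are the ABI-recommended defaults noted above, so no scavenging is + // needed in the entry and return blocks.)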
if ((UseAtEnd && MBB->isReturnBlock()) || (!UseAtEnd && (&MBB->getParent()->front() == MBB))) return true; @@ -573,8 +599,8 @@ bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, RS.enterBasicBlock(MBB); if (UseAtEnd && !MBB->empty()) { - // The scratch register will be used at the end of the block, so must consider - // all registers used within the block + // The scratch register will be used at the end of the block, so we must + // consider all registers used within the block. MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); // If no terminator, back iterator up to previous instruction. @@ -584,35 +610,86 @@ bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, if (MBBI != MBB->begin()) RS.forward(MBBI); } - - if (!RS.isRegUsed(R0)) + + // If the two registers are available, we're all good. + // Note that we only return here if both R0 and R12 are available because + // although the function may not require two unique registers, it may benefit + // from having two, so we should try to provide them. + if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) return true; - unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? &PPC::G8RCRegClass - : &PPC::GPRCRegClass); - - // Make sure the register scavenger was able to find an available register - // If not, use R0 but return false to indicate no register was available and - // R0 must be used (as recommended by the ABI) - if (Reg == 0) + // Get the list of callee-saved registers for the target. + const PPCRegisterInfo *RegInfo = + static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); + + // Get all the available registers in the block. + BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : + &PPC::GPRCRegClass); + + // We shouldn't use callee-saved registers as scratch registers because they + // may be available when looking for a candidate block for shrink wrapping + // but not available when the actual prologue/epilogue is being emitted, + // since they were added as live-in to the prologue block by + // PrologueEpilogueInserter. + for (int i = 0; CSRegs[i]; ++i) + BV.reset(CSRegs[i]); + + // Set the first scratch register to the first available one. + if (SR1) { + int FirstScratchReg = BV.find_first(); + *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; + } + + // If there is another one available, set the second scratch register to that. + // Otherwise, set it to PPC::NoRegister if this function requires two unique + // registers, or to whatever SR1 is set to if it doesn't. + if (SR2) { + int SecondScratchReg = BV.find_next(*SR1); + if (SecondScratchReg != -1) + *SR2 = SecondScratchReg; + else + *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; + } + + // Now that we've done our best to provide both registers, double-check + // whether we were able to provide enough. + if (BV.count() < (TwoUniqueRegsRequired ? 2 : 1)) return false; - if (ScratchRegister) - *ScratchRegister = Reg; - return true; } +// We need a scratch register for spilling LR and for spilling CR. By default, +// we use two scratch registers to hide latency. However, if only one scratch +// register is available, we can adjust for that by not overlapping the spill +// code. If, however, we need to realign the stack (i.e. have a base pointer) +// and the stack frame is large, we need two scratch registers.
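+ // (In that case one scratch register holds the realigned-SP computation + // while the other materializes the large frame size with a lis/ori pair; + // see the large-frame path in emitPrologue below.)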
+bool +PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { + const PPCRegisterInfo *RegInfo = + static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); + MachineFunction &MF = *(MBB->getParent()); + bool HasBP = RegInfo->hasBasePointer(MF); + unsigned FrameSize = determineFrameLayout(MF, false); + int NegFrameSize = -FrameSize; + bool IsLargeFrame = !isInt<16>(NegFrameSize); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned MaxAlign = MFI->getMaxAlignment(); + + return IsLargeFrame && HasBP && MaxAlign > 1; +} + bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); - return findScratchRegister(TmpMBB, false, nullptr); + return findScratchRegister(TmpMBB, false, + twoUniqueScratchRegsRequired(TmpMBB)); } bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); - return findScratchRegister(TmpMBB, true, nullptr); + return findScratchRegister(TmpMBB, true); } void PPCFrameLowering::emitPrologue(MachineFunction &MF, @@ -664,6 +741,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); + bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); @@ -701,9 +779,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); - findScratchRegister(&MBB, false, &ScratchReg); - assert(ScratchReg && "No scratch register!"); - + // Using the same bool variable as below to suppress compiler warnings. + bool SingleScratchReg = + findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), + &ScratchReg, &TempReg); + assert(SingleScratchReg && + "Required number of registers not available in this block"); + + SingleScratchReg = ScratchReg == TempReg; + int LROffset = getReturnSaveOffset(); int FPOffset = 0; @@ -748,13 +832,30 @@ // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. bool isLargeFrame = !isInt<16>(NegFrameSize); + assert((isPPC64 || !MustSaveCR) && + "Prologue CR saving supported only in 64-bit mode"); + + // If we need to spill the CR and the LR but we don't have two separate + // registers available, we must spill them one at a time. + if (MustSaveCR && SingleScratchReg && MustSaveLR) { + // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. + // If only one or two CR fields are clobbered, it could be more + // efficient to use mfocrf to selectively save just those fields.
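+ // The whole CR save (mfcr plus store) is emitted first so that the single + // scratch register is dead again before the LR save below reuses it.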
+ MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill); + BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) + .addReg(TempReg, getKillRegState(true)) + .addImm(8) + .addReg(SPReg); + } + if (MustSaveLR) BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); - assert((isPPC64 || MustSaveCRs.empty()) && - "Prologue CR saving supported only in 64-bit mode"); - - if (!MustSaveCRs.empty()) { // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 // FIXME: In the ELFv2 ABI, we are not required to save all CR fields. // If only one or two CR fields are clobbered, it could be more // efficient to use mfocrf to selectively save just those fields. @@ -792,7 +893,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, .addImm(LROffset) .addReg(SPReg); - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) .addReg(TempReg, getKillRegState(true)) .addImm(8) @@ -811,6 +913,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, .addReg(SPReg); } + // This condition must be kept in sync with canUseAsPrologue. if (HasBP && MaxAlign > 1) { if (isPPC64) BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) @@ -828,6 +931,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, .addReg(ScratchReg, RegState::Kill) .addImm(NegFrameSize); } else { + assert(!SingleScratchReg && "Only a single scratch reg available"); BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) .addImm(NegFrameSize >> 16); BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) @@ -951,7 +1055,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, // For SVR4, don't emit a move for the CR spill slot if we haven't // spilled CRs. if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) - && MustSaveCRs.empty()) + && !MustSaveCR) continue; // For 64-bit SVR4 when we have spilled CRs, the spill location @@ -1005,6 +1109,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); bool MustSaveLR = FI->mustSaveLR(); const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); + bool MustSaveCR = !MustSaveCRs.empty(); // Do we have a frame pointer and/or base pointer for this function? bool HasFP = hasFP(MF); bool HasBP = RegInfo->hasBasePointer(MF); @@ -1026,14 +1131,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, : PPC::ADDI ); const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 : PPC::ADD4 ); - + int LROffset = getReturnSaveOffset(); int FPOffset = 0; - findScratchRegister(&MBB, true, &ScratchReg); - assert(ScratchReg && "No scratch register!"); - + // Using the same bool variable as below to suppress compiler warnings. + bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, + &TempReg); + assert(SingleScratchReg && + "Could not find an available scratch register"); + + SingleScratchReg = ScratchReg == TempReg; + if (HasFP) { if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -1130,15 +1240,27 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } + assert((isPPC64 || !MustSaveCR) && + "Epilogue CR restoring supported only in 64-bit mode"); + + // If we need to restore both the LR and the CR and we only have one + // available scratch register, we must do them one at a time.
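+ // As in the prologue, the CR reload completes (killing the scratch + // register) before the LR reload below reuses it.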
+ if (MustSaveCR && SingleScratchReg && MustSaveLR) { + BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) + .addImm(8) + .addReg(SPReg); + for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) + BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) + .addReg(TempReg, getKillRegState(i == e-1)); + } + if (MustSaveLR) BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) .addImm(LROffset) .addReg(SPReg); - assert((isPPC64 || MustSaveCRs.empty()) && - "Epilogue CR restoring supported only in 64-bit mode"); - - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) .addImm(8) .addReg(SPReg); @@ -1160,7 +1282,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addImm(BPOffset) .addReg(SPReg); - if (!MustSaveCRs.empty()) // will only occur for PPC64 + if (MustSaveCR && + !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) .addReg(TempReg, getKillRegState(i == e-1)); diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index bbe1329a5352..f1f3f0b831a7 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -30,28 +30,41 @@ class PPCFrameLowering: public TargetFrameLowering { const unsigned BasePointerSaveOffset; /** - * \brief Find a register that can be used in function prologue and epilogue + * \brief Find register[s] that can be used in function prologue and epilogue * - * Find a register that can be use as the scratch register in function + * Find register[s] that can be used as scratch register[s] in function * prologue and epilogue to save various registers (Link Register, Base - * Pointer, etc.). Prefer R0, if it is available. If it is not available, - * then choose a different register. + * Pointer, etc.). Prefer R0/R12, if available. Otherwise choose whatever + * register[s] are available. * - * This method will return true if an available register was found (including - * R0). If no available registers are found, the method returns false and sets - * ScratchRegister to R0, as per the recommendation in the ABI. + * This method will return true if it is able to find enough unique scratch + * registers (1 or 2 depending on the requirement). If it is unable to find + * enough available registers in the block, it will return false and set + * any passed output parameter that corresponds to a required unique register + * to PPC::NoRegister. * * \param[in] MBB The machine basic block to find an available register for * \param[in] UseAtEnd Specify whether the scratch register will be used at * the end of the basic block (i.e., will the scratch * register kill a register defined in the basic block) - * \param[out] ScratchRegister The scratch register to use - * \return true if a scratch register was found. false of a scratch register - * was not found and R0 is being used as the default. + * \param[in] TwoUniqueRegsRequired Specify whether this basic block will + * require two unique scratch registers. + * \param[out] SR1 The scratch register to use + * \param[out] SR2 The second scratch register. If this pointer is not null + * the function will attempt to set it to an available + * register regardless of whether there is a hard requirement + * for two unique scratch registers.
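+ * (Providing the second register even when only one is required lets the + * prologue overlap the CR and LR spill code to hide latency.)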
+ * \return true if the required number of registers was found. + * false if the required number of scratch registers wasn't available. + * If either output parameter refers to a required scratch register + * that isn't available, it will be set to an invalid value. */ bool findScratchRegister(MachineBasicBlock *MBB, bool UseAtEnd, - unsigned *ScratchRegister) const; + bool TwoUniqueRegsRequired = false, + unsigned *SR1 = nullptr, + unsigned *SR2 = nullptr) const; + bool twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const; public: PPCFrameLowering(const PPCSubtarget &STI); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c12a3ed43d29..dd9966f9e179 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -22227,6 +22227,35 @@ X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI, return BB; } +MachineBasicBlock * +X86TargetLowering::EmitLoweredTLSAddr(MachineInstr *MI, + MachineBasicBlock *BB) const { + // Here we replace TLSADDR with the sequence: + // adjust_stackdown -> TLSADDR -> adjust_stackup. + // We need this because TLSADDR is lowered into calls + // inside MC, therefore without the two markers shrink-wrapping + // may push the prologue/epilogue past them. + const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction &MF = *BB->getParent(); + + // Emit CALLSEQ_START right before the instruction. + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + MachineInstrBuilder CallseqStart = + BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0); + BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); + + // Emit CALLSEQ_END right after the instruction. + // We don't call eraseFromParent() because we want to keep the + // original instruction around.
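+ // (The TLSADDR pseudo itself is still expanded into the real call during + // MC lowering, as noted above; only the bracketing markers are added here.)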
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + MachineInstrBuilder CallseqEnd = + BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0); + BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); + + return BB; +} + MachineBasicBlock * X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -22607,6 +22636,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::TCRETURNri64: case X86::TCRETURNmi64: return BB; + case X86::TLS_addr32: + case X86::TLS_addr64: + case X86::TLS_base_addr32: + case X86::TLS_base_addr64: + return EmitLoweredTLSAddr(MI, BB); case X86::WIN_ALLOCA: return EmitLoweredWinAlloca(MI, BB); case X86::CATCHRET: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0ab786e08e02..b67958a9c498 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1129,6 +1129,9 @@ namespace llvm { MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 96a29ca8c370..c709c8aca9fa 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -436,7 +436,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in { + usesCustomInserter = 1, Uses = [ESP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, @@ -456,7 +456,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in { + usesCustomInserter = 1, Uses = [RSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll index 1820b8163a90..90093f94d0ad 100644 --- a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -522,10 +522,10 @@ bb1: ; CHECK-LABEL: realign_conditional2 ; Extra realignment in the prologue (performance issue). -; CHECK: tbz {{.*}} .[[LABEL:.*]] ; CHECK: sub x9, sp, #32 // =32 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp +; CHECK: tbz {{.*}} .[[LABEL:.*]] ; Stack is realigned in a non-entry BB. ; CHECK: sub [[REG:x[01-9]+]], sp, #64 ; CHECK: and sp, [[REG]], #0xffffffffffffffe0 diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index 2ecd66ddf5d4..4d751f501d4a 100644 --- a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -630,3 +630,88 @@ loop2b: ; preds = %loop1 end: ret void } + +; Don't do shrink-wrapping when we need to re-align the stack pointer. +; See bug 26642. +; CHECK-LABEL: stack_realign: +; CHECK-NOT: lsl w[[LSL1:[0-9]+]], w0, w1 +; CHECK-NOT: lsl w[[LSL2:[0-9]+]], w1, w0 +; CHECK: stp x29, x30, [sp, #-16]! 
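+; (The CHECK-NOT lines above ensure the shifts do not appear before the +; frame setup, i.e. that shrink wrapping did not sink the prologue.)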
+; CHECK: mov x29, sp +; CHECK: sub x{{[0-9]+}}, sp, #16 +; CHECK-DAG: lsl w[[LSL1:[0-9]+]], w0, w1 +; CHECK-DAG: lsl w[[LSL2:[0-9]+]], w1, w0 +; CHECK-DAG: str w[[LSL1]], +; CHECK-DAG: str w[[LSL2]], + +define i32 @stack_realign(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) { + %tmp = alloca i32, align 32 + %shl1 = shl i32 %a, %b + %shl2 = shl i32 %b, %a + %tmp2 = icmp slt i32 %a, %b + br i1 %tmp2, label %true, label %false + +true: + store i32 %a, i32* %tmp, align 4 + %tmp4 = load i32, i32* %tmp + br label %false + +false: + %tmp.0 = phi i32 [ %tmp4, %true ], [ %a, %0 ] + store i32 %shl1, i32* %ptr1 + store i32 %shl2, i32* %ptr2 + ret i32 %tmp.0 +} + +; Re-aligned stack pointer with all caller-save regs live. See bug +; 26642. In this case we currently avoid shrink wrapping because +; ensuring we have a scratch register to re-align the stack pointer is +; too complicated. Output should be the same for both enabled and +; disabled shrink wrapping. +; CHECK-LABEL: stack_realign2: +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #-{{[0-9]+}}]! +; CHECK: add x29, sp, #{{[0-9]+}} +; CHECK: lsl {{w[0-9]+}}, w0, w1 + +define void @stack_realign2(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2, i32* %ptr3, i32* %ptr4, i32* %ptr5, i32* %ptr6) { + %tmp = alloca i32, align 32 + %tmp1 = shl i32 %a, %b + %tmp2 = shl i32 %b, %a + %tmp3 = lshr i32 %a, %b + %tmp4 = lshr i32 %b, %a + %tmp5 = add i32 %b, %a + %tmp6 = sub i32 %b, %a + %tmp7 = add i32 %tmp1, %tmp2 + %tmp8 = sub i32 %tmp2, %tmp3 + %tmp9 = add i32 %tmp3, %tmp4 + %tmp10 = add i32 %tmp4, %tmp5 + %cmp = icmp slt i32 %a, %b + br i1 %cmp, label %true, label %false + +true: + store i32 %a, i32* %tmp, align 4 + call void asm sideeffect "nop", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28}"() nounwind + br label %false + +false: + store i32 %tmp1, i32* %ptr1, align 4 + store i32 %tmp2, i32* %ptr2, align 4 + store i32 %tmp3, i32* %ptr3, align 4 + store i32 %tmp4, i32* %ptr4, align 4 + store i32 %tmp5, i32* %ptr5, align 4 + store i32 %tmp6, i32* %ptr6, align 4 + %idx1 = getelementptr inbounds i32, i32* %ptr1, i64 1 + store i32 %a, i32* %idx1, align 4 + %idx2 = getelementptr inbounds i32, i32* %ptr1, i64 2 + store i32 %b, i32* %idx2, align 4 + %idx3 = getelementptr inbounds i32, i32* %ptr1, i64 3 + store i32 %tmp7, i32* %idx3, align 4 + %idx4 = getelementptr inbounds i32, i32* %ptr1, i64 4 + store i32 %tmp8, i32* %idx4, align 4 + %idx5 = getelementptr inbounds i32, i32* %ptr1, i64 5 + store i32 %tmp9, i32* %idx5, align 4 + %idx6 = getelementptr inbounds i32, i32* %ptr1, i64 6 + store i32 %tmp10, i32* %idx6, align 4 + + ret void +} diff --git a/test/CodeGen/ARM/Windows/alloca.ll b/test/CodeGen/ARM/Windows/alloca.ll index 6a3d002ab3b3..0f20ffbd36db 100644 --- a/test/CodeGen/ARM/Windows/alloca.ll +++ b/test/CodeGen/ARM/Windows/alloca.ll @@ -13,7 +13,9 @@ entry: } ; CHECK: bl num_entries -; CHECK: movs [[R1:r[0-9]+]], #7 +; Any register is actually valid here, but turns out we use lr, +; because we do not have the kill flag on R0. 
+; CHECK: mov.w [[R1:lr]], #7 ; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2 ; CHECK: bic [[R0]], [[R0]], #7 ; CHECK: lsrs r4, [[R0]], #2 diff --git a/test/CodeGen/PowerPC/pr26690.ll b/test/CodeGen/PowerPC/pr26690.ll new file mode 100644 index 000000000000..3e7662409d51 --- /dev/null +++ b/test/CodeGen/PowerPC/pr26690.ll @@ -0,0 +1,118 @@ +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s + +%struct.anon = type { %struct.anon.0, %struct.anon.1 } +%struct.anon.0 = type { i32 } +%struct.anon.1 = type { i32 } + +@i = common global i32 0, align 4 +@b = common global i32* null, align 8 +@c = common global i32 0, align 4 +@a = common global i32 0, align 4 +@h = common global i32 0, align 4 +@g = common global i32 0, align 4 +@j = common global i32 0, align 4 +@f = common global %struct.anon zeroinitializer, align 4 +@d = common global i32 0, align 4 +@e = common global i32 0, align 4 + +; Function Attrs: norecurse nounwind +define signext i32 @fn1(i32* nocapture %p1, i32 signext %p2, i32* nocapture %p3) { +entry: + %0 = load i32, i32* @i, align 4, !tbaa !1 + %cond = icmp eq i32 %0, 8 + br i1 %cond, label %if.end16, label %while.cond.preheader + +while.cond.preheader: ; preds = %entry + %1 = load i32*, i32** @b, align 8, !tbaa !5 + %2 = load i32, i32* %1, align 4, !tbaa !1 + %tobool18 = icmp eq i32 %2, 0 + br i1 %tobool18, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %while.cond.preheader + %.pre = load i32, i32* @c, align 4, !tbaa !1 + br label %while.body + +while.body: ; preds = %while.body.backedge, %while.body.lr.ph + switch i32 %.pre, label %while.body.backedge [ + i32 0, label %sw.bb1 + i32 8, label %sw.bb1 + i32 6, label %sw.bb1 + i32 24, label %while.cond.backedge + ] + +while.body.backedge: ; preds = %while.body, %while.cond.backedge + br label %while.body + +sw.bb1: ; preds = %while.body, %while.body, %while.body + store i32 2, i32* @a, align 4, !tbaa !1 + br label %while.cond.backedge + +while.cond.backedge: ; preds = %while.body, %sw.bb1 + store i32 4, i32* @a, align 4, !tbaa !1 + %.pre19 = load i32, i32* %1, align 4, !tbaa !1 + %tobool = icmp eq i32 %.pre19, 0 + br i1 %tobool, label %while.end.loopexit, label %while.body.backedge + +while.end.loopexit: ; preds = %while.cond.backedge + br label %while.end + +while.end: ; preds = %while.end.loopexit, %while.cond.preheader + %3 = load i32, i32* @h, align 4, !tbaa !1 + %mul = mul nsw i32 %0, %3 + %4 = load i32, i32* @g, align 4, !tbaa !1 + %mul4 = mul nsw i32 %mul, %4 + store i32 %mul4, i32* @j, align 4, !tbaa !1 + %5 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @f, i64 0, i32 0, i32 0), align 4, !tbaa !7 + %tobool5 = icmp eq i32 %5, 0 + br i1 %tobool5, label %if.end, label %if.then + +if.then: ; preds = %while.end + %div = sdiv i32 %5, %mul + store i32 %div, i32* @g, align 4, !tbaa !1 + br label %if.end + +if.end: ; preds = %while.end, %if.then + %6 = phi i32 [ %4, %while.end ], [ %div, %if.then ] + %7 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @f, i64 0, i32 1, i32 0), align 4, !tbaa !10 + %tobool7 = icmp ne i32 %7, 0 + %tobool8 = icmp ne i32 %mul4, 0 + %or.cond = and i1 %tobool7, %tobool8 + %tobool10 = icmp ne i32 %0, 0 + %or.cond17 = and i1 %or.cond, %tobool10 + br i1 %or.cond17, label %if.then11, label %if.end13 + +if.then11: ; preds = %if.end + store i32 %3, i32* @d, align 4, !tbaa !1 + %8 = load i32, i32* @e, align 4, !tbaa !1 + store i32 %8, i32* %p3, align 4, !tbaa !1 + %.pre20 = load i32, i32* @g, align 4, !tbaa 
!1 + br label %if.end13 + +if.end13: ; preds = %if.then11, %if.end + %9 = phi i32 [ %.pre20, %if.then11 ], [ %6, %if.end ] + %tobool14 = icmp eq i32 %9, 0 + br i1 %tobool14, label %if.end16, label %if.then15 + +if.then15: ; preds = %if.end13 + store i32 %p2, i32* %p1, align 4, !tbaa !1 + br label %if.end16 + +if.end16: ; preds = %entry, %if.end13, %if.then15 + ret i32 2 +} + +; CHECK: mfcr {{[0-9]+}} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.9.0 (trunk 261520)"} +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !3, i64 0} +!7 = !{!8, !2, i64 0} +!8 = !{!"", !9, i64 0, !9, i64 4} +!9 = !{!"", !2, i64 0} +!10 = !{!8, !2, i64 4} diff --git a/test/CodeGen/X86/i386-tlscall-fastregalloc.ll b/test/CodeGen/X86/i386-tlscall-fastregalloc.ll new file mode 100644 index 000000000000..775c0c1b3784 --- /dev/null +++ b/test/CodeGen/X86/i386-tlscall-fastregalloc.ll @@ -0,0 +1,26 @@ +; RUN: llc %s -o - -O0 -regalloc=fast | FileCheck %s +target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" +target triple = "i386-apple-macosx10.10" + +@c = external global i8, align 1 +@p = thread_local global i8* null, align 4 + +; Check that regalloc fast correctly preserves EAX, which is set by the TLS +; call, until the actual use. +; PR26485. +; +; CHECK-LABEL: f: +; Get p. +; CHECK: movl _p@{{[0-9a-zA-Z]+}}, [[P_ADDR:%[a-z]+]] +; CHECK-NEXT: calll *([[P_ADDR]]) +; At this point eax contains the address of p. +; Load c address. +; Make sure we do not clobber eax. +; CHECK-NEXT: movl L_c{{[^,]*}}, [[C_ADDR:%e[b-z]x+]] +; Store c address into p. +; CHECK-NEXT: movl [[C_ADDR]], (%eax) +define void @f() #0 { +entry: + store i8* @c, i8** @p, align 4 + ret void +} diff --git a/test/CodeGen/X86/tls-shrink-wrapping.ll b/test/CodeGen/X86/tls-shrink-wrapping.ll new file mode 100644 index 000000000000..37c1754c0be8 --- /dev/null +++ b/test/CodeGen/X86/tls-shrink-wrapping.ll @@ -0,0 +1,60 @@ +; Testcase generated from the following code: +; extern __thread int i; +; void f(); +; int g(void) { +; if (i) { +; i = 0; +; f(); +; } +; return i; +; } +; We want to make sure that TLS variables are not accessed before +; the stack frame is set up. + +; RUN: llc < %s -relocation-model=pic | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-freebsd11.0" + +@i = external thread_local global i32, align 4 + +define i32 @g() #0 { +entry: + %tmp = load i32, i32* @i, align 4 + %tobool = icmp eq i32 %tmp, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + store i32 0, i32* @i, align 4 + tail call void (...) @f() #2 + %.pre = load i32, i32* @i, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + %tmp1 = phi i32 [ 0, %entry ], [ %.pre, %if.then ] + ret i32 %tmp1 +} + +; CHECK: g: # @g +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: # BB#0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsp, %rbp +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: data16 +; CHECK-NEXT: leaq i@TLSGD(%rip), %rdi + +declare void @f(...) 
#1 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } From 283b7f175721fa9e0e13f59243932e7b26dffc26 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Wed, 24 Feb 2016 21:33:38 +0000 Subject: [PATCH 2/2] Vendor import of clang release_38 branch r261684: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261684 --- docs/ReleaseNotes.rst | 27 +++++++++++++++++++++++++++ lib/CodeGen/TargetInfo.cpp | 5 ++++- lib/Sema/SemaExpr.cpp | 11 +++++++---- test/CodeGen/ppc-varargs-struct.c | 2 ++ test/Sema/generic-selection.c | 4 ++++ 5 files changed, 44 insertions(+), 5 deletions(-) diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 18015f8d7c6e..a9b4b0de2906 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -179,6 +179,33 @@ Several additional features/bugfixes have been added to the previous standards: - Improved diagnostics for function pointers. +OpenMP Support in Clang +----------------------- + +OpenMP 3.1 is fully supported and is enabled by default with -fopenmp, +which now uses the clang OpenMP library instead of the GCC OpenMP library. +The runtime can be built in-tree. + +In addition to OpenMP 3.1, several important elements of OpenMP 4.0/4.5 +are supported as well. We continue to aim for complete OpenMP 4.5 support: + +- ``map`` clause +- task dependencies +- ``num_teams`` clause +- ``thread_limit`` clause +- ``target`` and ``target data`` directives +- ``target`` directive with implicit data mapping +- ``target enter data`` and ``target exit data`` directives +- Array sections [2.4, Array Sections]. +- Directive name modifiers for ``if`` clause [2.12, if Clause]. +- ``linear`` clause can be used in loop-based directives [2.7.2, loop Construct]. +- ``simdlen`` clause [2.8, SIMD Construct]. +- ``hint`` clause [2.13.2, critical Construct]. +- Parsing/semantic analysis of all non-device directives introduced in OpenMP 4.5. + +The codegen for OpenMP constructs was significantly improved, allowing us to produce much more stable and faster code. +Full IR test cases are also implemented. + CUDA Support in Clang --------------------- Clang has experimental support for end-to-end CUDA compilation now: diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index cdb325f256f2..3d1ddef94657 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -3475,6 +3475,7 @@ class PPC32TargetCodeGenInfo : public TargetCodeGenInfo { Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, QualType Ty) const { + const unsigned OverflowLimit = 8; if (const ComplexType *CTy = Ty->getAs<ComplexType>()) { // TODO: Implement this. For now ignore.
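// ((void)CTy below just silences the unused-variable warning until then.)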
(void)CTy; @@ -3517,7 +3518,7 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, } llvm::Value *CC = - Builder.CreateICmpULT(NumRegs, Builder.getInt8(8), "cond"); + Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond"); llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs"); llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow"); @@ -3569,6 +3570,8 @@ Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList, { CGF.EmitBlock(UsingOverflow); + Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr); + // Everything in the overflow area is rounded up to a size of at least 4. CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4); diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index ebf79812d8dc..5a2eb6060ee9 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -1365,10 +1365,13 @@ Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, // Decay and strip qualifiers for the controlling expression type, and handle // placeholder type replacement. See committee discussion from WG14 DR423. - ExprResult R = DefaultFunctionArrayLvalueConversion(ControllingExpr); - if (R.isInvalid()) - return ExprError(); - ControllingExpr = R.get(); + { + EnterExpressionEvaluationContext Unevaluated(*this, Sema::Unevaluated); + ExprResult R = DefaultFunctionArrayLvalueConversion(ControllingExpr); + if (R.isInvalid()) + return ExprError(); + ControllingExpr = R.get(); + } // The controlling expression is an unevaluated operand, so side effects are // likely unintended. diff --git a/test/CodeGen/ppc-varargs-struct.c b/test/CodeGen/ppc-varargs-struct.c index 1ad57c26b485..d7936a126960 100644 --- a/test/CodeGen/ppc-varargs-struct.c +++ b/test/CodeGen/ppc-varargs-struct.c @@ -37,6 +37,7 @@ void testva (int n, ...) // CHECK-PPC-NEXT: br label %[[CONT:[a-z0-9]+]] // // CHECK-PPC:[[USING_OVERFLOW]] +// CHECK-PPC-NEXT: store i8 8, i8* [[GPRPTR]], align 4 // CHECK-PPC-NEXT: [[OVERFLOW_AREA_P:%[0-9]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 3 // CHECK-PPC-NEXT: [[OVERFLOW_AREA:%.+]] = load i8*, i8** [[OVERFLOW_AREA_P]], align 4 // CHECK-PPC-NEXT: %{{[0-9]+}} = ptrtoint i8* %argp.cur to i32 @@ -76,6 +77,7 @@ void testva (int n, ...) // CHECK-PPC-NEXT: br label %[[CONT:[a-z0-9]+]] // // CHECK-PPC:[[USING_OVERFLOW]] +// CHECK-PPC-NEXT: store i8 8, i8* [[GPRPTR]], align 4 // CHECK-PPC-NEXT: [[OVERFLOW_AREA_P:%[0-9]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* [[ARRAYDECAY]], i32 0, i32 3 // CHECK-PPC-NEXT: [[OVERFLOW_AREA:%.+]] = load i8*, i8** [[OVERFLOW_AREA_P]], align 4 // CHECK-PPC-NEXT: [[MEMADDR:%.+]] = bitcast i8* [[OVERFLOW_AREA]] to i32* diff --git a/test/Sema/generic-selection.c b/test/Sema/generic-selection.c index 0563ec0f4fc0..5c02005d0fa8 100644 --- a/test/Sema/generic-selection.c +++ b/test/Sema/generic-selection.c @@ -31,4 +31,8 @@ void foo(int n) { const int i = 12; int a9[_Generic(i, int: 1, default: 2) == 1 ? 1 : -1]; + + // This is expected to not trigger any diagnostics because the controlling + // expression is not evaluated. + (void)_Generic(*(int *)0, int: 1); }